1/* Amalgamated source file */ 2#include "upb.h" 3/* 4* This is where we define macros used across upb. 5* 6* All of these macros are undef'd in port_undef.inc to avoid leaking them to 7* users. 8* 9* The correct usage is: 10* 11* #include "upb/foobar.h" 12* #include "upb/baz.h" 13* 14* // MUST be last included header. 15* #include "upb/port_def.inc" 16* 17* // Code for this file. 18* // <...> 19* 20* // Can be omitted for .c files, required for .h. 21* #include "upb/port_undef.inc" 22* 23* This file is private and must not be included by users! 24*/ 25#include <stdint.h> 26 27#if UINTPTR_MAX == 0xffffffff 28#define UPB_SIZE(size32, size64) size32 29#else 30#define UPB_SIZE(size32, size64) size64 31#endif 32 33/* If we always read/write as a consistent type to each address, this shouldn't 34 * violate aliasing. 35 */ 36#define UPB_PTR_AT(msg, ofs, type) ((type*)((char*)(msg) + (ofs))) 37 38#define UPB_READ_ONEOF(msg, fieldtype, offset, case_offset, case_val, default) \ 39 *UPB_PTR_AT(msg, case_offset, int) == case_val \ 40 ? *UPB_PTR_AT(msg, offset, fieldtype) \ 41 : default 42 43#define UPB_WRITE_ONEOF(msg, fieldtype, offset, value, case_offset, case_val) \ 44 *UPB_PTR_AT(msg, case_offset, int) = case_val; \ 45 *UPB_PTR_AT(msg, offset, fieldtype) = value; 46 47#define UPB_MAPTYPE_STRING 0 48 49/* UPB_INLINE: inline if possible, emit standalone code if required. */ 50#ifdef __cplusplus 51#define UPB_INLINE inline 52#elif defined (__GNUC__) || defined(__clang__) 53#define UPB_INLINE static __inline__ 54#else 55#define UPB_INLINE static 56#endif 57 58/* Hints to the compiler about likely/unlikely branches. */ 59#if defined (__GNUC__) || defined(__clang__) 60#define UPB_LIKELY(x) __builtin_expect((x),1) 61#define UPB_UNLIKELY(x) __builtin_expect((x),0) 62#else 63#define UPB_LIKELY(x) (x) 64#define UPB_UNLIKELY(x) (x) 65#endif 66 67/* Define UPB_BIG_ENDIAN manually if you're on big endian and your compiler 68 * doesn't provide these preprocessor symbols. 
*/ 69#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__) 70#define UPB_BIG_ENDIAN 71#endif 72 73/* Macros for function attributes on compilers that support them. */ 74#ifdef __GNUC__ 75#define UPB_FORCEINLINE __inline__ __attribute__((always_inline)) 76#define UPB_NOINLINE __attribute__((noinline)) 77#define UPB_NORETURN __attribute__((__noreturn__)) 78#else /* !defined(__GNUC__) */ 79#define UPB_FORCEINLINE 80#define UPB_NOINLINE 81#define UPB_NORETURN 82#endif 83 84#if __STDC_VERSION__ >= 199901L || __cplusplus >= 201103L 85/* C99/C++11 versions. */ 86#include <stdio.h> 87#define _upb_snprintf snprintf 88#define _upb_vsnprintf vsnprintf 89#define _upb_va_copy(a, b) va_copy(a, b) 90#elif defined(_MSC_VER) 91/* Microsoft C/C++ versions. */ 92#include <stdarg.h> 93#include <stdio.h> 94#if _MSC_VER < 1900 95int msvc_snprintf(char* s, size_t n, const char* format, ...); 96int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg); 97#define UPB_MSVC_VSNPRINTF 98#define _upb_snprintf msvc_snprintf 99#define _upb_vsnprintf msvc_vsnprintf 100#else 101#define _upb_snprintf snprintf 102#define _upb_vsnprintf vsnprintf 103#endif 104#define _upb_va_copy(a, b) va_copy(a, b) 105#elif defined __GNUC__ 106/* A few hacky workarounds for functions not in C89. 107 * For internal use only! 108 * TODO(haberman): fix these by including our own implementations, or finding 109 * another workaround. 110 */ 111#define _upb_snprintf __builtin_snprintf 112#define _upb_vsnprintf __builtin_vsnprintf 113#define _upb_va_copy(a, b) __va_copy(a, b) 114#else 115#error Need implementations of [v]snprintf and va_copy 116#endif 117 118#ifdef __cplusplus 119#if __cplusplus >= 201103L || defined(__GXX_EXPERIMENTAL_CXX0X__) || \ 120 (defined(_MSC_VER) && _MSC_VER >= 1900) 121/* C++11 is present */ 122#else 123#error upb requires C++11 for C++ support 124#endif 125#endif 126 127#define UPB_MAX(x, y) ((x) > (y) ? (x) : (y)) 128#define UPB_MIN(x, y) ((x) < (y) ? 
(x) : (y)) 129 130#define UPB_UNUSED(var) (void)var 131 132/* UPB_ASSUME(): in release mode, we tell the compiler to assume this is true. 133 */ 134#ifdef NDEBUG 135#ifdef __GNUC__ 136#define UPB_ASSUME(expr) if (!(expr)) __builtin_unreachable() 137#else 138#define UPB_ASSUME(expr) do {} if (false && (expr)) 139#endif 140#else 141#define UPB_ASSUME(expr) assert(expr) 142#endif 143 144/* UPB_ASSERT(): in release mode, we use the expression without letting it be 145 * evaluated. This prevents "unused variable" warnings. */ 146#ifdef NDEBUG 147#define UPB_ASSERT(expr) do {} while (false && (expr)) 148#else 149#define UPB_ASSERT(expr) assert(expr) 150#endif 151 152/* UPB_ASSERT_DEBUGVAR(): assert that uses functions or variables that only 153 * exist in debug mode. This turns into regular assert. */ 154#define UPB_ASSERT_DEBUGVAR(expr) assert(expr) 155 156#if defined(__GNUC__) || defined(__clang__) 157#define UPB_UNREACHABLE() do { assert(0); __builtin_unreachable(); } while(0) 158#else 159#define UPB_UNREACHABLE() do { assert(0); } while(0) 160#endif 161 162/* UPB_INFINITY representing floating-point positive infinity. */ 163#include <math.h> 164#ifdef INFINITY 165#define UPB_INFINITY INFINITY 166#else 167#define UPB_INFINITY (1.0 / 0.0) 168#endif 169 170#include <setjmp.h> 171#include <string.h> 172 173 174 175/* Maps descriptor type -> upb field type. 
*/ 176static const uint8_t desctype_to_fieldtype[] = { 177 -1, /* invalid descriptor type */ 178 UPB_TYPE_DOUBLE, /* DOUBLE */ 179 UPB_TYPE_FLOAT, /* FLOAT */ 180 UPB_TYPE_INT64, /* INT64 */ 181 UPB_TYPE_UINT64, /* UINT64 */ 182 UPB_TYPE_INT32, /* INT32 */ 183 UPB_TYPE_UINT64, /* FIXED64 */ 184 UPB_TYPE_UINT32, /* FIXED32 */ 185 UPB_TYPE_BOOL, /* BOOL */ 186 UPB_TYPE_STRING, /* STRING */ 187 UPB_TYPE_MESSAGE, /* GROUP */ 188 UPB_TYPE_MESSAGE, /* MESSAGE */ 189 UPB_TYPE_BYTES, /* BYTES */ 190 UPB_TYPE_UINT32, /* UINT32 */ 191 UPB_TYPE_ENUM, /* ENUM */ 192 UPB_TYPE_INT32, /* SFIXED32 */ 193 UPB_TYPE_INT64, /* SFIXED64 */ 194 UPB_TYPE_INT32, /* SINT32 */ 195 UPB_TYPE_INT64, /* SINT64 */ 196}; 197 198/* Maps descriptor type -> upb map size. */ 199static const uint8_t desctype_to_mapsize[] = { 200 -1, /* invalid descriptor type */ 201 8, /* DOUBLE */ 202 4, /* FLOAT */ 203 8, /* INT64 */ 204 8, /* UINT64 */ 205 4, /* INT32 */ 206 8, /* FIXED64 */ 207 4, /* FIXED32 */ 208 1, /* BOOL */ 209 UPB_MAPTYPE_STRING, /* STRING */ 210 sizeof(void *), /* GROUP */ 211 sizeof(void *), /* MESSAGE */ 212 UPB_MAPTYPE_STRING, /* BYTES */ 213 4, /* UINT32 */ 214 4, /* ENUM */ 215 4, /* SFIXED32 */ 216 8, /* SFIXED64 */ 217 4, /* SINT32 */ 218 8, /* SINT64 */ 219}; 220 221static const unsigned fixed32_ok = (1 << UPB_DTYPE_FLOAT) | 222 (1 << UPB_DTYPE_FIXED32) | 223 (1 << UPB_DTYPE_SFIXED32); 224 225static const unsigned fixed64_ok = (1 << UPB_DTYPE_DOUBLE) | 226 (1 << UPB_DTYPE_FIXED64) | 227 (1 << UPB_DTYPE_SFIXED64); 228 229/* Op: an action to be performed for a wire-type/field-type combination. 
*/ 230#define OP_SCALAR_LG2(n) (n) 231#define OP_FIXPCK_LG2(n) (n + 4) 232#define OP_VARPCK_LG2(n) (n + 8) 233#define OP_STRING 4 234#define OP_SUBMSG 5 235 236static const int8_t varint_ops[19] = { 237 -1, /* field not found */ 238 -1, /* DOUBLE */ 239 -1, /* FLOAT */ 240 OP_SCALAR_LG2(3), /* INT64 */ 241 OP_SCALAR_LG2(3), /* UINT64 */ 242 OP_SCALAR_LG2(2), /* INT32 */ 243 -1, /* FIXED64 */ 244 -1, /* FIXED32 */ 245 OP_SCALAR_LG2(0), /* BOOL */ 246 -1, /* STRING */ 247 -1, /* GROUP */ 248 -1, /* MESSAGE */ 249 -1, /* BYTES */ 250 OP_SCALAR_LG2(2), /* UINT32 */ 251 OP_SCALAR_LG2(2), /* ENUM */ 252 -1, /* SFIXED32 */ 253 -1, /* SFIXED64 */ 254 OP_SCALAR_LG2(2), /* SINT32 */ 255 OP_SCALAR_LG2(3), /* SINT64 */ 256}; 257 258static const int8_t delim_ops[37] = { 259 /* For non-repeated field type. */ 260 -1, /* field not found */ 261 -1, /* DOUBLE */ 262 -1, /* FLOAT */ 263 -1, /* INT64 */ 264 -1, /* UINT64 */ 265 -1, /* INT32 */ 266 -1, /* FIXED64 */ 267 -1, /* FIXED32 */ 268 -1, /* BOOL */ 269 OP_STRING, /* STRING */ 270 -1, /* GROUP */ 271 OP_SUBMSG, /* MESSAGE */ 272 OP_STRING, /* BYTES */ 273 -1, /* UINT32 */ 274 -1, /* ENUM */ 275 -1, /* SFIXED32 */ 276 -1, /* SFIXED64 */ 277 -1, /* SINT32 */ 278 -1, /* SINT64 */ 279 /* For repeated field type. 
*/ 280 OP_FIXPCK_LG2(3), /* REPEATED DOUBLE */ 281 OP_FIXPCK_LG2(2), /* REPEATED FLOAT */ 282 OP_VARPCK_LG2(3), /* REPEATED INT64 */ 283 OP_VARPCK_LG2(3), /* REPEATED UINT64 */ 284 OP_VARPCK_LG2(2), /* REPEATED INT32 */ 285 OP_FIXPCK_LG2(3), /* REPEATED FIXED64 */ 286 OP_FIXPCK_LG2(2), /* REPEATED FIXED32 */ 287 OP_VARPCK_LG2(0), /* REPEATED BOOL */ 288 OP_STRING, /* REPEATED STRING */ 289 OP_SUBMSG, /* REPEATED GROUP */ 290 OP_SUBMSG, /* REPEATED MESSAGE */ 291 OP_STRING, /* REPEATED BYTES */ 292 OP_VARPCK_LG2(2), /* REPEATED UINT32 */ 293 OP_VARPCK_LG2(2), /* REPEATED ENUM */ 294 OP_FIXPCK_LG2(2), /* REPEATED SFIXED32 */ 295 OP_FIXPCK_LG2(3), /* REPEATED SFIXED64 */ 296 OP_VARPCK_LG2(2), /* REPEATED SINT32 */ 297 OP_VARPCK_LG2(3), /* REPEATED SINT64 */ 298}; 299 300/* Data pertaining to the parse. */ 301typedef struct { 302 const char *limit; /* End of delimited region or end of buffer. */ 303 upb_arena *arena; 304 int depth; 305 uint32_t end_group; /* Set to field number of END_GROUP tag, if any. 
*/ 306 jmp_buf err; 307} upb_decstate; 308 309typedef union { 310 bool bool_val; 311 int32_t int32_val; 312 int64_t int64_val; 313 uint32_t uint32_val; 314 uint64_t uint64_val; 315 upb_strview str_val; 316} wireval; 317 318static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, 319 const upb_msglayout *layout); 320 321UPB_NORETURN static void decode_err(upb_decstate *d) { longjmp(d->err, 1); } 322 323static bool decode_reserve(upb_decstate *d, upb_array *arr, int elem) { 324 bool need_realloc = arr->size - arr->len < elem; 325 if (need_realloc && !_upb_array_realloc(arr, arr->len + elem, d->arena)) { 326 decode_err(d); 327 } 328 return need_realloc; 329} 330 331UPB_NOINLINE 332static const char *decode_longvarint64(upb_decstate *d, const char *ptr, 333 const char *limit, uint64_t *val) { 334 uint8_t byte; 335 int bitpos = 0; 336 uint64_t out = 0; 337 338 do { 339 if (bitpos >= 70 || ptr == limit) decode_err(d); 340 byte = *ptr; 341 out |= (uint64_t)(byte & 0x7F) << bitpos; 342 ptr++; 343 bitpos += 7; 344 } while (byte & 0x80); 345 346 *val = out; 347 return ptr; 348} 349 350UPB_FORCEINLINE 351static const char *decode_varint64(upb_decstate *d, const char *ptr, 352 const char *limit, uint64_t *val) { 353 if (UPB_LIKELY(ptr < limit && (*ptr & 0x80) == 0)) { 354 *val = (uint8_t)*ptr; 355 return ptr + 1; 356 } else { 357 return decode_longvarint64(d, ptr, limit, val); 358 } 359} 360 361static const char *decode_varint32(upb_decstate *d, const char *ptr, 362 const char *limit, uint32_t *val) { 363 uint64_t u64; 364 ptr = decode_varint64(d, ptr, limit, &u64); 365 if (u64 > UINT32_MAX) decode_err(d); 366 *val = (uint32_t)u64; 367 return ptr; 368} 369 370static void decode_munge(int type, wireval *val) { 371 switch (type) { 372 case UPB_DESCRIPTOR_TYPE_BOOL: 373 val->bool_val = val->uint64_val != 0; 374 break; 375 case UPB_DESCRIPTOR_TYPE_SINT32: { 376 uint32_t n = val->uint32_val; 377 val->int32_val = (n >> 1) ^ -(int32_t)(n & 1); 378 break; 379 } 
380 case UPB_DESCRIPTOR_TYPE_SINT64: { 381 uint64_t n = val->uint64_val; 382 val->int64_val = (n >> 1) ^ -(int64_t)(n & 1); 383 break; 384 } 385 } 386} 387 388static const upb_msglayout_field *upb_find_field(const upb_msglayout *l, 389 uint32_t field_number) { 390 static upb_msglayout_field none = {0}; 391 392 /* Lots of optimization opportunities here. */ 393 int i; 394 if (l == NULL) return &none; 395 for (i = 0; i < l->field_count; i++) { 396 if (l->fields[i].number == field_number) { 397 return &l->fields[i]; 398 } 399 } 400 401 return &none; /* Unknown field. */ 402} 403 404static upb_msg *decode_newsubmsg(upb_decstate *d, const upb_msglayout *layout, 405 const upb_msglayout_field *field) { 406 const upb_msglayout *subl = layout->submsgs[field->submsg_index]; 407 return _upb_msg_new(subl, d->arena); 408} 409 410static void decode_tosubmsg(upb_decstate *d, upb_msg *submsg, 411 const upb_msglayout *layout, 412 const upb_msglayout_field *field, upb_strview val) { 413 const upb_msglayout *subl = layout->submsgs[field->submsg_index]; 414 const char *saved_limit = d->limit; 415 if (--d->depth < 0) decode_err(d); 416 d->limit = val.data + val.size; 417 decode_msg(d, val.data, submsg, subl); 418 d->limit = saved_limit; 419 if (d->end_group != 0) decode_err(d); 420 d->depth++; 421} 422 423static const char *decode_group(upb_decstate *d, const char *ptr, 424 upb_msg *submsg, const upb_msglayout *subl, 425 uint32_t number) { 426 if (--d->depth < 0) decode_err(d); 427 ptr = decode_msg(d, ptr, submsg, subl); 428 if (d->end_group != number) decode_err(d); 429 d->end_group = 0; 430 d->depth++; 431 return ptr; 432} 433 434static const char *decode_togroup(upb_decstate *d, const char *ptr, 435 upb_msg *submsg, const upb_msglayout *layout, 436 const upb_msglayout_field *field) { 437 const upb_msglayout *subl = layout->submsgs[field->submsg_index]; 438 return decode_group(d, ptr, submsg, subl, field->number); 439} 440 441static const char *decode_toarray(upb_decstate *d, const 
char *ptr, 442 upb_msg *msg, const upb_msglayout *layout, 443 const upb_msglayout_field *field, wireval val, 444 int op) { 445 upb_array **arrp = UPB_PTR_AT(msg, field->offset, void); 446 upb_array *arr = *arrp; 447 void *mem; 448 449 if (!arr) { 450 upb_fieldtype_t type = desctype_to_fieldtype[field->descriptortype]; 451 arr = _upb_array_new(d->arena, type); 452 if (!arr) decode_err(d); 453 *arrp = arr; 454 } 455 456 decode_reserve(d, arr, 1); 457 458 switch (op) { 459 case OP_SCALAR_LG2(0): 460 case OP_SCALAR_LG2(2): 461 case OP_SCALAR_LG2(3): 462 /* Append scalar value. */ 463 mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << op, void); 464 arr->len++; 465 memcpy(mem, &val, 1 << op); 466 return ptr; 467 case OP_STRING: 468 /* Append string. */ 469 mem = 470 UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(upb_strview), void); 471 arr->len++; 472 memcpy(mem, &val, sizeof(upb_strview)); 473 return ptr; 474 case OP_SUBMSG: { 475 /* Append submessage / group. */ 476 upb_msg *submsg = decode_newsubmsg(d, layout, field); 477 *UPB_PTR_AT(_upb_array_ptr(arr), arr->len * sizeof(void *), upb_msg *) = 478 submsg; 479 arr->len++; 480 if (UPB_UNLIKELY(field->descriptortype == UPB_DTYPE_GROUP)) { 481 ptr = decode_togroup(d, ptr, submsg, layout, field); 482 } else { 483 decode_tosubmsg(d, submsg, layout, field, val.str_val); 484 } 485 return ptr; 486 } 487 case OP_FIXPCK_LG2(2): 488 case OP_FIXPCK_LG2(3): { 489 /* Fixed packed. */ 490 int lg2 = op - OP_FIXPCK_LG2(0); 491 int mask = (1 << lg2) - 1; 492 int count = val.str_val.size >> lg2; 493 if ((val.str_val.size & mask) != 0) { 494 decode_err(d); /* Length isn't a round multiple of elem size. */ 495 } 496 decode_reserve(d, arr, count); 497 mem = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); 498 arr->len += count; 499 memcpy(mem, val.str_val.data, count << op); 500 return ptr; 501 } 502 case OP_VARPCK_LG2(0): 503 case OP_VARPCK_LG2(2): 504 case OP_VARPCK_LG2(3): { 505 /* Varint packed. 
*/ 506 int lg2 = op - OP_VARPCK_LG2(0); 507 int scale = 1 << lg2; 508 const char *ptr = val.str_val.data; 509 const char *end = ptr + val.str_val.size; 510 char *out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); 511 while (ptr < end) { 512 wireval elem; 513 ptr = decode_varint64(d, ptr, end, &elem.uint64_val); 514 decode_munge(field->descriptortype, &elem); 515 if (decode_reserve(d, arr, 1)) { 516 out = UPB_PTR_AT(_upb_array_ptr(arr), arr->len << lg2, void); 517 } 518 arr->len++; 519 memcpy(out, &elem, scale); 520 out += scale; 521 } 522 if (ptr != end) decode_err(d); 523 return ptr; 524 } 525 default: 526 UPB_UNREACHABLE(); 527 } 528} 529 530static void decode_tomap(upb_decstate *d, upb_msg *msg, 531 const upb_msglayout *layout, 532 const upb_msglayout_field *field, wireval val) { 533 upb_map **map_p = UPB_PTR_AT(msg, field->offset, upb_map *); 534 upb_map *map = *map_p; 535 upb_map_entry ent; 536 const upb_msglayout *entry = layout->submsgs[field->submsg_index]; 537 538 if (!map) { 539 /* Lazily create map. */ 540 const upb_msglayout *entry = layout->submsgs[field->submsg_index]; 541 const upb_msglayout_field *key_field = &entry->fields[0]; 542 const upb_msglayout_field *val_field = &entry->fields[1]; 543 char key_size = desctype_to_mapsize[key_field->descriptortype]; 544 char val_size = desctype_to_mapsize[val_field->descriptortype]; 545 UPB_ASSERT(key_field->offset == 0); 546 UPB_ASSERT(val_field->offset == sizeof(upb_strview)); 547 map = _upb_map_new(d->arena, key_size, val_size); 548 *map_p = map; 549 } 550 551 /* Parse map entry. */ 552 memset(&ent, 0, sizeof(ent)); 553 554 if (entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || 555 entry->fields[1].descriptortype == UPB_DESCRIPTOR_TYPE_GROUP) { 556 /* Create proactively to handle the case where it doesn't appear. 
*/ 557 ent.v.val.val = (uint64_t)_upb_msg_new(entry->submsgs[0], d->arena); 558 } 559 560 decode_tosubmsg(d, &ent.k, layout, field, val.str_val); 561 562 /* Insert into map. */ 563 _upb_map_set(map, &ent.k, map->key_size, &ent.v, map->val_size, d->arena); 564} 565 566static const char *decode_tomsg(upb_decstate *d, const char *ptr, upb_msg *msg, 567 const upb_msglayout *layout, 568 const upb_msglayout_field *field, wireval val, 569 int op) { 570 void *mem = UPB_PTR_AT(msg, field->offset, void); 571 int type = field->descriptortype; 572 573 /* Set presence if necessary. */ 574 if (field->presence < 0) { 575 /* Oneof case */ 576 *UPB_PTR_AT(msg, -field->presence, int32_t) = field->number; 577 } else if (field->presence > 0) { 578 /* Hasbit */ 579 uint32_t hasbit = field->presence; 580 *UPB_PTR_AT(msg, hasbit / 8, uint8_t) |= (1 << (hasbit % 8)); 581 } 582 583 /* Store into message. */ 584 switch (op) { 585 case OP_SUBMSG: { 586 upb_msg **submsgp = mem; 587 upb_msg *submsg = *submsgp; 588 if (!submsg) { 589 submsg = decode_newsubmsg(d, layout, field); 590 *submsgp = submsg; 591 } 592 if (UPB_UNLIKELY(type == UPB_DTYPE_GROUP)) { 593 ptr = decode_togroup(d, ptr, submsg, layout, field); 594 } else { 595 decode_tosubmsg(d, submsg, layout, field, val.str_val); 596 } 597 break; 598 } 599 case OP_STRING: 600 memcpy(mem, &val, sizeof(upb_strview)); 601 break; 602 case OP_SCALAR_LG2(3): 603 memcpy(mem, &val, 8); 604 break; 605 case OP_SCALAR_LG2(2): 606 memcpy(mem, &val, 4); 607 break; 608 case OP_SCALAR_LG2(0): 609 memcpy(mem, &val, 1); 610 break; 611 default: 612 UPB_UNREACHABLE(); 613 } 614 615 return ptr; 616} 617 618static const char *decode_msg(upb_decstate *d, const char *ptr, upb_msg *msg, 619 const upb_msglayout *layout) { 620 while (ptr < d->limit) { 621 uint32_t tag; 622 const upb_msglayout_field *field; 623 int field_number; 624 int wire_type; 625 const char *field_start = ptr; 626 wireval val; 627 int op; 628 629 ptr = decode_varint32(d, ptr, d->limit, &tag); 630 
field_number = tag >> 3; 631 wire_type = tag & 7; 632 633 field = upb_find_field(layout, field_number); 634 635 switch (wire_type) { 636 case UPB_WIRE_TYPE_VARINT: 637 ptr = decode_varint64(d, ptr, d->limit, &val.uint64_val); 638 op = varint_ops[field->descriptortype]; 639 decode_munge(field->descriptortype, &val); 640 break; 641 case UPB_WIRE_TYPE_32BIT: 642 if (d->limit - ptr < 4) decode_err(d); 643 memcpy(&val, ptr, 4); 644 ptr += 4; 645 op = OP_SCALAR_LG2(2); 646 if (((1 << field->descriptortype) & fixed32_ok) == 0) goto unknown; 647 break; 648 case UPB_WIRE_TYPE_64BIT: 649 if (d->limit - ptr < 8) decode_err(d); 650 memcpy(&val, ptr, 8); 651 ptr += 8; 652 op = OP_SCALAR_LG2(3); 653 if (((1 << field->descriptortype) & fixed64_ok) == 0) goto unknown; 654 break; 655 case UPB_WIRE_TYPE_DELIMITED: { 656 uint32_t size; 657 int ndx = field->descriptortype; 658 if (_upb_isrepeated(field)) ndx += 18; 659 ptr = decode_varint32(d, ptr, d->limit, &size); 660 if (size >= INT32_MAX || (size_t)(d->limit - ptr) < size) { 661 decode_err(d); /* Length overflow. */ 662 } 663 val.str_val.data = ptr; 664 val.str_val.size = size; 665 ptr += size; 666 op = delim_ops[ndx]; 667 break; 668 } 669 case UPB_WIRE_TYPE_START_GROUP: 670 val.int32_val = field_number; 671 op = OP_SUBMSG; 672 if (field->descriptortype != UPB_DTYPE_GROUP) goto unknown; 673 break; 674 case UPB_WIRE_TYPE_END_GROUP: 675 d->end_group = field_number; 676 return ptr; 677 default: 678 decode_err(d); 679 } 680 681 if (op >= 0) { 682 /* Parse, using op for dispatch. */ 683 switch (field->label) { 684 case UPB_LABEL_REPEATED: 685 case _UPB_LABEL_PACKED: 686 ptr = decode_toarray(d, ptr, msg, layout, field, val, op); 687 break; 688 case _UPB_LABEL_MAP: 689 decode_tomap(d, msg, layout, field, val); 690 break; 691 default: 692 ptr = decode_tomsg(d, ptr, msg, layout, field, val, op); 693 break; 694 } 695 } else { 696 unknown: 697 /* Skip unknown field. 
*/ 698 if (field_number == 0) decode_err(d); 699 if (wire_type == UPB_WIRE_TYPE_START_GROUP) { 700 ptr = decode_group(d, ptr, NULL, NULL, field_number); 701 } 702 if (msg) { 703 if (!_upb_msg_addunknown(msg, field_start, ptr - field_start, 704 d->arena)) { 705 decode_err(d); 706 } 707 } 708 } 709 } 710 711 if (ptr != d->limit) decode_err(d); 712 return ptr; 713} 714 715bool upb_decode(const char *buf, size_t size, void *msg, const upb_msglayout *l, 716 upb_arena *arena) { 717 upb_decstate state; 718 state.limit = buf + size; 719 state.arena = arena; 720 state.depth = 64; 721 state.end_group = 0; 722 723 if (setjmp(state.err)) return false; 724 725 if (size == 0) return true; 726 decode_msg(&state, buf, msg, l); 727 728 return state.end_group == 0; 729} 730 731#undef OP_SCALAR_LG2 732#undef OP_FIXPCK_LG2 733#undef OP_VARPCK_LG2 734#undef OP_STRING 735#undef OP_SUBMSG 736/* We encode backwards, to avoid pre-computing lengths (one-pass encode). */ 737 738 739#include <string.h> 740 741 742 743#define UPB_PB_VARINT_MAX_LEN 10 744#define CHK(x) do { if (!(x)) { return false; } } while(0) 745 746static size_t upb_encode_varint(uint64_t val, char *buf) { 747 size_t i; 748 if (val < 128) { buf[0] = val; return 1; } 749 i = 0; 750 while (val) { 751 uint8_t byte = val & 0x7fU; 752 val >>= 7; 753 if (val) byte |= 0x80U; 754 buf[i++] = byte; 755 } 756 return i; 757} 758 759static uint32_t upb_zzencode_32(int32_t n) { return ((uint32_t)n << 1) ^ (n >> 31); } 760static uint64_t upb_zzencode_64(int64_t n) { return ((uint64_t)n << 1) ^ (n >> 63); } 761 762typedef struct { 763 upb_alloc *alloc; 764 char *buf, *ptr, *limit; 765} upb_encstate; 766 767static size_t upb_roundup_pow2(size_t bytes) { 768 size_t ret = 128; 769 while (ret < bytes) { 770 ret *= 2; 771 } 772 return ret; 773} 774 775static bool upb_encode_growbuffer(upb_encstate *e, size_t bytes) { 776 size_t old_size = e->limit - e->buf; 777 size_t new_size = upb_roundup_pow2(bytes + (e->limit - e->ptr)); 778 char *new_buf = 
upb_realloc(e->alloc, e->buf, old_size, new_size); 779 CHK(new_buf); 780 781 /* We want previous data at the end, realloc() put it at the beginning. */ 782 if (old_size > 0) { 783 memmove(new_buf + new_size - old_size, e->buf, old_size); 784 } 785 786 e->ptr = new_buf + new_size - (e->limit - e->ptr); 787 e->limit = new_buf + new_size; 788 e->buf = new_buf; 789 return true; 790} 791 792/* Call to ensure that at least "bytes" bytes are available for writing at 793 * e->ptr. Returns false if the bytes could not be allocated. */ 794static bool upb_encode_reserve(upb_encstate *e, size_t bytes) { 795 CHK(UPB_LIKELY((size_t)(e->ptr - e->buf) >= bytes) || 796 upb_encode_growbuffer(e, bytes)); 797 798 e->ptr -= bytes; 799 return true; 800} 801 802/* Writes the given bytes to the buffer, handling reserve/advance. */ 803static bool upb_put_bytes(upb_encstate *e, const void *data, size_t len) { 804 if (len == 0) return true; 805 CHK(upb_encode_reserve(e, len)); 806 memcpy(e->ptr, data, len); 807 return true; 808} 809 810static bool upb_put_fixed64(upb_encstate *e, uint64_t val) { 811 /* TODO(haberman): byte-swap for big endian. */ 812 return upb_put_bytes(e, &val, sizeof(uint64_t)); 813} 814 815static bool upb_put_fixed32(upb_encstate *e, uint32_t val) { 816 /* TODO(haberman): byte-swap for big endian. 
*/ 817 return upb_put_bytes(e, &val, sizeof(uint32_t)); 818} 819 820static bool upb_put_varint(upb_encstate *e, uint64_t val) { 821 size_t len; 822 char *start; 823 CHK(upb_encode_reserve(e, UPB_PB_VARINT_MAX_LEN)); 824 len = upb_encode_varint(val, e->ptr); 825 start = e->ptr + UPB_PB_VARINT_MAX_LEN - len; 826 memmove(start, e->ptr, len); 827 e->ptr = start; 828 return true; 829} 830 831static bool upb_put_double(upb_encstate *e, double d) { 832 uint64_t u64; 833 UPB_ASSERT(sizeof(double) == sizeof(uint64_t)); 834 memcpy(&u64, &d, sizeof(uint64_t)); 835 return upb_put_fixed64(e, u64); 836} 837 838static bool upb_put_float(upb_encstate *e, float d) { 839 uint32_t u32; 840 UPB_ASSERT(sizeof(float) == sizeof(uint32_t)); 841 memcpy(&u32, &d, sizeof(uint32_t)); 842 return upb_put_fixed32(e, u32); 843} 844 845static uint32_t upb_readcase(const char *msg, const upb_msglayout_field *f) { 846 uint32_t ret; 847 memcpy(&ret, msg - f->presence, sizeof(ret)); 848 return ret; 849} 850 851static bool upb_readhasbit(const char *msg, const upb_msglayout_field *f) { 852 uint32_t hasbit = f->presence; 853 UPB_ASSERT(f->presence > 0); 854 return (*UPB_PTR_AT(msg, hasbit / 8, uint8_t)) & (1 << (hasbit % 8)); 855} 856 857static bool upb_put_tag(upb_encstate *e, int field_number, int wire_type) { 858 return upb_put_varint(e, (field_number << 3) | wire_type); 859} 860 861static bool upb_put_fixedarray(upb_encstate *e, const upb_array *arr, 862 size_t elem_size, uint32_t tag) { 863 size_t bytes = arr->len * elem_size; 864 const char* data = _upb_array_constptr(arr); 865 const char* ptr = data + bytes - elem_size; 866 if (tag) { 867 while (true) { 868 CHK(upb_put_bytes(e, ptr, elem_size) && upb_put_varint(e, tag)); 869 if (ptr == data) break; 870 ptr -= elem_size; 871 } 872 return true; 873 } else { 874 return upb_put_bytes(e, data, bytes) && upb_put_varint(e, bytes); 875 } 876} 877 878bool upb_encode_message(upb_encstate *e, const char *msg, 879 const upb_msglayout *m, size_t *size); 880 
881static bool upb_encode_scalarfield(upb_encstate *e, const void *_field_mem, 882 const upb_msglayout *m, 883 const upb_msglayout_field *f, 884 bool skip_zero_value) { 885 const char *field_mem = _field_mem; 886#define CASE(ctype, type, wire_type, encodeval) do { \ 887 ctype val = *(ctype*)field_mem; \ 888 if (skip_zero_value && val == 0) { \ 889 return true; \ 890 } \ 891 return upb_put_ ## type(e, encodeval) && \ 892 upb_put_tag(e, f->number, wire_type); \ 893} while(0) 894 895 switch (f->descriptortype) { 896 case UPB_DESCRIPTOR_TYPE_DOUBLE: 897 CASE(double, double, UPB_WIRE_TYPE_64BIT, val); 898 case UPB_DESCRIPTOR_TYPE_FLOAT: 899 CASE(float, float, UPB_WIRE_TYPE_32BIT, val); 900 case UPB_DESCRIPTOR_TYPE_INT64: 901 case UPB_DESCRIPTOR_TYPE_UINT64: 902 CASE(uint64_t, varint, UPB_WIRE_TYPE_VARINT, val); 903 case UPB_DESCRIPTOR_TYPE_UINT32: 904 CASE(uint32_t, varint, UPB_WIRE_TYPE_VARINT, val); 905 case UPB_DESCRIPTOR_TYPE_INT32: 906 case UPB_DESCRIPTOR_TYPE_ENUM: 907 CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, (int64_t)val); 908 case UPB_DESCRIPTOR_TYPE_SFIXED64: 909 case UPB_DESCRIPTOR_TYPE_FIXED64: 910 CASE(uint64_t, fixed64, UPB_WIRE_TYPE_64BIT, val); 911 case UPB_DESCRIPTOR_TYPE_FIXED32: 912 case UPB_DESCRIPTOR_TYPE_SFIXED32: 913 CASE(uint32_t, fixed32, UPB_WIRE_TYPE_32BIT, val); 914 case UPB_DESCRIPTOR_TYPE_BOOL: 915 CASE(bool, varint, UPB_WIRE_TYPE_VARINT, val); 916 case UPB_DESCRIPTOR_TYPE_SINT32: 917 CASE(int32_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_32(val)); 918 case UPB_DESCRIPTOR_TYPE_SINT64: 919 CASE(int64_t, varint, UPB_WIRE_TYPE_VARINT, upb_zzencode_64(val)); 920 case UPB_DESCRIPTOR_TYPE_STRING: 921 case UPB_DESCRIPTOR_TYPE_BYTES: { 922 upb_strview view = *(upb_strview*)field_mem; 923 if (skip_zero_value && view.size == 0) { 924 return true; 925 } 926 return upb_put_bytes(e, view.data, view.size) && 927 upb_put_varint(e, view.size) && 928 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); 929 } 930 case UPB_DESCRIPTOR_TYPE_GROUP: { 931 
size_t size; 932 void *submsg = *(void **)field_mem; 933 const upb_msglayout *subm = m->submsgs[f->submsg_index]; 934 if (submsg == NULL) { 935 return true; 936 } 937 return upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && 938 upb_encode_message(e, submsg, subm, &size) && 939 upb_put_tag(e, f->number, UPB_WIRE_TYPE_START_GROUP); 940 } 941 case UPB_DESCRIPTOR_TYPE_MESSAGE: { 942 size_t size; 943 void *submsg = *(void **)field_mem; 944 const upb_msglayout *subm = m->submsgs[f->submsg_index]; 945 if (submsg == NULL) { 946 return true; 947 } 948 return upb_encode_message(e, submsg, subm, &size) && 949 upb_put_varint(e, size) && 950 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED); 951 } 952 } 953#undef CASE 954 UPB_UNREACHABLE(); 955} 956 957static bool upb_encode_array(upb_encstate *e, const char *field_mem, 958 const upb_msglayout *m, 959 const upb_msglayout_field *f) { 960 const upb_array *arr = *(const upb_array**)field_mem; 961 bool packed = f->label == _UPB_LABEL_PACKED; 962 963 if (arr == NULL || arr->len == 0) { 964 return true; 965 } 966 967#define VARINT_CASE(ctype, encode) \ 968 { \ 969 const ctype *start = _upb_array_constptr(arr); \ 970 const ctype *ptr = start + arr->len; \ 971 size_t pre_len = e->limit - e->ptr; \ 972 uint32_t tag = packed ? 0 : (f->number << 3) | UPB_WIRE_TYPE_VARINT; \ 973 do { \ 974 ptr--; \ 975 CHK(upb_put_varint(e, encode)); \ 976 if (tag) CHK(upb_put_varint(e, tag)); \ 977 } while (ptr != start); \ 978 if (!tag) CHK(upb_put_varint(e, e->limit - e->ptr - pre_len)); \ 979 } \ 980 break; \ 981 do { \ 982 ; \ 983 } while (0) 984 985#define TAG(wire_type) (packed ? 
0 : (f->number << 3 | wire_type)) 986 987 switch (f->descriptortype) { 988 case UPB_DESCRIPTOR_TYPE_DOUBLE: 989 CHK(upb_put_fixedarray(e, arr, sizeof(double), TAG(UPB_WIRE_TYPE_64BIT))); 990 break; 991 case UPB_DESCRIPTOR_TYPE_FLOAT: 992 CHK(upb_put_fixedarray(e, arr, sizeof(float), TAG(UPB_WIRE_TYPE_32BIT))); 993 break; 994 case UPB_DESCRIPTOR_TYPE_SFIXED64: 995 case UPB_DESCRIPTOR_TYPE_FIXED64: 996 CHK(upb_put_fixedarray(e, arr, sizeof(uint64_t), TAG(UPB_WIRE_TYPE_64BIT))); 997 break; 998 case UPB_DESCRIPTOR_TYPE_FIXED32: 999 case UPB_DESCRIPTOR_TYPE_SFIXED32: 1000 CHK(upb_put_fixedarray(e, arr, sizeof(uint32_t), TAG(UPB_WIRE_TYPE_32BIT))); 1001 break; 1002 case UPB_DESCRIPTOR_TYPE_INT64: 1003 case UPB_DESCRIPTOR_TYPE_UINT64: 1004 VARINT_CASE(uint64_t, *ptr); 1005 case UPB_DESCRIPTOR_TYPE_UINT32: 1006 VARINT_CASE(uint32_t, *ptr); 1007 case UPB_DESCRIPTOR_TYPE_INT32: 1008 case UPB_DESCRIPTOR_TYPE_ENUM: 1009 VARINT_CASE(int32_t, (int64_t)*ptr); 1010 case UPB_DESCRIPTOR_TYPE_BOOL: 1011 VARINT_CASE(bool, *ptr); 1012 case UPB_DESCRIPTOR_TYPE_SINT32: 1013 VARINT_CASE(int32_t, upb_zzencode_32(*ptr)); 1014 case UPB_DESCRIPTOR_TYPE_SINT64: 1015 VARINT_CASE(int64_t, upb_zzencode_64(*ptr)); 1016 case UPB_DESCRIPTOR_TYPE_STRING: 1017 case UPB_DESCRIPTOR_TYPE_BYTES: { 1018 const upb_strview *start = _upb_array_constptr(arr); 1019 const upb_strview *ptr = start + arr->len; 1020 do { 1021 ptr--; 1022 CHK(upb_put_bytes(e, ptr->data, ptr->size) && 1023 upb_put_varint(e, ptr->size) && 1024 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); 1025 } while (ptr != start); 1026 return true; 1027 } 1028 case UPB_DESCRIPTOR_TYPE_GROUP: { 1029 const void *const*start = _upb_array_constptr(arr); 1030 const void *const*ptr = start + arr->len; 1031 const upb_msglayout *subm = m->submsgs[f->submsg_index]; 1032 do { 1033 size_t size; 1034 ptr--; 1035 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_END_GROUP) && 1036 upb_encode_message(e, *ptr, subm, &size) && 1037 upb_put_tag(e, f->number, 
UPB_WIRE_TYPE_START_GROUP)); 1038 } while (ptr != start); 1039 return true; 1040 } 1041 case UPB_DESCRIPTOR_TYPE_MESSAGE: { 1042 const void *const*start = _upb_array_constptr(arr); 1043 const void *const*ptr = start + arr->len; 1044 const upb_msglayout *subm = m->submsgs[f->submsg_index]; 1045 do { 1046 size_t size; 1047 ptr--; 1048 CHK(upb_encode_message(e, *ptr, subm, &size) && 1049 upb_put_varint(e, size) && 1050 upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); 1051 } while (ptr != start); 1052 return true; 1053 } 1054 } 1055#undef VARINT_CASE 1056 1057 if (packed) { 1058 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); 1059 } 1060 return true; 1061} 1062 1063static bool upb_encode_map(upb_encstate *e, const char *field_mem, 1064 const upb_msglayout *m, 1065 const upb_msglayout_field *f) { 1066 const upb_map *map = *(const upb_map**)field_mem; 1067 const upb_msglayout *entry = m->submsgs[f->submsg_index]; 1068 const upb_msglayout_field *key_field = &entry->fields[0]; 1069 const upb_msglayout_field *val_field = &entry->fields[1]; 1070 upb_strtable_iter i; 1071 if (map == NULL) { 1072 return true; 1073 } 1074 1075 upb_strtable_begin(&i, &map->table); 1076 for(; !upb_strtable_done(&i); upb_strtable_next(&i)) { 1077 size_t pre_len = e->limit - e->ptr; 1078 size_t size; 1079 upb_strview key = upb_strtable_iter_key(&i); 1080 const upb_value val = upb_strtable_iter_value(&i); 1081 const void *keyp = 1082 map->key_size == UPB_MAPTYPE_STRING ? (void *)&key : key.data; 1083 const void *valp = 1084 map->val_size == UPB_MAPTYPE_STRING ? 
upb_value_getptr(val) : &val; 1085 1086 CHK(upb_encode_scalarfield(e, valp, entry, val_field, false)); 1087 CHK(upb_encode_scalarfield(e, keyp, entry, key_field, false)); 1088 size = (e->limit - e->ptr) - pre_len; 1089 CHK(upb_put_varint(e, size)); 1090 CHK(upb_put_tag(e, f->number, UPB_WIRE_TYPE_DELIMITED)); 1091 } 1092 1093 return true; 1094} 1095 1096 1097bool upb_encode_message(upb_encstate *e, const char *msg, 1098 const upb_msglayout *m, size_t *size) { 1099 int i; 1100 size_t pre_len = e->limit - e->ptr; 1101 const char *unknown; 1102 size_t unknown_size; 1103 1104 unknown = upb_msg_getunknown(msg, &unknown_size); 1105 1106 if (unknown) { 1107 upb_put_bytes(e, unknown, unknown_size); 1108 } 1109 1110 for (i = m->field_count - 1; i >= 0; i--) { 1111 const upb_msglayout_field *f = &m->fields[i]; 1112 1113 if (_upb_isrepeated(f)) { 1114 CHK(upb_encode_array(e, msg + f->offset, m, f)); 1115 } else if (f->label == _UPB_LABEL_MAP) { 1116 CHK(upb_encode_map(e, msg + f->offset, m, f)); 1117 } else { 1118 bool skip_empty = false; 1119 if (f->presence == 0) { 1120 /* Proto3 presence. */ 1121 skip_empty = true; 1122 } else if (f->presence > 0) { 1123 /* Proto2 presence: hasbit. */ 1124 if (!upb_readhasbit(msg, f)) { 1125 continue; 1126 } 1127 } else { 1128 /* Field is in a oneof. 
*/ 1129 if (upb_readcase(msg, f) != f->number) { 1130 continue; 1131 } 1132 } 1133 CHK(upb_encode_scalarfield(e, msg + f->offset, m, f, skip_empty)); 1134 } 1135 } 1136 1137 *size = (e->limit - e->ptr) - pre_len; 1138 return true; 1139} 1140 1141char *upb_encode(const void *msg, const upb_msglayout *m, upb_arena *arena, 1142 size_t *size) { 1143 upb_encstate e; 1144 e.alloc = upb_arena_alloc(arena); 1145 e.buf = NULL; 1146 e.limit = NULL; 1147 e.ptr = NULL; 1148 1149 if (!upb_encode_message(&e, msg, m, size)) { 1150 *size = 0; 1151 return NULL; 1152 } 1153 1154 *size = e.limit - e.ptr; 1155 1156 if (*size == 0) { 1157 static char ch; 1158 return &ch; 1159 } else { 1160 UPB_ASSERT(e.ptr); 1161 return e.ptr; 1162 } 1163} 1164 1165#undef CHK 1166 1167 1168 1169 1170/** upb_msg *******************************************************************/ 1171 1172static const char _upb_fieldtype_to_sizelg2[12] = { 1173 0, 1174 0, /* UPB_TYPE_BOOL */ 1175 2, /* UPB_TYPE_FLOAT */ 1176 2, /* UPB_TYPE_INT32 */ 1177 2, /* UPB_TYPE_UINT32 */ 1178 2, /* UPB_TYPE_ENUM */ 1179 UPB_SIZE(2, 3), /* UPB_TYPE_MESSAGE */ 1180 3, /* UPB_TYPE_DOUBLE */ 1181 3, /* UPB_TYPE_INT64 */ 1182 3, /* UPB_TYPE_UINT64 */ 1183 UPB_SIZE(3, 4), /* UPB_TYPE_STRING */ 1184 UPB_SIZE(3, 4), /* UPB_TYPE_BYTES */ 1185}; 1186 1187static uintptr_t tag_arrptr(void* ptr, int elem_size_lg2) { 1188 UPB_ASSERT(elem_size_lg2 <= 4); 1189 return (uintptr_t)ptr | elem_size_lg2; 1190} 1191 1192static int upb_msg_internalsize(const upb_msglayout *l) { 1193 return sizeof(upb_msg_internal) - l->extendable * sizeof(void *); 1194} 1195 1196static size_t upb_msg_sizeof(const upb_msglayout *l) { 1197 return l->size + upb_msg_internalsize(l); 1198} 1199 1200static upb_msg_internal *upb_msg_getinternal(upb_msg *msg) { 1201 return UPB_PTR_AT(msg, -sizeof(upb_msg_internal), upb_msg_internal); 1202} 1203 1204static const upb_msg_internal *upb_msg_getinternal_const(const upb_msg *msg) { 1205 return UPB_PTR_AT(msg, 
-sizeof(upb_msg_internal), upb_msg_internal); 1206} 1207 1208static upb_msg_internal_withext *upb_msg_getinternalwithext( 1209 upb_msg *msg, const upb_msglayout *l) { 1210 UPB_ASSERT(l->extendable); 1211 return UPB_PTR_AT(msg, -sizeof(upb_msg_internal_withext), 1212 upb_msg_internal_withext); 1213} 1214 1215upb_msg *_upb_msg_new(const upb_msglayout *l, upb_arena *a) { 1216 void *mem = upb_arena_malloc(a, upb_msg_sizeof(l)); 1217 upb_msg_internal *in; 1218 upb_msg *msg; 1219 1220 if (!mem) { 1221 return NULL; 1222 } 1223 1224 msg = UPB_PTR_AT(mem, upb_msg_internalsize(l), upb_msg); 1225 1226 /* Initialize normal members. */ 1227 memset(msg, 0, l->size); 1228 1229 /* Initialize internal members. */ 1230 in = upb_msg_getinternal(msg); 1231 in->unknown = NULL; 1232 in->unknown_len = 0; 1233 in->unknown_size = 0; 1234 1235 if (l->extendable) { 1236 upb_msg_getinternalwithext(msg, l)->extdict = NULL; 1237 } 1238 1239 return msg; 1240} 1241 1242bool _upb_msg_addunknown(upb_msg *msg, const char *data, size_t len, 1243 upb_arena *arena) { 1244 upb_msg_internal *in = upb_msg_getinternal(msg); 1245 if (len > in->unknown_size - in->unknown_len) { 1246 upb_alloc *alloc = upb_arena_alloc(arena); 1247 size_t need = in->unknown_size + len; 1248 size_t newsize = UPB_MAX(in->unknown_size * 2, need); 1249 void *mem = upb_realloc(alloc, in->unknown, in->unknown_size, newsize); 1250 if (!mem) return false; 1251 in->unknown = mem; 1252 in->unknown_size = newsize; 1253 } 1254 memcpy(in->unknown + in->unknown_len, data, len); 1255 in->unknown_len += len; 1256 return true; 1257} 1258 1259const char *upb_msg_getunknown(const upb_msg *msg, size_t *len) { 1260 const upb_msg_internal *in = upb_msg_getinternal_const(msg); 1261 *len = in->unknown_len; 1262 return in->unknown; 1263} 1264 1265/** upb_array *****************************************************************/ 1266 1267upb_array *_upb_array_new(upb_arena *a, upb_fieldtype_t type) { 1268 upb_array *arr = upb_arena_malloc(a, 
sizeof(upb_array)); 1269 1270 if (!arr) { 1271 return NULL; 1272 } 1273 1274 arr->data = tag_arrptr(NULL, _upb_fieldtype_to_sizelg2[type]); 1275 arr->len = 0; 1276 arr->size = 0; 1277 1278 return arr; 1279} 1280 1281bool _upb_array_realloc(upb_array *arr, size_t min_size, upb_arena *arena) { 1282 size_t new_size = UPB_MAX(arr->size, 4); 1283 int elem_size_lg2 = arr->data & 7; 1284 size_t old_bytes = arr->size << elem_size_lg2; 1285 size_t new_bytes; 1286 void* ptr = _upb_array_ptr(arr); 1287 1288 /* Log2 ceiling of size. */ 1289 while (new_size < min_size) new_size *= 2; 1290 1291 new_bytes = new_size << elem_size_lg2; 1292 ptr = upb_arena_realloc(arena, ptr, old_bytes, new_bytes); 1293 1294 if (!ptr) { 1295 return false; 1296 } 1297 1298 arr->data = tag_arrptr(ptr, elem_size_lg2); 1299 arr->size = new_size; 1300 return true; 1301} 1302 1303static upb_array *getorcreate_array(upb_array **arr_ptr, upb_fieldtype_t type, 1304 upb_arena *arena) { 1305 upb_array *arr = *arr_ptr; 1306 if (!arr) { 1307 arr = _upb_array_new(arena, type); 1308 if (!arr) return NULL; 1309 *arr_ptr = arr; 1310 } 1311 return arr; 1312} 1313 1314static bool resize_array(upb_array *arr, size_t size, upb_arena *arena) { 1315 if (size > arr->size && !_upb_array_realloc(arr, size, arena)) { 1316 return false; 1317 } 1318 1319 arr->len = size; 1320 return true; 1321} 1322 1323void *_upb_array_resize_fallback(upb_array **arr_ptr, size_t size, 1324 upb_fieldtype_t type, upb_arena *arena) { 1325 upb_array *arr = getorcreate_array(arr_ptr, type, arena); 1326 return arr && resize_array(arr, size, arena) ? 
_upb_array_ptr(arr) : NULL; 1327} 1328 1329bool _upb_array_append_fallback(upb_array **arr_ptr, const void *value, 1330 upb_fieldtype_t type, upb_arena *arena) { 1331 upb_array *arr = getorcreate_array(arr_ptr, type, arena); 1332 size_t elem = arr->len; 1333 int lg2 = _upb_fieldtype_to_sizelg2[type]; 1334 char *data; 1335 1336 if (!arr || !resize_array(arr, elem + 1, arena)) return false; 1337 1338 data = _upb_array_ptr(arr); 1339 memcpy(data + (elem << lg2), value, 1 << lg2); 1340 return true; 1341} 1342 1343/** upb_map *******************************************************************/ 1344 1345upb_map *_upb_map_new(upb_arena *a, size_t key_size, size_t value_size) { 1346 upb_map *map = upb_arena_malloc(a, sizeof(upb_map)); 1347 1348 if (!map) { 1349 return NULL; 1350 } 1351 1352 upb_strtable_init2(&map->table, UPB_CTYPE_INT32, upb_arena_alloc(a)); 1353 map->key_size = key_size; 1354 map->val_size = value_size; 1355 1356 return map; 1357} 1358/* 1359** upb_table Implementation 1360** 1361** Implementation is heavily inspired by Lua's ltable.c. 1362*/ 1363 1364 1365#include <string.h> 1366 1367 1368#define UPB_MAXARRSIZE 16 /* 64k. */ 1369 1370/* From Chromium. */ 1371#define ARRAY_SIZE(x) \ 1372 ((sizeof(x)/sizeof(0[x])) / ((size_t)(!(sizeof(x) % sizeof(0[x]))))) 1373 1374static const double MAX_LOAD = 0.85; 1375 1376/* The minimum utilization of the array part of a mixed hash/array table. This 1377 * is a speed/memory-usage tradeoff (though it's not straightforward because of 1378 * cache effects). The lower this is, the more memory we'll use. */ 1379static const double MIN_DENSITY = 0.1; 1380 1381bool is_pow2(uint64_t v) { return v == 0 || (v & (v - 1)) == 0; } 1382 1383int log2ceil(uint64_t v) { 1384 int ret = 0; 1385 bool pow2 = is_pow2(v); 1386 while (v >>= 1) ret++; 1387 ret = pow2 ? ret : ret + 1; /* Ceiling. 
*/ 1388 return UPB_MIN(UPB_MAXARRSIZE, ret); 1389} 1390 1391char *upb_strdup(const char *s, upb_alloc *a) { 1392 return upb_strdup2(s, strlen(s), a); 1393} 1394 1395char *upb_strdup2(const char *s, size_t len, upb_alloc *a) { 1396 size_t n; 1397 char *p; 1398 1399 /* Prevent overflow errors. */ 1400 if (len == SIZE_MAX) return NULL; 1401 /* Always null-terminate, even if binary data; but don't rely on the input to 1402 * have a null-terminating byte since it may be a raw binary buffer. */ 1403 n = len + 1; 1404 p = upb_malloc(a, n); 1405 if (p) { 1406 memcpy(p, s, len); 1407 p[len] = 0; 1408 } 1409 return p; 1410} 1411 1412/* A type to represent the lookup key of either a strtable or an inttable. */ 1413typedef union { 1414 uintptr_t num; 1415 struct { 1416 const char *str; 1417 size_t len; 1418 } str; 1419} lookupkey_t; 1420 1421static lookupkey_t strkey2(const char *str, size_t len) { 1422 lookupkey_t k; 1423 k.str.str = str; 1424 k.str.len = len; 1425 return k; 1426} 1427 1428static lookupkey_t intkey(uintptr_t key) { 1429 lookupkey_t k; 1430 k.num = key; 1431 return k; 1432} 1433 1434typedef uint32_t hashfunc_t(upb_tabkey key); 1435typedef bool eqlfunc_t(upb_tabkey k1, lookupkey_t k2); 1436 1437/* Base table (shared code) ***************************************************/ 1438 1439/* For when we need to cast away const. */ 1440static upb_tabent *mutable_entries(upb_table *t) { 1441 return (upb_tabent*)t->entries; 1442} 1443 1444static bool isfull(upb_table *t) { 1445 if (upb_table_size(t) == 0) { 1446 return true; 1447 } else { 1448 return ((double)(t->count + 1) / upb_table_size(t)) > MAX_LOAD; 1449 } 1450} 1451 1452static bool init(upb_table *t, uint8_t size_lg2, upb_alloc *a) { 1453 size_t bytes; 1454 1455 t->count = 0; 1456 t->size_lg2 = size_lg2; 1457 t->mask = upb_table_size(t) ? 
upb_table_size(t) - 1 : 0; 1458 bytes = upb_table_size(t) * sizeof(upb_tabent); 1459 if (bytes > 0) { 1460 t->entries = upb_malloc(a, bytes); 1461 if (!t->entries) return false; 1462 memset(mutable_entries(t), 0, bytes); 1463 } else { 1464 t->entries = NULL; 1465 } 1466 return true; 1467} 1468 1469static void uninit(upb_table *t, upb_alloc *a) { 1470 upb_free(a, mutable_entries(t)); 1471} 1472 1473static upb_tabent *emptyent(upb_table *t) { 1474 upb_tabent *e = mutable_entries(t) + upb_table_size(t); 1475 while (1) { if (upb_tabent_isempty(--e)) return e; UPB_ASSERT(e > t->entries); } 1476} 1477 1478static upb_tabent *getentry_mutable(upb_table *t, uint32_t hash) { 1479 return (upb_tabent*)upb_getentry(t, hash); 1480} 1481 1482static const upb_tabent *findentry(const upb_table *t, lookupkey_t key, 1483 uint32_t hash, eqlfunc_t *eql) { 1484 const upb_tabent *e; 1485 1486 if (t->size_lg2 == 0) return NULL; 1487 e = upb_getentry(t, hash); 1488 if (upb_tabent_isempty(e)) return NULL; 1489 while (1) { 1490 if (eql(e->key, key)) return e; 1491 if ((e = e->next) == NULL) return NULL; 1492 } 1493} 1494 1495static upb_tabent *findentry_mutable(upb_table *t, lookupkey_t key, 1496 uint32_t hash, eqlfunc_t *eql) { 1497 return (upb_tabent*)findentry(t, key, hash, eql); 1498} 1499 1500static bool lookup(const upb_table *t, lookupkey_t key, upb_value *v, 1501 uint32_t hash, eqlfunc_t *eql) { 1502 const upb_tabent *e = findentry(t, key, hash, eql); 1503 if (e) { 1504 if (v) { 1505 _upb_value_setval(v, e->val.val); 1506 } 1507 return true; 1508 } else { 1509 return false; 1510 } 1511} 1512 1513/* The given key must not already exist in the table. 
*/ 1514static void insert(upb_table *t, lookupkey_t key, upb_tabkey tabkey, 1515 upb_value val, uint32_t hash, 1516 hashfunc_t *hashfunc, eqlfunc_t *eql) { 1517 upb_tabent *mainpos_e; 1518 upb_tabent *our_e; 1519 1520 UPB_ASSERT(findentry(t, key, hash, eql) == NULL); 1521 1522 t->count++; 1523 mainpos_e = getentry_mutable(t, hash); 1524 our_e = mainpos_e; 1525 1526 if (upb_tabent_isempty(mainpos_e)) { 1527 /* Our main position is empty; use it. */ 1528 our_e->next = NULL; 1529 } else { 1530 /* Collision. */ 1531 upb_tabent *new_e = emptyent(t); 1532 /* Head of collider's chain. */ 1533 upb_tabent *chain = getentry_mutable(t, hashfunc(mainpos_e->key)); 1534 if (chain == mainpos_e) { 1535 /* Existing ent is in its main posisiton (it has the same hash as us, and 1536 * is the head of our chain). Insert to new ent and append to this chain. */ 1537 new_e->next = mainpos_e->next; 1538 mainpos_e->next = new_e; 1539 our_e = new_e; 1540 } else { 1541 /* Existing ent is not in its main position (it is a node in some other 1542 * chain). This implies that no existing ent in the table has our hash. 1543 * Evict it (updating its chain) and use its ent for head of our chain. */ 1544 *new_e = *mainpos_e; /* copies next. */ 1545 while (chain->next != mainpos_e) { 1546 chain = (upb_tabent*)chain->next; 1547 UPB_ASSERT(chain); 1548 } 1549 chain->next = new_e; 1550 our_e = mainpos_e; 1551 our_e->next = NULL; 1552 } 1553 } 1554 our_e->key = tabkey; 1555 our_e->val.val = val.val; 1556 UPB_ASSERT(findentry(t, key, hash, eql) == our_e); 1557} 1558 1559static bool rm(upb_table *t, lookupkey_t key, upb_value *val, 1560 upb_tabkey *removed, uint32_t hash, eqlfunc_t *eql) { 1561 upb_tabent *chain = getentry_mutable(t, hash); 1562 if (upb_tabent_isempty(chain)) return false; 1563 if (eql(chain->key, key)) { 1564 /* Element to remove is at the head of its chain. 
*/
    t->count--;
    if (val) _upb_value_setval(val, chain->val.val);
    if (removed) *removed = chain->key;
    if (chain->next) {
      /* Pull the second element of the chain into the head slot and free the
       * slot it used to occupy. */
      upb_tabent *move = (upb_tabent*)chain->next;
      *chain = *move;
      move->key = 0;  /* Make the slot empty. */
    } else {
      chain->key = 0;  /* Make the slot empty. */
    }
    return true;
  } else {
    /* Element to remove is either in a non-head position or not in the
     * table. */
    while (chain->next && !eql(chain->next->key, key)) {
      chain = (upb_tabent*)chain->next;
    }
    if (chain->next) {
      /* Found element to remove. */
      upb_tabent *rm = (upb_tabent*)chain->next;
      t->count--;
      if (val) _upb_value_setval(val, chain->next->val.val);
      if (removed) *removed = rm->key;
      rm->key = 0;  /* Make the slot empty. */
      chain->next = rm->next;  /* Unlink it from the chain. */
      return true;
    } else {
      /* Element to remove is not in the table. */
      return false;
    }
  }
}

/* Returns the index of the first non-empty entry after index `i`, or
 * SIZE_MAX when there is none. */
static size_t next(const upb_table *t, size_t i) {
  do {
    if (++i >= upb_table_size(t))
      return SIZE_MAX;
  } while(upb_tabent_isempty(&t->entries[i]));

  return i;
}

/* Returns the index of the first non-empty entry, or SIZE_MAX for a table
 * with none.  The -1 argument wraps to SIZE_MAX, so the pre-increment in
 * next() starts the scan at index 0. */
static size_t begin(const upb_table *t) {
  return next(t, -1);
}


/* upb_strtable ***************************************************************/

/* A simple "subclass" of upb_table that only adds a hash function for strings.
*/ 1615 1616static upb_tabkey strcopy(lookupkey_t k2, upb_alloc *a) { 1617 uint32_t len = (uint32_t) k2.str.len; 1618 char *str = upb_malloc(a, k2.str.len + sizeof(uint32_t) + 1); 1619 if (str == NULL) return 0; 1620 memcpy(str, &len, sizeof(uint32_t)); 1621 memcpy(str + sizeof(uint32_t), k2.str.str, k2.str.len); 1622 str[sizeof(uint32_t) + k2.str.len] = '\0'; 1623 return (uintptr_t)str; 1624} 1625 1626static uint32_t strhash(upb_tabkey key) { 1627 uint32_t len; 1628 char *str = upb_tabstr(key, &len); 1629 return upb_murmur_hash2(str, len, 0); 1630} 1631 1632static bool streql(upb_tabkey k1, lookupkey_t k2) { 1633 uint32_t len; 1634 char *str = upb_tabstr(k1, &len); 1635 return len == k2.str.len && memcmp(str, k2.str.str, len) == 0; 1636} 1637 1638bool upb_strtable_init2(upb_strtable *t, upb_ctype_t ctype, upb_alloc *a) { 1639 return init(&t->t, 2, a); 1640} 1641 1642void upb_strtable_clear(upb_strtable *t) { 1643 size_t bytes = upb_table_size(&t->t) * sizeof(upb_tabent); 1644 t->t.count = 0; 1645 memset((char*)t->t.entries, 0, bytes); 1646} 1647 1648void upb_strtable_uninit2(upb_strtable *t, upb_alloc *a) { 1649 size_t i; 1650 for (i = 0; i < upb_table_size(&t->t); i++) 1651 upb_free(a, (void*)t->t.entries[i].key); 1652 uninit(&t->t, a); 1653} 1654 1655bool upb_strtable_resize(upb_strtable *t, size_t size_lg2, upb_alloc *a) { 1656 upb_strtable new_table; 1657 upb_strtable_iter i; 1658 1659 if (!init(&new_table.t, size_lg2, a)) 1660 return false; 1661 upb_strtable_begin(&i, t); 1662 for ( ; !upb_strtable_done(&i); upb_strtable_next(&i)) { 1663 upb_strview key = upb_strtable_iter_key(&i); 1664 upb_strtable_insert3( 1665 &new_table, key.data, key.size, 1666 upb_strtable_iter_value(&i), a); 1667 } 1668 upb_strtable_uninit2(t, a); 1669 *t = new_table; 1670 return true; 1671} 1672 1673bool upb_strtable_insert3(upb_strtable *t, const char *k, size_t len, 1674 upb_value v, upb_alloc *a) { 1675 lookupkey_t key; 1676 upb_tabkey tabkey; 1677 uint32_t hash; 1678 1679 if 
(isfull(&t->t)) { 1680 /* Need to resize. New table of double the size, add old elements to it. */ 1681 if (!upb_strtable_resize(t, t->t.size_lg2 + 1, a)) { 1682 return false; 1683 } 1684 } 1685 1686 key = strkey2(k, len); 1687 tabkey = strcopy(key, a); 1688 if (tabkey == 0) return false; 1689 1690 hash = upb_murmur_hash2(key.str.str, key.str.len, 0); 1691 insert(&t->t, key, tabkey, v, hash, &strhash, &streql); 1692 return true; 1693} 1694 1695bool upb_strtable_lookup2(const upb_strtable *t, const char *key, size_t len, 1696 upb_value *v) { 1697 uint32_t hash = upb_murmur_hash2(key, len, 0); 1698 return lookup(&t->t, strkey2(key, len), v, hash, &streql); 1699} 1700 1701bool upb_strtable_remove3(upb_strtable *t, const char *key, size_t len, 1702 upb_value *val, upb_alloc *alloc) { 1703 uint32_t hash = upb_murmur_hash2(key, len, 0); 1704 upb_tabkey tabkey; 1705 if (rm(&t->t, strkey2(key, len), val, &tabkey, hash, &streql)) { 1706 if (alloc) { 1707 /* Arena-based allocs don't need to free and won't pass this. 
*/ 1708 upb_free(alloc, (void*)tabkey); 1709 } 1710 return true; 1711 } else { 1712 return false; 1713 } 1714} 1715 1716/* Iteration */ 1717 1718void upb_strtable_begin(upb_strtable_iter *i, const upb_strtable *t) { 1719 i->t = t; 1720 i->index = begin(&t->t); 1721} 1722 1723void upb_strtable_next(upb_strtable_iter *i) { 1724 i->index = next(&i->t->t, i->index); 1725} 1726 1727bool upb_strtable_done(const upb_strtable_iter *i) { 1728 if (!i->t) return true; 1729 return i->index >= upb_table_size(&i->t->t) || 1730 upb_tabent_isempty(str_tabent(i)); 1731} 1732 1733upb_strview upb_strtable_iter_key(const upb_strtable_iter *i) { 1734 upb_strview key; 1735 uint32_t len; 1736 UPB_ASSERT(!upb_strtable_done(i)); 1737 key.data = upb_tabstr(str_tabent(i)->key, &len); 1738 key.size = len; 1739 return key; 1740} 1741 1742upb_value upb_strtable_iter_value(const upb_strtable_iter *i) { 1743 UPB_ASSERT(!upb_strtable_done(i)); 1744 return _upb_value_val(str_tabent(i)->val.val); 1745} 1746 1747void upb_strtable_iter_setdone(upb_strtable_iter *i) { 1748 i->t = NULL; 1749 i->index = SIZE_MAX; 1750} 1751 1752bool upb_strtable_iter_isequal(const upb_strtable_iter *i1, 1753 const upb_strtable_iter *i2) { 1754 if (upb_strtable_done(i1) && upb_strtable_done(i2)) 1755 return true; 1756 return i1->t == i2->t && i1->index == i2->index; 1757} 1758 1759 1760/* upb_inttable ***************************************************************/ 1761 1762/* For inttables we use a hybrid structure where small keys are kept in an 1763 * array and large keys are put in the hash table. */ 1764 1765static uint32_t inthash(upb_tabkey key) { return upb_inthash(key); } 1766 1767static bool inteql(upb_tabkey k1, lookupkey_t k2) { 1768 return k1 == k2.num; 1769} 1770 1771static upb_tabval *mutable_array(upb_inttable *t) { 1772 return (upb_tabval*)t->array; 1773} 1774 1775static upb_tabval *inttable_val(upb_inttable *t, uintptr_t key) { 1776 if (key < t->array_size) { 1777 return upb_arrhas(t->array[key]) ? 
&(mutable_array(t)[key]) : NULL; 1778 } else { 1779 upb_tabent *e = 1780 findentry_mutable(&t->t, intkey(key), upb_inthash(key), &inteql); 1781 return e ? &e->val : NULL; 1782 } 1783} 1784 1785static const upb_tabval *inttable_val_const(const upb_inttable *t, 1786 uintptr_t key) { 1787 return inttable_val((upb_inttable*)t, key); 1788} 1789 1790size_t upb_inttable_count(const upb_inttable *t) { 1791 return t->t.count + t->array_count; 1792} 1793 1794static void check(upb_inttable *t) { 1795 UPB_UNUSED(t); 1796#if defined(UPB_DEBUG_TABLE) && !defined(NDEBUG) 1797 { 1798 /* This check is very expensive (makes inserts/deletes O(N)). */ 1799 size_t count = 0; 1800 upb_inttable_iter i; 1801 upb_inttable_begin(&i, t); 1802 for(; !upb_inttable_done(&i); upb_inttable_next(&i), count++) { 1803 UPB_ASSERT(upb_inttable_lookup(t, upb_inttable_iter_key(&i), NULL)); 1804 } 1805 UPB_ASSERT(count == upb_inttable_count(t)); 1806 } 1807#endif 1808} 1809 1810bool upb_inttable_sizedinit(upb_inttable *t, size_t asize, int hsize_lg2, 1811 upb_alloc *a) { 1812 size_t array_bytes; 1813 1814 if (!init(&t->t, hsize_lg2, a)) return false; 1815 /* Always make the array part at least 1 long, so that we know key 0 1816 * won't be in the hash part, which simplifies things. 
*/
  t->array_size = UPB_MAX(1, asize);
  t->array_count = 0;
  array_bytes = t->array_size * sizeof(upb_value);
  t->array = upb_malloc(a, array_bytes);
  if (!t->array) {
    /* Roll back the hash part that init() allocated above. */
    uninit(&t->t, a);
    return false;
  }
  /* Every array byte is set to 0xff; presumably that is the bit pattern
   * upb_arrhas() treats as "slot empty" -- confirm against its definition. */
  memset(mutable_array(t), 0xff, array_bytes);
  check(t);
  return true;
}

/* Initialises `t` with the smallest array part and a 2^4-slot hash part.
 * `ctype` is not used here. */
bool upb_inttable_init2(upb_inttable *t, upb_ctype_t ctype, upb_alloc *a) {
  return upb_inttable_sizedinit(t, 0, 4, a);
}

/* Releases both the hash part and the array part through allocator `a`. */
void upb_inttable_uninit2(upb_inttable *t, upb_alloc *a) {
  uninit(&t->t, a);
  upb_free(a, mutable_array(t));
}

/* Inserts `key` -> `val`.  Keys below array_size go into the array part;
 * all other keys go into the hash part, which is rebuilt at twice the size
 * when it is full.  Returns false if that rebuild cannot allocate. */
bool upb_inttable_insert2(upb_inttable *t, uintptr_t key, upb_value val,
                          upb_alloc *a) {
  upb_tabval tabval;
  tabval.val = val.val;
  UPB_ASSERT(upb_arrhas(tabval));  /* This will reject (uint64_t)-1.  Fix this. */

  if (key < t->array_size) {
    UPB_ASSERT(!upb_arrhas(t->array[key]));
    t->array_count++;
    mutable_array(t)[key].val = val.val;
  } else {
    if (isfull(&t->t)) {
      /* Need to resize the hash part, but we re-use the array part.
*/ 1852 size_t i; 1853 upb_table new_table; 1854 1855 if (!init(&new_table, t->t.size_lg2 + 1, a)) { 1856 return false; 1857 } 1858 1859 for (i = begin(&t->t); i < upb_table_size(&t->t); i = next(&t->t, i)) { 1860 const upb_tabent *e = &t->t.entries[i]; 1861 uint32_t hash; 1862 upb_value v; 1863 1864 _upb_value_setval(&v, e->val.val); 1865 hash = upb_inthash(e->key); 1866 insert(&new_table, intkey(e->key), e->key, v, hash, &inthash, &inteql); 1867 } 1868 1869 UPB_ASSERT(t->t.count == new_table.count); 1870 1871 uninit(&t->t, a); 1872 t->t = new_table; 1873 } 1874 insert(&t->t, intkey(key), key, val, upb_inthash(key), &inthash, &inteql); 1875 } 1876 check(t); 1877 return true; 1878} 1879 1880bool upb_inttable_lookup(const upb_inttable *t, uintptr_t key, upb_value *v) { 1881 const upb_tabval *table_v = inttable_val_const(t, key); 1882 if (!table_v) return false; 1883 if (v) _upb_value_setval(v, table_v->val); 1884 return true; 1885} 1886 1887bool upb_inttable_replace(upb_inttable *t, uintptr_t key, upb_value val) { 1888 upb_tabval *table_v = inttable_val(t, key); 1889 if (!table_v) return false; 1890 table_v->val = val.val; 1891 return true; 1892} 1893 1894bool upb_inttable_remove(upb_inttable *t, uintptr_t key, upb_value *val) { 1895 bool success; 1896 if (key < t->array_size) { 1897 if (upb_arrhas(t->array[key])) { 1898 upb_tabval empty = UPB_TABVALUE_EMPTY_INIT; 1899 t->array_count--; 1900 if (val) { 1901 _upb_value_setval(val, t->array[key].val); 1902 } 1903 mutable_array(t)[key] = empty; 1904 success = true; 1905 } else { 1906 success = false; 1907 } 1908 } else { 1909 success = rm(&t->t, intkey(key), val, NULL, upb_inthash(key), &inteql); 1910 } 1911 check(t); 1912 return success; 1913} 1914 1915bool upb_inttable_push2(upb_inttable *t, upb_value val, upb_alloc *a) { 1916 return upb_inttable_insert2(t, upb_inttable_count(t), val, a); 1917} 1918 1919upb_value upb_inttable_pop(upb_inttable *t) { 1920 upb_value val; 1921 bool ok = upb_inttable_remove(t, 
upb_inttable_count(t) - 1, &val);
  UPB_ASSERT(ok);
  return val;
}

/* Pointer-keyed convenience wrappers: the pointer value itself is the key. */
bool upb_inttable_insertptr2(upb_inttable *t, const void *key, upb_value val,
                             upb_alloc *a) {
  return upb_inttable_insert2(t, (uintptr_t)key, val, a);
}

bool upb_inttable_lookupptr(const upb_inttable *t, const void *key,
                            upb_value *v) {
  return upb_inttable_lookup(t, (uintptr_t)key, v);
}

bool upb_inttable_removeptr(upb_inttable *t, const void *key, upb_value *val) {
  return upb_inttable_remove(t, (uintptr_t)key, val);
}

/* Rebuilds `t` with an array part and a hash part sized for its current
 * contents. */
void upb_inttable_compact2(upb_inttable *t, upb_alloc *a) {
  /* A power-of-two histogram of the table keys. */
  size_t counts[UPB_MAXARRSIZE + 1] = {0};

  /* The max key in each bucket. */
  uintptr_t max[UPB_MAXARRSIZE + 1] = {0};

  upb_inttable_iter i;
  size_t arr_count;
  int size_lg2;
  upb_inttable new_t;

  /* Bucket every key by log2ceil(key).  log2ceil() caps its result at
   * UPB_MAXARRSIZE, so all larger keys share the last bucket. */
  upb_inttable_begin(&i, t);
  for (; !upb_inttable_done(&i); upb_inttable_next(&i)) {
    uintptr_t key = upb_inttable_iter_key(&i);
    int bucket = log2ceil(key);
    max[bucket] = UPB_MAX(max[bucket], key);
    counts[bucket]++;
  }

  /* Find the largest power of two that satisfies the MIN_DENSITY
   * definition (while actually having some keys). */
  arr_count = upb_inttable_count(t);

  for (size_lg2 = ARRAY_SIZE(counts) - 1; size_lg2 > 0; size_lg2--) {
    if (counts[size_lg2] == 0) {
      /* We can halve again without losing any entries. */
      continue;
    } else if (arr_count >= (1 << size_lg2) * MIN_DENSITY) {
      break;
    }

    /* Too sparse: the keys of this bucket will go to the hash part. */
    arr_count -= counts[size_lg2];
  }

  UPB_ASSERT(arr_count <= upb_inttable_count(t));

  {
    /* Insert all elements into new, perfectly-sized table. */
    size_t arr_size = max[size_lg2] + 1;  /* +1 so arr[max] will fit. */
    size_t hash_count = upb_inttable_count(t) - arr_count;
    size_t hash_size = hash_count ?
(hash_count / MAX_LOAD) + 1 : 0; 1982 int hashsize_lg2 = log2ceil(hash_size); 1983 1984 upb_inttable_sizedinit(&new_t, arr_size, hashsize_lg2, a); 1985 upb_inttable_begin(&i, t); 1986 for (; !upb_inttable_done(&i); upb_inttable_next(&i)) { 1987 uintptr_t k = upb_inttable_iter_key(&i); 1988 upb_inttable_insert2(&new_t, k, upb_inttable_iter_value(&i), a); 1989 } 1990 UPB_ASSERT(new_t.array_size == arr_size); 1991 UPB_ASSERT(new_t.t.size_lg2 == hashsize_lg2); 1992 } 1993 upb_inttable_uninit2(t, a); 1994 *t = new_t; 1995} 1996 1997/* Iteration. */ 1998 1999static const upb_tabent *int_tabent(const upb_inttable_iter *i) { 2000 UPB_ASSERT(!i->array_part); 2001 return &i->t->t.entries[i->index]; 2002} 2003 2004static upb_tabval int_arrent(const upb_inttable_iter *i) { 2005 UPB_ASSERT(i->array_part); 2006 return i->t->array[i->index]; 2007} 2008 2009void upb_inttable_begin(upb_inttable_iter *i, const upb_inttable *t) { 2010 i->t = t; 2011 i->index = -1; 2012 i->array_part = true; 2013 upb_inttable_next(i); 2014} 2015 2016void upb_inttable_next(upb_inttable_iter *iter) { 2017 const upb_inttable *t = iter->t; 2018 if (iter->array_part) { 2019 while (++iter->index < t->array_size) { 2020 if (upb_arrhas(int_arrent(iter))) { 2021 return; 2022 } 2023 } 2024 iter->array_part = false; 2025 iter->index = begin(&t->t); 2026 } else { 2027 iter->index = next(&t->t, iter->index); 2028 } 2029} 2030 2031bool upb_inttable_done(const upb_inttable_iter *i) { 2032 if (!i->t) return true; 2033 if (i->array_part) { 2034 return i->index >= i->t->array_size || 2035 !upb_arrhas(int_arrent(i)); 2036 } else { 2037 return i->index >= upb_table_size(&i->t->t) || 2038 upb_tabent_isempty(int_tabent(i)); 2039 } 2040} 2041 2042uintptr_t upb_inttable_iter_key(const upb_inttable_iter *i) { 2043 UPB_ASSERT(!upb_inttable_done(i)); 2044 return i->array_part ? 
i->index : int_tabent(i)->key; 2045} 2046 2047upb_value upb_inttable_iter_value(const upb_inttable_iter *i) { 2048 UPB_ASSERT(!upb_inttable_done(i)); 2049 return _upb_value_val( 2050 i->array_part ? i->t->array[i->index].val : int_tabent(i)->val.val); 2051} 2052 2053void upb_inttable_iter_setdone(upb_inttable_iter *i) { 2054 i->t = NULL; 2055 i->index = SIZE_MAX; 2056 i->array_part = false; 2057} 2058 2059bool upb_inttable_iter_isequal(const upb_inttable_iter *i1, 2060 const upb_inttable_iter *i2) { 2061 if (upb_inttable_done(i1) && upb_inttable_done(i2)) 2062 return true; 2063 return i1->t == i2->t && i1->index == i2->index && 2064 i1->array_part == i2->array_part; 2065} 2066 2067#if defined(UPB_UNALIGNED_READS_OK) || defined(__s390x__) 2068/* ----------------------------------------------------------------------------- 2069 * MurmurHash2, by Austin Appleby (released as public domain). 2070 * Reformatted and C99-ified by Joshua Haberman. 2071 * Note - This code makes a few assumptions about how your machine behaves - 2072 * 1. We can read a 4-byte value from any address without crashing 2073 * 2. sizeof(int) == 4 (in upb this limitation is removed by using uint32_t 2074 * And it has a few limitations - 2075 * 1. It will not work incrementally. 2076 * 2. It will not produce the same results on little-endian and big-endian 2077 * machines. */ 2078uint32_t upb_murmur_hash2(const void *key, size_t len, uint32_t seed) { 2079 /* 'm' and 'r' are mixing constants generated offline. 2080 * They're not really 'magic', they just happen to work well. 
*/
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;

  /* Initialize the hash to a 'random' value */
  uint32_t h = seed ^ len;

  /* Mix 4 bytes at a time into the hash */
  const uint8_t * data = (const uint8_t *)key;
  while(len >= 4) {
    uint32_t k;
    /* memcpy keeps the 4-byte load valid for any address. */
    memcpy(&k, data, sizeof(k));

    k *= m;
    k ^= k >> r;
    k *= m;

    h *= m;
    h ^= k;

    data += 4;
    len -= 4;
  }

  /* Handle the last few bytes of the input array */
  switch(len) {
    case 3: h ^= data[2] << 16;  /* fallthrough */
    case 2: h ^= data[1] << 8;   /* fallthrough */
    case 1: h ^= data[0]; h *= m;
  };

  /* Do a few final mixes of the hash to ensure the last few
   * bytes are well-incorporated. */
  h ^= h >> 13;
  h *= m;
  h ^= h >> 15;

  return h;
}

#else /* !UPB_UNALIGNED_READS_OK */

/* -----------------------------------------------------------------------------
 * MurmurHashAligned2, by Austin Appleby
 * Same algorithm as MurmurHash2, but only does aligned reads - should be safer
 * on certain platforms.
* Performance will be lower than MurmurHash2 */

/* One mixing round.  Note that it uses the local `r` of the enclosing
 * function, which is not a macro parameter. */
#define MIX(h,k,m) { k *= m; k ^= k >> r; k *= m; h *= m; h ^= k; }

uint32_t upb_murmur_hash2(const void * key, size_t len, uint32_t seed) {
  const uint32_t m = 0x5bd1e995;
  const int32_t r = 24;
  const uint8_t * data = (const uint8_t *)key;
  uint32_t h = (uint32_t)(seed ^ len);
  /* Offset of the input within a 4-byte word (low two address bits). */
  uint8_t align = (uintptr_t)data & 3;

  if(align && (len >= 4)) {
    /* Pre-load the temp registers */
    uint32_t t = 0, d = 0;
    int32_t sl;
    int32_t sr;

    /* Gather the 4 - align leading bytes that precede the next word
     * boundary. */
    switch(align) {
      case 1: t |= data[2] << 16;  /* fallthrough */
      case 2: t |= data[1] << 8;   /* fallthrough */
      case 3: t |= data[0];
    }

    t <<= (8 * align);

    /* From here on `data` is 4-byte aligned. */
    data += 4-align;
    len -= 4-align;

    sl = 8 * (4-align);
    sr = 8 * align;

    /* Mix */

    while(len >= 4) {
      uint32_t k;

      /* Aligned word load; each mixed word is stitched together from the
       * previous load (t) and this one (d). */
      d = *(uint32_t *)data;
      t = (t >> sr) | (d << sl);

      k = t;

      MIX(h,k,m);

      t = d;

      data += 4;
      len -= 4;
    }

    /* Handle leftover data in temp registers */

    d = 0;

    if(len >= align) {
      uint32_t k;

      switch(align) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];
      }

      k = (t >> sr) | (d << sl);
      MIX(h,k,m);

      data += align;
      len -= align;

      /* ----------
       * Handle tail bytes */

      switch(len) {
        case 3: h ^= data[2] << 16;  /* fallthrough */
        case 2: h ^= data[1] << 8;   /* fallthrough */
        case 1: h ^= data[0]; h *= m;
      };
    } else {
      switch(len) {
        case 3: d |= data[2] << 16;  /* fallthrough */
        case 2: d |= data[1] << 8;   /* fallthrough */
        case 1: d |= data[0];        /* fallthrough */
        case 0: h ^= (t >> sr) | (d << sl); h *= m;
      }
    }

    h ^= h >> 13;
    h *= m;
    h ^= h >> 15;

    return h;
  } else {
    /* Taken when the input is already 4-byte aligned or is shorter than
     * 4 bytes; in the latter case the word loop below does not run. */
    while(len >= 4) {
      uint32_t k = *(uint32_t *)data;

      MIX(h,k,m);

      data += 4;
      len -= 4;
    }

    /* ----------
     * Handle tail
bytes */ 2228 2229 switch(len) { 2230 case 3: h ^= data[2] << 16; 2231 case 2: h ^= data[1] << 8; 2232 case 1: h ^= data[0]; h *= m; 2233 }; 2234 2235 h ^= h >> 13; 2236 h *= m; 2237 h ^= h >> 15; 2238 2239 return h; 2240 } 2241} 2242#undef MIX 2243 2244#endif /* UPB_UNALIGNED_READS_OK */ 2245 2246 2247#include <errno.h> 2248#include <stdarg.h> 2249#include <stddef.h> 2250#include <stdint.h> 2251#include <stdio.h> 2252#include <stdlib.h> 2253#include <string.h> 2254 2255 2256/* upb_status *****************************************************************/ 2257 2258void upb_status_clear(upb_status *status) { 2259 if (!status) return; 2260 status->ok = true; 2261 status->msg[0] = '\0'; 2262} 2263 2264bool upb_ok(const upb_status *status) { return status->ok; } 2265 2266const char *upb_status_errmsg(const upb_status *status) { return status->msg; } 2267 2268void upb_status_seterrmsg(upb_status *status, const char *msg) { 2269 if (!status) return; 2270 status->ok = false; 2271 strncpy(status->msg, msg, UPB_STATUS_MAX_MESSAGE - 1); 2272 status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; 2273} 2274 2275void upb_status_seterrf(upb_status *status, const char *fmt, ...) 
{ 2276 va_list args; 2277 va_start(args, fmt); 2278 upb_status_vseterrf(status, fmt, args); 2279 va_end(args); 2280} 2281 2282void upb_status_vseterrf(upb_status *status, const char *fmt, va_list args) { 2283 if (!status) return; 2284 status->ok = false; 2285 _upb_vsnprintf(status->msg, sizeof(status->msg), fmt, args); 2286 status->msg[UPB_STATUS_MAX_MESSAGE - 1] = '\0'; 2287} 2288 2289/* upb_alloc ******************************************************************/ 2290 2291static void *upb_global_allocfunc(upb_alloc *alloc, void *ptr, size_t oldsize, 2292 size_t size) { 2293 UPB_UNUSED(alloc); 2294 UPB_UNUSED(oldsize); 2295 if (size == 0) { 2296 free(ptr); 2297 return NULL; 2298 } else { 2299 return realloc(ptr, size); 2300 } 2301} 2302 2303upb_alloc upb_alloc_global = {&upb_global_allocfunc}; 2304 2305/* upb_arena ******************************************************************/ 2306 2307/* Be conservative and choose 16 in case anyone is using SSE. */ 2308 2309struct upb_arena { 2310 _upb_arena_head head; 2311 char *start; 2312 2313 /* Allocator to allocate arena blocks. We are responsible for freeing these 2314 * when we are destroyed. */ 2315 upb_alloc *block_alloc; 2316 2317 size_t bytes_allocated; 2318 size_t next_block_size; 2319 size_t max_block_size; 2320 2321 /* Linked list of blocks. Points to an arena_block, defined in env.c */ 2322 void *block_head; 2323 2324 /* Cleanup entries. Pointer to a cleanup_ent, defined in env.c */ 2325 void *cleanup_head; 2326}; 2327 2328typedef struct mem_block { 2329 struct mem_block *next; 2330 bool owned; 2331 /* Data follows. 
*/ 2332} mem_block; 2333 2334typedef struct cleanup_ent { 2335 struct cleanup_ent *next; 2336 upb_cleanup_func *cleanup; 2337 void *ud; 2338} cleanup_ent; 2339 2340static void upb_arena_addblock(upb_arena *a, void *ptr, size_t size, 2341 bool owned) { 2342 mem_block *block = ptr; 2343 2344 if (a->block_head) { 2345 a->bytes_allocated += a->head.ptr - a->start; 2346 } 2347 2348 block->next = a->block_head; 2349 block->owned = owned; 2350 2351 a->block_head = block; 2352 a->start = (char*)block + _upb_arena_alignup(sizeof(mem_block)); 2353 a->head.ptr = a->start; 2354 a->head.end = (char*)block + size; 2355 2356 /* TODO(haberman): ASAN poison. */ 2357} 2358 2359static mem_block *upb_arena_allocblock(upb_arena *a, size_t size) { 2360 size_t block_size = UPB_MAX(size, a->next_block_size) + sizeof(mem_block); 2361 mem_block *block = upb_malloc(a->block_alloc, block_size); 2362 2363 if (!block) { 2364 return NULL; 2365 } 2366 2367 upb_arena_addblock(a, block, block_size, true); 2368 a->next_block_size = UPB_MIN(block_size * 2, a->max_block_size); 2369 2370 return block; 2371} 2372 2373void *_upb_arena_slowmalloc(upb_arena *a, size_t size) { 2374 mem_block *block = upb_arena_allocblock(a, size); 2375 if (!block) return NULL; /* Out of memory. */ 2376 return upb_arena_malloc(a, size); 2377} 2378 2379static void *upb_arena_doalloc(upb_alloc *alloc, void *ptr, size_t oldsize, 2380 size_t size) { 2381 upb_arena *a = (upb_arena*)alloc; /* upb_alloc is initial member. */ 2382 void *ret; 2383 2384 if (size == 0) { 2385 return NULL; /* We are an arena, don't need individual frees. */ 2386 } 2387 2388 ret = upb_arena_malloc(a, size); 2389 if (!ret) return NULL; 2390 2391 /* TODO(haberman): special-case if this is a realloc of the last alloc? */ 2392 2393 if (oldsize > 0) { 2394 memcpy(ret, ptr, oldsize); /* Preserve existing data. */ 2395 } 2396 2397 /* TODO(haberman): ASAN unpoison. 
*/ 2398 return ret; 2399} 2400 2401/* Public Arena API ***********************************************************/ 2402 2403#define upb_alignof(type) offsetof (struct { char c; type member; }, member) 2404 2405upb_arena *upb_arena_init(void *mem, size_t n, upb_alloc *alloc) { 2406 const size_t first_block_overhead = sizeof(upb_arena) + sizeof(mem_block); 2407 upb_arena *a; 2408 bool owned = false; 2409 2410 /* Round block size down to alignof(*a) since we will allocate the arena 2411 * itself at the end. */ 2412 n &= ~(upb_alignof(upb_arena) - 1); 2413 2414 if (n < first_block_overhead) { 2415 /* We need to malloc the initial block. */ 2416 n = first_block_overhead + 256; 2417 owned = true; 2418 if (!alloc || !(mem = upb_malloc(alloc, n))) { 2419 return NULL; 2420 } 2421 } 2422 2423 a = (void*)((char*)mem + n - sizeof(*a)); 2424 n -= sizeof(*a); 2425 2426 a->head.alloc.func = &upb_arena_doalloc; 2427 a->head.ptr = NULL; 2428 a->head.end = NULL; 2429 a->start = NULL; 2430 a->block_alloc = &upb_alloc_global; 2431 a->bytes_allocated = 0; 2432 a->next_block_size = 256; 2433 a->max_block_size = 16384; 2434 a->cleanup_head = NULL; 2435 a->block_head = NULL; 2436 a->block_alloc = alloc; 2437 2438 upb_arena_addblock(a, mem, n, owned); 2439 2440 return a; 2441} 2442 2443#undef upb_alignof 2444 2445void upb_arena_free(upb_arena *a) { 2446 cleanup_ent *ent = a->cleanup_head; 2447 mem_block *block = a->block_head; 2448 2449 while (ent) { 2450 ent->cleanup(ent->ud); 2451 ent = ent->next; 2452 } 2453 2454 /* Must do this after running cleanup functions, because this will delete 2455 * the memory we store our cleanup entries in! */ 2456 while (block) { 2457 /* Load first since we are deleting block. 
*/
    mem_block *next = block->next;

    /* Blocks supplied by the caller (owned == false) are left alone. */
    if (block->owned) {
      upb_free(a->block_alloc, block);
    }

    block = next;
  }
}

/* Registers `func` to be called with `ud` when the arena is freed.
 *
 * The entry is allocated from the arena itself and pushed on the front of the
 * cleanup list.  Returns false if that allocation fails. */
bool upb_arena_addcleanup(upb_arena *a, void *ud, upb_cleanup_func *func) {
  cleanup_ent *ent = upb_malloc(&a->head.alloc, sizeof(cleanup_ent));
  if (!ent) {
    return false;  /* Out of memory. */
  }

  ent->cleanup = func;
  ent->ud = ud;
  ent->next = a->cleanup_head;
  a->cleanup_head = ent;

  return true;
}

/* Returns the bytes recorded for earlier blocks plus the bytes used so far in
 * the current block. */
size_t upb_arena_bytesallocated(const upb_arena *a) {
  return a->bytes_allocated + (a->head.ptr - a->start);
}
/* This file was generated by upbc (the upb compiler) from the input
 * file:
 *
 *     google/protobuf/descriptor.proto
 *
 * Do not edit -- your changes will be discarded when the file is
 * regenerated. */

#include <stddef.h>


static const upb_msglayout *const google_protobuf_FileDescriptorSet_submsgs[1] = {
  &google_protobuf_FileDescriptorProto_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorSet__fields[1] = {
  {1, UPB_SIZE(0, 0), 0, 0, 11, 3},
};

const upb_msglayout google_protobuf_FileDescriptorSet_msginit = {
  &google_protobuf_FileDescriptorSet_submsgs[0],
  &google_protobuf_FileDescriptorSet__fields[0],
  UPB_SIZE(4, 8), 1, false,
};

static const upb_msglayout *const google_protobuf_FileDescriptorProto_submsgs[6] = {
  &google_protobuf_DescriptorProto_msginit,
  &google_protobuf_EnumDescriptorProto_msginit,
  &google_protobuf_FieldDescriptorProto_msginit,
  &google_protobuf_FileOptions_msginit,
  &google_protobuf_ServiceDescriptorProto_msginit,
  &google_protobuf_SourceCodeInfo_msginit,
};

static const upb_msglayout_field google_protobuf_FileDescriptorProto__fields[12] = {
  {1, UPB_SIZE(4, 8), 1, 0, 9, 1},
  {2, UPB_SIZE(12, 24), 2, 0, 9, 1},
  {3, 
UPB_SIZE(36, 72), 0, 0, 9, 3}, 2523 {4, UPB_SIZE(40, 80), 0, 0, 11, 3}, 2524 {5, UPB_SIZE(44, 88), 0, 1, 11, 3}, 2525 {6, UPB_SIZE(48, 96), 0, 4, 11, 3}, 2526 {7, UPB_SIZE(52, 104), 0, 2, 11, 3}, 2527 {8, UPB_SIZE(28, 56), 4, 3, 11, 1}, 2528 {9, UPB_SIZE(32, 64), 5, 5, 11, 1}, 2529 {10, UPB_SIZE(56, 112), 0, 0, 5, 3}, 2530 {11, UPB_SIZE(60, 120), 0, 0, 5, 3}, 2531 {12, UPB_SIZE(20, 40), 3, 0, 9, 1}, 2532}; 2533 2534const upb_msglayout google_protobuf_FileDescriptorProto_msginit = { 2535 &google_protobuf_FileDescriptorProto_submsgs[0], 2536 &google_protobuf_FileDescriptorProto__fields[0], 2537 UPB_SIZE(64, 128), 12, false, 2538}; 2539 2540static const upb_msglayout *const google_protobuf_DescriptorProto_submsgs[8] = { 2541 &google_protobuf_DescriptorProto_msginit, 2542 &google_protobuf_DescriptorProto_ExtensionRange_msginit, 2543 &google_protobuf_DescriptorProto_ReservedRange_msginit, 2544 &google_protobuf_EnumDescriptorProto_msginit, 2545 &google_protobuf_FieldDescriptorProto_msginit, 2546 &google_protobuf_MessageOptions_msginit, 2547 &google_protobuf_OneofDescriptorProto_msginit, 2548}; 2549 2550static const upb_msglayout_field google_protobuf_DescriptorProto__fields[10] = { 2551 {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, 2552 {2, UPB_SIZE(16, 32), 0, 4, 11, 3}, 2553 {3, UPB_SIZE(20, 40), 0, 0, 11, 3}, 2554 {4, UPB_SIZE(24, 48), 0, 3, 11, 3}, 2555 {5, UPB_SIZE(28, 56), 0, 1, 11, 3}, 2556 {6, UPB_SIZE(32, 64), 0, 4, 11, 3}, 2557 {7, UPB_SIZE(12, 24), 2, 5, 11, 1}, 2558 {8, UPB_SIZE(36, 72), 0, 6, 11, 3}, 2559 {9, UPB_SIZE(40, 80), 0, 2, 11, 3}, 2560 {10, UPB_SIZE(44, 88), 0, 0, 9, 3}, 2561}; 2562 2563const upb_msglayout google_protobuf_DescriptorProto_msginit = { 2564 &google_protobuf_DescriptorProto_submsgs[0], 2565 &google_protobuf_DescriptorProto__fields[0], 2566 UPB_SIZE(48, 96), 10, false, 2567}; 2568 2569static const upb_msglayout *const google_protobuf_DescriptorProto_ExtensionRange_submsgs[1] = { 2570 &google_protobuf_ExtensionRangeOptions_msginit, 2571}; 2572 
2573static const upb_msglayout_field google_protobuf_DescriptorProto_ExtensionRange__fields[3] = { 2574 {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, 2575 {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, 2576 {3, UPB_SIZE(12, 16), 3, 0, 11, 1}, 2577}; 2578 2579const upb_msglayout google_protobuf_DescriptorProto_ExtensionRange_msginit = { 2580 &google_protobuf_DescriptorProto_ExtensionRange_submsgs[0], 2581 &google_protobuf_DescriptorProto_ExtensionRange__fields[0], 2582 UPB_SIZE(16, 24), 3, false, 2583}; 2584 2585static const upb_msglayout_field google_protobuf_DescriptorProto_ReservedRange__fields[2] = { 2586 {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, 2587 {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, 2588}; 2589 2590const upb_msglayout google_protobuf_DescriptorProto_ReservedRange_msginit = { 2591 NULL, 2592 &google_protobuf_DescriptorProto_ReservedRange__fields[0], 2593 UPB_SIZE(12, 12), 2, false, 2594}; 2595 2596static const upb_msglayout *const google_protobuf_ExtensionRangeOptions_submsgs[1] = { 2597 &google_protobuf_UninterpretedOption_msginit, 2598}; 2599 2600static const upb_msglayout_field google_protobuf_ExtensionRangeOptions__fields[1] = { 2601 {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, 2602}; 2603 2604const upb_msglayout google_protobuf_ExtensionRangeOptions_msginit = { 2605 &google_protobuf_ExtensionRangeOptions_submsgs[0], 2606 &google_protobuf_ExtensionRangeOptions__fields[0], 2607 UPB_SIZE(4, 8), 1, false, 2608}; 2609 2610static const upb_msglayout *const google_protobuf_FieldDescriptorProto_submsgs[1] = { 2611 &google_protobuf_FieldOptions_msginit, 2612}; 2613 2614static const upb_msglayout_field google_protobuf_FieldDescriptorProto__fields[11] = { 2615 {1, UPB_SIZE(36, 40), 6, 0, 9, 1}, 2616 {2, UPB_SIZE(44, 56), 7, 0, 9, 1}, 2617 {3, UPB_SIZE(24, 24), 3, 0, 5, 1}, 2618 {4, UPB_SIZE(8, 8), 1, 0, 14, 1}, 2619 {5, UPB_SIZE(16, 16), 2, 0, 14, 1}, 2620 {6, UPB_SIZE(52, 72), 8, 0, 9, 1}, 2621 {7, UPB_SIZE(60, 88), 9, 0, 9, 1}, 2622 {8, UPB_SIZE(76, 120), 11, 0, 11, 1}, 2623 {9, UPB_SIZE(28, 28), 4, 0, 5, 1}, 
2624 {10, UPB_SIZE(68, 104), 10, 0, 9, 1}, 2625 {17, UPB_SIZE(32, 32), 5, 0, 8, 1}, 2626}; 2627 2628const upb_msglayout google_protobuf_FieldDescriptorProto_msginit = { 2629 &google_protobuf_FieldDescriptorProto_submsgs[0], 2630 &google_protobuf_FieldDescriptorProto__fields[0], 2631 UPB_SIZE(80, 128), 11, false, 2632}; 2633 2634static const upb_msglayout *const google_protobuf_OneofDescriptorProto_submsgs[1] = { 2635 &google_protobuf_OneofOptions_msginit, 2636}; 2637 2638static const upb_msglayout_field google_protobuf_OneofDescriptorProto__fields[2] = { 2639 {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, 2640 {2, UPB_SIZE(12, 24), 2, 0, 11, 1}, 2641}; 2642 2643const upb_msglayout google_protobuf_OneofDescriptorProto_msginit = { 2644 &google_protobuf_OneofDescriptorProto_submsgs[0], 2645 &google_protobuf_OneofDescriptorProto__fields[0], 2646 UPB_SIZE(16, 32), 2, false, 2647}; 2648 2649static const upb_msglayout *const google_protobuf_EnumDescriptorProto_submsgs[3] = { 2650 &google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit, 2651 &google_protobuf_EnumOptions_msginit, 2652 &google_protobuf_EnumValueDescriptorProto_msginit, 2653}; 2654 2655static const upb_msglayout_field google_protobuf_EnumDescriptorProto__fields[5] = { 2656 {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, 2657 {2, UPB_SIZE(16, 32), 0, 2, 11, 3}, 2658 {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, 2659 {4, UPB_SIZE(20, 40), 0, 0, 11, 3}, 2660 {5, UPB_SIZE(24, 48), 0, 0, 9, 3}, 2661}; 2662 2663const upb_msglayout google_protobuf_EnumDescriptorProto_msginit = { 2664 &google_protobuf_EnumDescriptorProto_submsgs[0], 2665 &google_protobuf_EnumDescriptorProto__fields[0], 2666 UPB_SIZE(32, 64), 5, false, 2667}; 2668 2669static const upb_msglayout_field google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[2] = { 2670 {1, UPB_SIZE(4, 4), 1, 0, 5, 1}, 2671 {2, UPB_SIZE(8, 8), 2, 0, 5, 1}, 2672}; 2673 2674const upb_msglayout google_protobuf_EnumDescriptorProto_EnumReservedRange_msginit = { 2675 NULL, 2676 
&google_protobuf_EnumDescriptorProto_EnumReservedRange__fields[0], 2677 UPB_SIZE(12, 12), 2, false, 2678}; 2679 2680static const upb_msglayout *const google_protobuf_EnumValueDescriptorProto_submsgs[1] = { 2681 &google_protobuf_EnumValueOptions_msginit, 2682}; 2683 2684static const upb_msglayout_field google_protobuf_EnumValueDescriptorProto__fields[3] = { 2685 {1, UPB_SIZE(8, 8), 2, 0, 9, 1}, 2686 {2, UPB_SIZE(4, 4), 1, 0, 5, 1}, 2687 {3, UPB_SIZE(16, 24), 3, 0, 11, 1}, 2688}; 2689 2690const upb_msglayout google_protobuf_EnumValueDescriptorProto_msginit = { 2691 &google_protobuf_EnumValueDescriptorProto_submsgs[0], 2692 &google_protobuf_EnumValueDescriptorProto__fields[0], 2693 UPB_SIZE(24, 32), 3, false, 2694}; 2695 2696static const upb_msglayout *const google_protobuf_ServiceDescriptorProto_submsgs[2] = { 2697 &google_protobuf_MethodDescriptorProto_msginit, 2698 &google_protobuf_ServiceOptions_msginit, 2699}; 2700 2701static const upb_msglayout_field google_protobuf_ServiceDescriptorProto__fields[3] = { 2702 {1, UPB_SIZE(4, 8), 1, 0, 9, 1}, 2703 {2, UPB_SIZE(16, 32), 0, 0, 11, 3}, 2704 {3, UPB_SIZE(12, 24), 2, 1, 11, 1}, 2705}; 2706 2707const upb_msglayout google_protobuf_ServiceDescriptorProto_msginit = { 2708 &google_protobuf_ServiceDescriptorProto_submsgs[0], 2709 &google_protobuf_ServiceDescriptorProto__fields[0], 2710 UPB_SIZE(24, 48), 3, false, 2711}; 2712 2713static const upb_msglayout *const google_protobuf_MethodDescriptorProto_submsgs[1] = { 2714 &google_protobuf_MethodOptions_msginit, 2715}; 2716 2717static const upb_msglayout_field google_protobuf_MethodDescriptorProto__fields[6] = { 2718 {1, UPB_SIZE(4, 8), 3, 0, 9, 1}, 2719 {2, UPB_SIZE(12, 24), 4, 0, 9, 1}, 2720 {3, UPB_SIZE(20, 40), 5, 0, 9, 1}, 2721 {4, UPB_SIZE(28, 56), 6, 0, 11, 1}, 2722 {5, UPB_SIZE(1, 1), 1, 0, 8, 1}, 2723 {6, UPB_SIZE(2, 2), 2, 0, 8, 1}, 2724}; 2725 2726const upb_msglayout google_protobuf_MethodDescriptorProto_msginit = { 2727 
&google_protobuf_MethodDescriptorProto_submsgs[0], 2728 &google_protobuf_MethodDescriptorProto__fields[0], 2729 UPB_SIZE(32, 64), 6, false, 2730}; 2731 2732static const upb_msglayout *const google_protobuf_FileOptions_submsgs[1] = { 2733 &google_protobuf_UninterpretedOption_msginit, 2734}; 2735 2736static const upb_msglayout_field google_protobuf_FileOptions__fields[21] = { 2737 {1, UPB_SIZE(28, 32), 11, 0, 9, 1}, 2738 {8, UPB_SIZE(36, 48), 12, 0, 9, 1}, 2739 {9, UPB_SIZE(8, 8), 1, 0, 14, 1}, 2740 {10, UPB_SIZE(16, 16), 2, 0, 8, 1}, 2741 {11, UPB_SIZE(44, 64), 13, 0, 9, 1}, 2742 {16, UPB_SIZE(17, 17), 3, 0, 8, 1}, 2743 {17, UPB_SIZE(18, 18), 4, 0, 8, 1}, 2744 {18, UPB_SIZE(19, 19), 5, 0, 8, 1}, 2745 {20, UPB_SIZE(20, 20), 6, 0, 8, 1}, 2746 {23, UPB_SIZE(21, 21), 7, 0, 8, 1}, 2747 {27, UPB_SIZE(22, 22), 8, 0, 8, 1}, 2748 {31, UPB_SIZE(23, 23), 9, 0, 8, 1}, 2749 {36, UPB_SIZE(52, 80), 14, 0, 9, 1}, 2750 {37, UPB_SIZE(60, 96), 15, 0, 9, 1}, 2751 {39, UPB_SIZE(68, 112), 16, 0, 9, 1}, 2752 {40, UPB_SIZE(76, 128), 17, 0, 9, 1}, 2753 {41, UPB_SIZE(84, 144), 18, 0, 9, 1}, 2754 {42, UPB_SIZE(24, 24), 10, 0, 8, 1}, 2755 {44, UPB_SIZE(92, 160), 19, 0, 9, 1}, 2756 {45, UPB_SIZE(100, 176), 20, 0, 9, 1}, 2757 {999, UPB_SIZE(108, 192), 0, 0, 11, 3}, 2758}; 2759 2760const upb_msglayout google_protobuf_FileOptions_msginit = { 2761 &google_protobuf_FileOptions_submsgs[0], 2762 &google_protobuf_FileOptions__fields[0], 2763 UPB_SIZE(112, 208), 21, false, 2764}; 2765 2766static const upb_msglayout *const google_protobuf_MessageOptions_submsgs[1] = { 2767 &google_protobuf_UninterpretedOption_msginit, 2768}; 2769 2770static const upb_msglayout_field google_protobuf_MessageOptions__fields[5] = { 2771 {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, 2772 {2, UPB_SIZE(2, 2), 2, 0, 8, 1}, 2773 {3, UPB_SIZE(3, 3), 3, 0, 8, 1}, 2774 {7, UPB_SIZE(4, 4), 4, 0, 8, 1}, 2775 {999, UPB_SIZE(8, 8), 0, 0, 11, 3}, 2776}; 2777 2778const upb_msglayout google_protobuf_MessageOptions_msginit = { 2779 
&google_protobuf_MessageOptions_submsgs[0], 2780 &google_protobuf_MessageOptions__fields[0], 2781 UPB_SIZE(12, 16), 5, false, 2782}; 2783 2784static const upb_msglayout *const google_protobuf_FieldOptions_submsgs[1] = { 2785 &google_protobuf_UninterpretedOption_msginit, 2786}; 2787 2788static const upb_msglayout_field google_protobuf_FieldOptions__fields[7] = { 2789 {1, UPB_SIZE(8, 8), 1, 0, 14, 1}, 2790 {2, UPB_SIZE(24, 24), 3, 0, 8, 1}, 2791 {3, UPB_SIZE(25, 25), 4, 0, 8, 1}, 2792 {5, UPB_SIZE(26, 26), 5, 0, 8, 1}, 2793 {6, UPB_SIZE(16, 16), 2, 0, 14, 1}, 2794 {10, UPB_SIZE(27, 27), 6, 0, 8, 1}, 2795 {999, UPB_SIZE(28, 32), 0, 0, 11, 3}, 2796}; 2797 2798const upb_msglayout google_protobuf_FieldOptions_msginit = { 2799 &google_protobuf_FieldOptions_submsgs[0], 2800 &google_protobuf_FieldOptions__fields[0], 2801 UPB_SIZE(32, 40), 7, false, 2802}; 2803 2804static const upb_msglayout *const google_protobuf_OneofOptions_submsgs[1] = { 2805 &google_protobuf_UninterpretedOption_msginit, 2806}; 2807 2808static const upb_msglayout_field google_protobuf_OneofOptions__fields[1] = { 2809 {999, UPB_SIZE(0, 0), 0, 0, 11, 3}, 2810}; 2811 2812const upb_msglayout google_protobuf_OneofOptions_msginit = { 2813 &google_protobuf_OneofOptions_submsgs[0], 2814 &google_protobuf_OneofOptions__fields[0], 2815 UPB_SIZE(4, 8), 1, false, 2816}; 2817 2818static const upb_msglayout *const google_protobuf_EnumOptions_submsgs[1] = { 2819 &google_protobuf_UninterpretedOption_msginit, 2820}; 2821 2822static const upb_msglayout_field google_protobuf_EnumOptions__fields[3] = { 2823 {2, UPB_SIZE(1, 1), 1, 0, 8, 1}, 2824 {3, UPB_SIZE(2, 2), 2, 0, 8, 1}, 2825 {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, 2826}; 2827 2828const upb_msglayout google_protobuf_EnumOptions_msginit = { 2829 &google_protobuf_EnumOptions_submsgs[0], 2830 &google_protobuf_EnumOptions__fields[0], 2831 UPB_SIZE(8, 16), 3, false, 2832}; 2833 2834static const upb_msglayout *const google_protobuf_EnumValueOptions_submsgs[1] = { 2835 
&google_protobuf_UninterpretedOption_msginit, 2836}; 2837 2838static const upb_msglayout_field google_protobuf_EnumValueOptions__fields[2] = { 2839 {1, UPB_SIZE(1, 1), 1, 0, 8, 1}, 2840 {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, 2841}; 2842 2843const upb_msglayout google_protobuf_EnumValueOptions_msginit = { 2844 &google_protobuf_EnumValueOptions_submsgs[0], 2845 &google_protobuf_EnumValueOptions__fields[0], 2846 UPB_SIZE(8, 16), 2, false, 2847}; 2848 2849static const upb_msglayout *const google_protobuf_ServiceOptions_submsgs[1] = { 2850 &google_protobuf_UninterpretedOption_msginit, 2851}; 2852 2853static const upb_msglayout_field google_protobuf_ServiceOptions__fields[2] = { 2854 {33, UPB_SIZE(1, 1), 1, 0, 8, 1}, 2855 {999, UPB_SIZE(4, 8), 0, 0, 11, 3}, 2856}; 2857 2858const upb_msglayout google_protobuf_ServiceOptions_msginit = { 2859 &google_protobuf_ServiceOptions_submsgs[0], 2860 &google_protobuf_ServiceOptions__fields[0], 2861 UPB_SIZE(8, 16), 2, false, 2862}; 2863 2864static const upb_msglayout *const google_protobuf_MethodOptions_submsgs[1] = { 2865 &google_protobuf_UninterpretedOption_msginit, 2866}; 2867 2868static const upb_msglayout_field google_protobuf_MethodOptions__fields[3] = { 2869 {33, UPB_SIZE(16, 16), 2, 0, 8, 1}, 2870 {34, UPB_SIZE(8, 8), 1, 0, 14, 1}, 2871 {999, UPB_SIZE(20, 24), 0, 0, 11, 3}, 2872}; 2873 2874const upb_msglayout google_protobuf_MethodOptions_msginit = { 2875 &google_protobuf_MethodOptions_submsgs[0], 2876 &google_protobuf_MethodOptions__fields[0], 2877 UPB_SIZE(24, 32), 3, false, 2878}; 2879 2880static const upb_msglayout *const google_protobuf_UninterpretedOption_submsgs[1] = { 2881 &google_protobuf_UninterpretedOption_NamePart_msginit, 2882}; 2883 2884static const upb_msglayout_field google_protobuf_UninterpretedOption__fields[7] = { 2885 {2, UPB_SIZE(56, 80), 0, 0, 11, 3}, 2886 {3, UPB_SIZE(32, 32), 4, 0, 9, 1}, 2887 {4, UPB_SIZE(8, 8), 1, 0, 4, 1}, 2888 {5, UPB_SIZE(16, 16), 2, 0, 3, 1}, 2889 {6, UPB_SIZE(24, 24), 3, 0, 1, 1}, 
2890 {7, UPB_SIZE(40, 48), 5, 0, 12, 1}, 2891 {8, UPB_SIZE(48, 64), 6, 0, 9, 1}, 2892}; 2893 2894const upb_msglayout google_protobuf_UninterpretedOption_msginit = { 2895 &google_protobuf_UninterpretedOption_submsgs[0], 2896 &google_protobuf_UninterpretedOption__fields[0], 2897 UPB_SIZE(64, 96), 7, false, 2898}; 2899 2900static const upb_msglayout_field google_protobuf_UninterpretedOption_NamePart__fields[2] = { 2901 {1, UPB_SIZE(4, 8), 2, 0, 9, 2}, 2902 {2, UPB_SIZE(1, 1), 1, 0, 8, 2}, 2903}; 2904 2905const upb_msglayout google_protobuf_UninterpretedOption_NamePart_msginit = { 2906 NULL, 2907 &google_protobuf_UninterpretedOption_NamePart__fields[0], 2908 UPB_SIZE(16, 32), 2, false, 2909}; 2910 2911static const upb_msglayout *const google_protobuf_SourceCodeInfo_submsgs[1] = { 2912 &google_protobuf_SourceCodeInfo_Location_msginit, 2913}; 2914 2915static const upb_msglayout_field google_protobuf_SourceCodeInfo__fields[1] = { 2916 {1, UPB_SIZE(0, 0), 0, 0, 11, 3}, 2917}; 2918 2919const upb_msglayout google_protobuf_SourceCodeInfo_msginit = { 2920 &google_protobuf_SourceCodeInfo_submsgs[0], 2921 &google_protobuf_SourceCodeInfo__fields[0], 2922 UPB_SIZE(4, 8), 1, false, 2923}; 2924 2925static const upb_msglayout_field google_protobuf_SourceCodeInfo_Location__fields[5] = { 2926 {1, UPB_SIZE(20, 40), 0, 0, 5, _UPB_LABEL_PACKED}, 2927 {2, UPB_SIZE(24, 48), 0, 0, 5, _UPB_LABEL_PACKED}, 2928 {3, UPB_SIZE(4, 8), 1, 0, 9, 1}, 2929 {4, UPB_SIZE(12, 24), 2, 0, 9, 1}, 2930 {6, UPB_SIZE(28, 56), 0, 0, 9, 3}, 2931}; 2932 2933const upb_msglayout google_protobuf_SourceCodeInfo_Location_msginit = { 2934 NULL, 2935 &google_protobuf_SourceCodeInfo_Location__fields[0], 2936 UPB_SIZE(32, 64), 5, false, 2937}; 2938 2939static const upb_msglayout *const google_protobuf_GeneratedCodeInfo_submsgs[1] = { 2940 &google_protobuf_GeneratedCodeInfo_Annotation_msginit, 2941}; 2942 2943static const upb_msglayout_field google_protobuf_GeneratedCodeInfo__fields[1] = { 2944 {1, UPB_SIZE(0, 0), 0, 0, 11, 
3}, 2945}; 2946 2947const upb_msglayout google_protobuf_GeneratedCodeInfo_msginit = { 2948 &google_protobuf_GeneratedCodeInfo_submsgs[0], 2949 &google_protobuf_GeneratedCodeInfo__fields[0], 2950 UPB_SIZE(4, 8), 1, false, 2951}; 2952 2953static const upb_msglayout_field google_protobuf_GeneratedCodeInfo_Annotation__fields[4] = { 2954 {1, UPB_SIZE(20, 32), 0, 0, 5, _UPB_LABEL_PACKED}, 2955 {2, UPB_SIZE(12, 16), 3, 0, 9, 1}, 2956 {3, UPB_SIZE(4, 4), 1, 0, 5, 1}, 2957 {4, UPB_SIZE(8, 8), 2, 0, 5, 1}, 2958}; 2959 2960const upb_msglayout google_protobuf_GeneratedCodeInfo_Annotation_msginit = { 2961 NULL, 2962 &google_protobuf_GeneratedCodeInfo_Annotation__fields[0], 2963 UPB_SIZE(24, 48), 4, false, 2964}; 2965 2966 2967 2968 2969#include <ctype.h> 2970#include <errno.h> 2971#include <stdlib.h> 2972#include <string.h> 2973 2974 2975typedef struct { 2976 size_t len; 2977 char str[1]; /* Null-terminated string data follows. */ 2978} str_t; 2979 2980static str_t *newstr(upb_alloc *alloc, const char *data, size_t len) { 2981 str_t *ret = upb_malloc(alloc, sizeof(*ret) + len); 2982 if (!ret) return NULL; 2983 ret->len = len; 2984 memcpy(ret->str, data, len); 2985 ret->str[len] = '\0'; 2986 return ret; 2987} 2988 2989struct upb_fielddef { 2990 const upb_filedef *file; 2991 const upb_msgdef *msgdef; 2992 const char *full_name; 2993 const char *json_name; 2994 union { 2995 int64_t sint; 2996 uint64_t uint; 2997 double dbl; 2998 float flt; 2999 bool boolean; 3000 str_t *str; 3001 } defaultval; 3002 const upb_oneofdef *oneof; 3003 union { 3004 const upb_msgdef *msgdef; 3005 const upb_enumdef *enumdef; 3006 const google_protobuf_FieldDescriptorProto *unresolved; 3007 } sub; 3008 uint32_t number_; 3009 uint16_t index_; 3010 uint16_t layout_index; 3011 uint32_t selector_base; /* Used to index into a upb::Handlers table. 
*/
  bool is_extension_;
  bool lazy_;
  bool packed_;
  bool proto3_optional_;
  upb_descriptortype_t type_;
  upb_label_t label_;
};

/* Definition of a message type. */
struct upb_msgdef {
  const upb_msglayout *layout;
  const upb_filedef *file;
  const char *full_name;
  /* Both computed by assign_msg_indices(). */
  uint32_t selector_count;
  uint32_t submsg_field_count;

  /* Tables for looking up fields by number and name. */
  upb_inttable itof;
  upb_strtable ntof;

  const upb_fielddef *fields;
  const upb_oneofdef *oneofs;
  int field_count;
  int oneof_count;
  /* Computed by check_oneofs(): the index of the first synthetic oneof, or
   * oneof_count when there is none. */
  int real_oneof_count;

  /* Is this a map-entry message? */
  bool map_entry;
  /* Derived from full_name by assign_msg_wellknowntype(). */
  upb_wellknowntype_t well_known_type;

  /* TODO(haberman): proper extension ranges (there can be multiple). */
};

/* Definition of an enum type. */
struct upb_enumdef {
  const upb_filedef *file;
  const char *full_name;
  upb_strtable ntoi;  /* Queried by name in upb_enumdef_ntoi(). */
  upb_inttable iton;  /* Queried by number in upb_enumdef_iton(). */
  int32_t defaultval;
};

/* Definition of a oneof. */
struct upb_oneofdef {
  const upb_msgdef *parent;
  const char *full_name;
  uint32_t index;  /* Position in the parent's oneofs array; see check_oneofs(). */
  upb_strtable ntof;
  upb_inttable itof;
};

/* Definition of a .proto file; each array below has a matching *_count. */
struct upb_filedef {
  const char *name;
  const char *package;
  const char *phpprefix;
  const char *phpnamespace;
  upb_syntax_t syntax;

  const upb_filedef **deps;
  const upb_msgdef *msgs;
  const upb_enumdef *enums;
  const upb_fielddef *exts;

  int dep_count;
  int msg_count;
  int enum_count;
  int ext_count;
};

struct upb_symtab {
  upb_arena *arena;
  upb_strtable syms;  /* full_name -> packed def ptr */
  upb_strtable files;  /* file_name -> upb_filedef* */
};

/* Inside a symtab we store tagged pointers to specific def types. */
/* The tag is kept in the low two bits of the pointer; see pack_def() and
 * unpack_def(). */
typedef enum {
  UPB_DEFTYPE_FIELD = 0,

  /* Only inside symtab table. */
  UPB_DEFTYPE_MSG = 1,
  UPB_DEFTYPE_ENUM = 2,

  /* Only inside message table. 
*/ 3093 UPB_DEFTYPE_ONEOF = 1, 3094 UPB_DEFTYPE_FIELD_JSONNAME = 2 3095} upb_deftype_t; 3096 3097static const void *unpack_def(upb_value v, upb_deftype_t type) { 3098 uintptr_t num = (uintptr_t)upb_value_getconstptr(v); 3099 return (num & 3) == type ? (const void*)(num & ~3) : NULL; 3100} 3101 3102static upb_value pack_def(const void *ptr, upb_deftype_t type) { 3103 uintptr_t num = (uintptr_t)ptr | type; 3104 return upb_value_constptr((const void*)num); 3105} 3106 3107/* isalpha() etc. from <ctype.h> are locale-dependent, which we don't want. */ 3108static bool upb_isbetween(char c, char low, char high) { 3109 return c >= low && c <= high; 3110} 3111 3112static bool upb_isletter(char c) { 3113 return upb_isbetween(c, 'A', 'Z') || upb_isbetween(c, 'a', 'z') || c == '_'; 3114} 3115 3116static bool upb_isalphanum(char c) { 3117 return upb_isletter(c) || upb_isbetween(c, '0', '9'); 3118} 3119 3120static bool upb_isident(upb_strview name, bool full, upb_status *s) { 3121 const char *str = name.data; 3122 size_t len = name.size; 3123 bool start = true; 3124 size_t i; 3125 for (i = 0; i < len; i++) { 3126 char c = str[i]; 3127 if (c == '.') { 3128 if (start || !full) { 3129 upb_status_seterrf(s, "invalid name: unexpected '.' (%s)", str); 3130 return false; 3131 } 3132 start = true; 3133 } else if (start) { 3134 if (!upb_isletter(c)) { 3135 upb_status_seterrf( 3136 s, "invalid name: path components must start with a letter (%s)", 3137 str); 3138 return false; 3139 } 3140 start = false; 3141 } else { 3142 if (!upb_isalphanum(c)) { 3143 upb_status_seterrf(s, "invalid name: non-alphanumeric character (%s)", 3144 str); 3145 return false; 3146 } 3147 } 3148 } 3149 return !start; 3150} 3151 3152static const char *shortdefname(const char *fullname) { 3153 const char *p; 3154 3155 if (fullname == NULL) { 3156 return NULL; 3157 } else if ((p = strrchr(fullname, '.')) == NULL) { 3158 /* No '.' in the name, return the full string. 
*/ 3159 return fullname; 3160 } else { 3161 /* Return one past the last '.'. */ 3162 return p + 1; 3163 } 3164} 3165 3166/* All submessage fields are lower than all other fields. 3167 * Secondly, fields are increasing in order. */ 3168uint32_t field_rank(const upb_fielddef *f) { 3169 uint32_t ret = upb_fielddef_number(f); 3170 const uint32_t high_bit = 1 << 30; 3171 UPB_ASSERT(ret < high_bit); 3172 if (!upb_fielddef_issubmsg(f)) 3173 ret |= high_bit; 3174 return ret; 3175} 3176 3177int cmp_fields(const void *p1, const void *p2) { 3178 const upb_fielddef *f1 = *(upb_fielddef*const*)p1; 3179 const upb_fielddef *f2 = *(upb_fielddef*const*)p2; 3180 return field_rank(f1) - field_rank(f2); 3181} 3182 3183/* A few implementation details of handlers. We put these here to avoid 3184 * a def -> handlers dependency. */ 3185 3186#define UPB_STATIC_SELECTOR_COUNT 3 /* Warning: also in upb/handlers.h. */ 3187 3188static uint32_t upb_handlers_selectorbaseoffset(const upb_fielddef *f) { 3189 return upb_fielddef_isseq(f) ? 2 : 0; 3190} 3191 3192static uint32_t upb_handlers_selectorcount(const upb_fielddef *f) { 3193 uint32_t ret = 1; 3194 if (upb_fielddef_isseq(f)) ret += 2; /* STARTSEQ/ENDSEQ */ 3195 if (upb_fielddef_isstring(f)) ret += 2; /* [STRING]/STARTSTR/ENDSTR */ 3196 if (upb_fielddef_issubmsg(f)) { 3197 /* ENDSUBMSG (STARTSUBMSG is at table beginning) */ 3198 ret += 0; 3199 if (upb_fielddef_lazy(f)) { 3200 /* STARTSTR/ENDSTR/STRING (for lazy) */ 3201 ret += 3; 3202 } 3203 } 3204 return ret; 3205} 3206 3207static void upb_status_setoom(upb_status *status) { 3208 upb_status_seterrmsg(status, "out of memory"); 3209} 3210 3211static bool assign_msg_indices(upb_msgdef *m, upb_status *s) { 3212 /* Sort fields. upb internally relies on UPB_TYPE_MESSAGE fields having the 3213 * lowest indexes, but we do not publicly guarantee this. 
*/ 3214 upb_msg_field_iter j; 3215 int i; 3216 uint32_t selector; 3217 int n = upb_msgdef_numfields(m); 3218 upb_fielddef **fields; 3219 3220 if (n == 0) { 3221 m->selector_count = UPB_STATIC_SELECTOR_COUNT; 3222 m->submsg_field_count = 0; 3223 return true; 3224 } 3225 3226 fields = upb_gmalloc(n * sizeof(*fields)); 3227 if (!fields) { 3228 upb_status_setoom(s); 3229 return false; 3230 } 3231 3232 m->submsg_field_count = 0; 3233 for(i = 0, upb_msg_field_begin(&j, m); 3234 !upb_msg_field_done(&j); 3235 upb_msg_field_next(&j), i++) { 3236 upb_fielddef *f = upb_msg_iter_field(&j); 3237 UPB_ASSERT(f->msgdef == m); 3238 if (upb_fielddef_issubmsg(f)) { 3239 m->submsg_field_count++; 3240 } 3241 fields[i] = f; 3242 } 3243 3244 qsort(fields, n, sizeof(*fields), cmp_fields); 3245 3246 selector = UPB_STATIC_SELECTOR_COUNT + m->submsg_field_count; 3247 for (i = 0; i < n; i++) { 3248 upb_fielddef *f = fields[i]; 3249 f->index_ = i; 3250 f->selector_base = selector + upb_handlers_selectorbaseoffset(f); 3251 selector += upb_handlers_selectorcount(f); 3252 } 3253 m->selector_count = selector; 3254 3255 upb_gfree(fields); 3256 return true; 3257} 3258 3259static bool check_oneofs(upb_msgdef *m, upb_status *s) { 3260 int i; 3261 int first_synthetic = -1; 3262 upb_oneofdef *mutable_oneofs = (upb_oneofdef*)m->oneofs; 3263 3264 for (i = 0; i < m->oneof_count; i++) { 3265 mutable_oneofs[i].index = i; 3266 3267 if (upb_oneofdef_issynthetic(&mutable_oneofs[i])) { 3268 if (first_synthetic == -1) { 3269 first_synthetic = i; 3270 } 3271 } else { 3272 if (first_synthetic != -1) { 3273 upb_status_seterrf( 3274 s, "Synthetic oneofs must be after all other oneofs: %s", 3275 upb_oneofdef_name(&mutable_oneofs[i])); 3276 return false; 3277 } 3278 } 3279 } 3280 3281 if (first_synthetic == -1) { 3282 m->real_oneof_count = m->oneof_count; 3283 } else { 3284 m->real_oneof_count = first_synthetic; 3285 } 3286 3287 return true; 3288} 3289 3290static void assign_msg_wellknowntype(upb_msgdef *m) { 3291 
const char *name = upb_msgdef_fullname(m); 3292 if (name == NULL) { 3293 m->well_known_type = UPB_WELLKNOWN_UNSPECIFIED; 3294 return; 3295 } 3296 if (!strcmp(name, "google.protobuf.Any")) { 3297 m->well_known_type = UPB_WELLKNOWN_ANY; 3298 } else if (!strcmp(name, "google.protobuf.FieldMask")) { 3299 m->well_known_type = UPB_WELLKNOWN_FIELDMASK; 3300 } else if (!strcmp(name, "google.protobuf.Duration")) { 3301 m->well_known_type = UPB_WELLKNOWN_DURATION; 3302 } else if (!strcmp(name, "google.protobuf.Timestamp")) { 3303 m->well_known_type = UPB_WELLKNOWN_TIMESTAMP; 3304 } else if (!strcmp(name, "google.protobuf.DoubleValue")) { 3305 m->well_known_type = UPB_WELLKNOWN_DOUBLEVALUE; 3306 } else if (!strcmp(name, "google.protobuf.FloatValue")) { 3307 m->well_known_type = UPB_WELLKNOWN_FLOATVALUE; 3308 } else if (!strcmp(name, "google.protobuf.Int64Value")) { 3309 m->well_known_type = UPB_WELLKNOWN_INT64VALUE; 3310 } else if (!strcmp(name, "google.protobuf.UInt64Value")) { 3311 m->well_known_type = UPB_WELLKNOWN_UINT64VALUE; 3312 } else if (!strcmp(name, "google.protobuf.Int32Value")) { 3313 m->well_known_type = UPB_WELLKNOWN_INT32VALUE; 3314 } else if (!strcmp(name, "google.protobuf.UInt32Value")) { 3315 m->well_known_type = UPB_WELLKNOWN_UINT32VALUE; 3316 } else if (!strcmp(name, "google.protobuf.BoolValue")) { 3317 m->well_known_type = UPB_WELLKNOWN_BOOLVALUE; 3318 } else if (!strcmp(name, "google.protobuf.StringValue")) { 3319 m->well_known_type = UPB_WELLKNOWN_STRINGVALUE; 3320 } else if (!strcmp(name, "google.protobuf.BytesValue")) { 3321 m->well_known_type = UPB_WELLKNOWN_BYTESVALUE; 3322 } else if (!strcmp(name, "google.protobuf.Value")) { 3323 m->well_known_type = UPB_WELLKNOWN_VALUE; 3324 } else if (!strcmp(name, "google.protobuf.ListValue")) { 3325 m->well_known_type = UPB_WELLKNOWN_LISTVALUE; 3326 } else if (!strcmp(name, "google.protobuf.Struct")) { 3327 m->well_known_type = UPB_WELLKNOWN_STRUCT; 3328 } else { 3329 m->well_known_type = 
UPB_WELLKNOWN_UNSPECIFIED; 3330 } 3331} 3332 3333 3334/* upb_enumdef ****************************************************************/ 3335 3336const char *upb_enumdef_fullname(const upb_enumdef *e) { 3337 return e->full_name; 3338} 3339 3340const char *upb_enumdef_name(const upb_enumdef *e) { 3341 return shortdefname(e->full_name); 3342} 3343 3344const upb_filedef *upb_enumdef_file(const upb_enumdef *e) { 3345 return e->file; 3346} 3347 3348int32_t upb_enumdef_default(const upb_enumdef *e) { 3349 UPB_ASSERT(upb_enumdef_iton(e, e->defaultval)); 3350 return e->defaultval; 3351} 3352 3353int upb_enumdef_numvals(const upb_enumdef *e) { 3354 return (int)upb_strtable_count(&e->ntoi); 3355} 3356 3357void upb_enum_begin(upb_enum_iter *i, const upb_enumdef *e) { 3358 /* We iterate over the ntoi table, to account for duplicate numbers. */ 3359 upb_strtable_begin(i, &e->ntoi); 3360} 3361 3362void upb_enum_next(upb_enum_iter *iter) { upb_strtable_next(iter); } 3363bool upb_enum_done(upb_enum_iter *iter) { return upb_strtable_done(iter); } 3364 3365bool upb_enumdef_ntoi(const upb_enumdef *def, const char *name, 3366 size_t len, int32_t *num) { 3367 upb_value v; 3368 if (!upb_strtable_lookup2(&def->ntoi, name, len, &v)) { 3369 return false; 3370 } 3371 if (num) *num = upb_value_getint32(v); 3372 return true; 3373} 3374 3375const char *upb_enumdef_iton(const upb_enumdef *def, int32_t num) { 3376 upb_value v; 3377 return upb_inttable_lookup32(&def->iton, num, &v) ? 
3378 upb_value_getcstr(v) : NULL; 3379} 3380 3381const char *upb_enum_iter_name(upb_enum_iter *iter) { 3382 return upb_strtable_iter_key(iter).data; 3383} 3384 3385int32_t upb_enum_iter_number(upb_enum_iter *iter) { 3386 return upb_value_getint32(upb_strtable_iter_value(iter)); 3387} 3388 3389 3390/* upb_fielddef ***************************************************************/ 3391 3392const char *upb_fielddef_fullname(const upb_fielddef *f) { 3393 return f->full_name; 3394} 3395 3396upb_fieldtype_t upb_fielddef_type(const upb_fielddef *f) { 3397 switch (f->type_) { 3398 case UPB_DESCRIPTOR_TYPE_DOUBLE: 3399 return UPB_TYPE_DOUBLE; 3400 case UPB_DESCRIPTOR_TYPE_FLOAT: 3401 return UPB_TYPE_FLOAT; 3402 case UPB_DESCRIPTOR_TYPE_INT64: 3403 case UPB_DESCRIPTOR_TYPE_SINT64: 3404 case UPB_DESCRIPTOR_TYPE_SFIXED64: 3405 return UPB_TYPE_INT64; 3406 case UPB_DESCRIPTOR_TYPE_INT32: 3407 case UPB_DESCRIPTOR_TYPE_SFIXED32: 3408 case UPB_DESCRIPTOR_TYPE_SINT32: 3409 return UPB_TYPE_INT32; 3410 case UPB_DESCRIPTOR_TYPE_UINT64: 3411 case UPB_DESCRIPTOR_TYPE_FIXED64: 3412 return UPB_TYPE_UINT64; 3413 case UPB_DESCRIPTOR_TYPE_UINT32: 3414 case UPB_DESCRIPTOR_TYPE_FIXED32: 3415 return UPB_TYPE_UINT32; 3416 case UPB_DESCRIPTOR_TYPE_ENUM: 3417 return UPB_TYPE_ENUM; 3418 case UPB_DESCRIPTOR_TYPE_BOOL: 3419 return UPB_TYPE_BOOL; 3420 case UPB_DESCRIPTOR_TYPE_STRING: 3421 return UPB_TYPE_STRING; 3422 case UPB_DESCRIPTOR_TYPE_BYTES: 3423 return UPB_TYPE_BYTES; 3424 case UPB_DESCRIPTOR_TYPE_GROUP: 3425 case UPB_DESCRIPTOR_TYPE_MESSAGE: 3426 return UPB_TYPE_MESSAGE; 3427 } 3428 UPB_UNREACHABLE(); 3429} 3430 3431upb_descriptortype_t upb_fielddef_descriptortype(const upb_fielddef *f) { 3432 return f->type_; 3433} 3434 3435uint32_t upb_fielddef_index(const upb_fielddef *f) { 3436 return f->index_; 3437} 3438 3439upb_label_t upb_fielddef_label(const upb_fielddef *f) { 3440 return f->label_; 3441} 3442 3443uint32_t upb_fielddef_number(const upb_fielddef *f) { 3444 return f->number_; 3445} 3446 
3447bool upb_fielddef_isextension(const upb_fielddef *f) { 3448 return f->is_extension_; 3449} 3450 3451bool upb_fielddef_lazy(const upb_fielddef *f) { 3452 return f->lazy_; 3453} 3454 3455bool upb_fielddef_packed(const upb_fielddef *f) { 3456 return f->packed_; 3457} 3458 3459const char *upb_fielddef_name(const upb_fielddef *f) { 3460 return shortdefname(f->full_name); 3461} 3462 3463const char *upb_fielddef_jsonname(const upb_fielddef *f) { 3464 return f->json_name; 3465} 3466 3467uint32_t upb_fielddef_selectorbase(const upb_fielddef *f) { 3468 return f->selector_base; 3469} 3470 3471const upb_filedef *upb_fielddef_file(const upb_fielddef *f) { 3472 return f->file; 3473} 3474 3475const upb_msgdef *upb_fielddef_containingtype(const upb_fielddef *f) { 3476 return f->msgdef; 3477} 3478 3479const upb_oneofdef *upb_fielddef_containingoneof(const upb_fielddef *f) { 3480 return f->oneof; 3481} 3482 3483const upb_oneofdef *upb_fielddef_realcontainingoneof(const upb_fielddef *f) { 3484 if (!f->oneof || upb_oneofdef_issynthetic(f->oneof)) return NULL; 3485 return f->oneof; 3486} 3487 3488static void chkdefaulttype(const upb_fielddef *f, int ctype) { 3489 UPB_UNUSED(f); 3490 UPB_UNUSED(ctype); 3491} 3492 3493int64_t upb_fielddef_defaultint64(const upb_fielddef *f) { 3494 chkdefaulttype(f, UPB_TYPE_INT64); 3495 return f->defaultval.sint; 3496} 3497 3498int32_t upb_fielddef_defaultint32(const upb_fielddef *f) { 3499 chkdefaulttype(f, UPB_TYPE_INT32); 3500 return (int32_t)f->defaultval.sint; 3501} 3502 3503uint64_t upb_fielddef_defaultuint64(const upb_fielddef *f) { 3504 chkdefaulttype(f, UPB_TYPE_UINT64); 3505 return f->defaultval.uint; 3506} 3507 3508uint32_t upb_fielddef_defaultuint32(const upb_fielddef *f) { 3509 chkdefaulttype(f, UPB_TYPE_UINT32); 3510 return (uint32_t)f->defaultval.uint; 3511} 3512 3513bool upb_fielddef_defaultbool(const upb_fielddef *f) { 3514 chkdefaulttype(f, UPB_TYPE_BOOL); 3515 return f->defaultval.boolean; 3516} 3517 3518float 
upb_fielddef_defaultfloat(const upb_fielddef *f) { 3519 chkdefaulttype(f, UPB_TYPE_FLOAT); 3520 return f->defaultval.flt; 3521} 3522 3523double upb_fielddef_defaultdouble(const upb_fielddef *f) { 3524 chkdefaulttype(f, UPB_TYPE_DOUBLE); 3525 return f->defaultval.dbl; 3526} 3527 3528const char *upb_fielddef_defaultstr(const upb_fielddef *f, size_t *len) { 3529 str_t *str = f->defaultval.str; 3530 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_STRING || 3531 upb_fielddef_type(f) == UPB_TYPE_BYTES || 3532 upb_fielddef_type(f) == UPB_TYPE_ENUM); 3533 if (str) { 3534 if (len) *len = str->len; 3535 return str->str; 3536 } else { 3537 if (len) *len = 0; 3538 return NULL; 3539 } 3540} 3541 3542const upb_msgdef *upb_fielddef_msgsubdef(const upb_fielddef *f) { 3543 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_MESSAGE); 3544 return f->sub.msgdef; 3545} 3546 3547const upb_enumdef *upb_fielddef_enumsubdef(const upb_fielddef *f) { 3548 UPB_ASSERT(upb_fielddef_type(f) == UPB_TYPE_ENUM); 3549 return f->sub.enumdef; 3550} 3551 3552const upb_msglayout_field *upb_fielddef_layout(const upb_fielddef *f) { 3553 return &f->msgdef->layout->fields[f->layout_index]; 3554} 3555 3556bool upb_fielddef_issubmsg(const upb_fielddef *f) { 3557 return upb_fielddef_type(f) == UPB_TYPE_MESSAGE; 3558} 3559 3560bool upb_fielddef_isstring(const upb_fielddef *f) { 3561 return upb_fielddef_type(f) == UPB_TYPE_STRING || 3562 upb_fielddef_type(f) == UPB_TYPE_BYTES; 3563} 3564 3565bool upb_fielddef_isseq(const upb_fielddef *f) { 3566 return upb_fielddef_label(f) == UPB_LABEL_REPEATED; 3567} 3568 3569bool upb_fielddef_isprimitive(const upb_fielddef *f) { 3570 return !upb_fielddef_isstring(f) && !upb_fielddef_issubmsg(f); 3571} 3572 3573bool upb_fielddef_ismap(const upb_fielddef *f) { 3574 return upb_fielddef_isseq(f) && upb_fielddef_issubmsg(f) && 3575 upb_msgdef_mapentry(upb_fielddef_msgsubdef(f)); 3576} 3577 3578bool upb_fielddef_hassubdef(const upb_fielddef *f) { 3579 return upb_fielddef_issubmsg(f) || 
/* Returns true iff |x| lies in the closed interval [low, high].  An inverted
 * interval (low > high) contains no values. */
static bool between(int32_t x, int32_t low, int32_t high) {
  if (x < low) {
    return false;
  }
  if (x > high) {
    return false;
  }
  return true;
}
3629 upb_value_getconstptr(val) : NULL; 3630} 3631 3632const upb_fielddef *upb_msgdef_ntof(const upb_msgdef *m, const char *name, 3633 size_t len) { 3634 upb_value val; 3635 3636 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { 3637 return NULL; 3638 } 3639 3640 return unpack_def(val, UPB_DEFTYPE_FIELD); 3641} 3642 3643const upb_oneofdef *upb_msgdef_ntoo(const upb_msgdef *m, const char *name, 3644 size_t len) { 3645 upb_value val; 3646 3647 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { 3648 return NULL; 3649 } 3650 3651 return unpack_def(val, UPB_DEFTYPE_ONEOF); 3652} 3653 3654bool upb_msgdef_lookupname(const upb_msgdef *m, const char *name, size_t len, 3655 const upb_fielddef **f, const upb_oneofdef **o) { 3656 upb_value val; 3657 3658 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { 3659 return false; 3660 } 3661 3662 *o = unpack_def(val, UPB_DEFTYPE_ONEOF); 3663 *f = unpack_def(val, UPB_DEFTYPE_FIELD); 3664 return *o || *f; /* False if this was a JSON name. */ 3665} 3666 3667const upb_fielddef *upb_msgdef_lookupjsonname(const upb_msgdef *m, 3668 const char *name, size_t len) { 3669 upb_value val; 3670 const upb_fielddef* f; 3671 3672 if (!upb_strtable_lookup2(&m->ntof, name, len, &val)) { 3673 return NULL; 3674 } 3675 3676 f = unpack_def(val, UPB_DEFTYPE_FIELD); 3677 if (!f) f = unpack_def(val, UPB_DEFTYPE_FIELD_JSONNAME); 3678 3679 return f; 3680} 3681 3682int upb_msgdef_numfields(const upb_msgdef *m) { 3683 return m->field_count; 3684} 3685 3686int upb_msgdef_numoneofs(const upb_msgdef *m) { 3687 return m->oneof_count; 3688} 3689 3690int upb_msgdef_numrealoneofs(const upb_msgdef *m) { 3691 return m->real_oneof_count; 3692} 3693 3694const upb_msglayout *upb_msgdef_layout(const upb_msgdef *m) { 3695 return m->layout; 3696} 3697 3698const upb_fielddef *_upb_msgdef_field(const upb_msgdef *m, int i) { 3699 if (i >= m->field_count) return NULL; 3700 return &m->fields[i]; 3701} 3702 3703bool upb_msgdef_mapentry(const upb_msgdef *m) { 3704 
return m->map_entry; 3705} 3706 3707upb_wellknowntype_t upb_msgdef_wellknowntype(const upb_msgdef *m) { 3708 return m->well_known_type; 3709} 3710 3711bool upb_msgdef_isnumberwrapper(const upb_msgdef *m) { 3712 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); 3713 return type >= UPB_WELLKNOWN_DOUBLEVALUE && 3714 type <= UPB_WELLKNOWN_UINT32VALUE; 3715} 3716 3717void upb_msg_field_begin(upb_msg_field_iter *iter, const upb_msgdef *m) { 3718 upb_inttable_begin(iter, &m->itof); 3719} 3720 3721void upb_msg_field_next(upb_msg_field_iter *iter) { upb_inttable_next(iter); } 3722 3723bool upb_msg_field_done(const upb_msg_field_iter *iter) { 3724 return upb_inttable_done(iter); 3725} 3726 3727upb_fielddef *upb_msg_iter_field(const upb_msg_field_iter *iter) { 3728 return (upb_fielddef *)upb_value_getconstptr(upb_inttable_iter_value(iter)); 3729} 3730 3731void upb_msg_field_iter_setdone(upb_msg_field_iter *iter) { 3732 upb_inttable_iter_setdone(iter); 3733} 3734 3735bool upb_msg_field_iter_isequal(const upb_msg_field_iter * iter1, 3736 const upb_msg_field_iter * iter2) { 3737 return upb_inttable_iter_isequal(iter1, iter2); 3738} 3739 3740void upb_msg_oneof_begin(upb_msg_oneof_iter *iter, const upb_msgdef *m) { 3741 upb_strtable_begin(iter, &m->ntof); 3742 /* We need to skip past any initial fields. */ 3743 while (!upb_strtable_done(iter) && 3744 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)) { 3745 upb_strtable_next(iter); 3746 } 3747} 3748 3749void upb_msg_oneof_next(upb_msg_oneof_iter *iter) { 3750 /* We need to skip past fields to return only oneofs. 
*/ 3751 do { 3752 upb_strtable_next(iter); 3753 } while (!upb_strtable_done(iter) && 3754 !unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF)); 3755} 3756 3757bool upb_msg_oneof_done(const upb_msg_oneof_iter *iter) { 3758 return upb_strtable_done(iter); 3759} 3760 3761const upb_oneofdef *upb_msg_iter_oneof(const upb_msg_oneof_iter *iter) { 3762 return unpack_def(upb_strtable_iter_value(iter), UPB_DEFTYPE_ONEOF); 3763} 3764 3765void upb_msg_oneof_iter_setdone(upb_msg_oneof_iter *iter) { 3766 upb_strtable_iter_setdone(iter); 3767} 3768 3769bool upb_msg_oneof_iter_isequal(const upb_msg_oneof_iter *iter1, 3770 const upb_msg_oneof_iter *iter2) { 3771 return upb_strtable_iter_isequal(iter1, iter2); 3772} 3773 3774/* upb_oneofdef ***************************************************************/ 3775 3776const char *upb_oneofdef_name(const upb_oneofdef *o) { 3777 return shortdefname(o->full_name); 3778} 3779 3780const upb_msgdef *upb_oneofdef_containingtype(const upb_oneofdef *o) { 3781 return o->parent; 3782} 3783 3784int upb_oneofdef_numfields(const upb_oneofdef *o) { 3785 return (int)upb_strtable_count(&o->ntof); 3786} 3787 3788uint32_t upb_oneofdef_index(const upb_oneofdef *o) { 3789 return o->index; 3790} 3791 3792bool upb_oneofdef_issynthetic(const upb_oneofdef *o) { 3793 upb_inttable_iter iter; 3794 const upb_fielddef *f; 3795 upb_inttable_begin(&iter, &o->itof); 3796 if (upb_oneofdef_numfields(o) != 1) return false; 3797 f = upb_value_getptr(upb_inttable_iter_value(&iter)); 3798 UPB_ASSERT(f); 3799 return f->proto3_optional_; 3800} 3801 3802const upb_fielddef *upb_oneofdef_ntof(const upb_oneofdef *o, 3803 const char *name, size_t length) { 3804 upb_value val; 3805 return upb_strtable_lookup2(&o->ntof, name, length, &val) ? 3806 upb_value_getptr(val) : NULL; 3807} 3808 3809const upb_fielddef *upb_oneofdef_itof(const upb_oneofdef *o, uint32_t num) { 3810 upb_value val; 3811 return upb_inttable_lookup32(&o->itof, num, &val) ? 
/* Returns true iff |val| has at most one bit set.  Note that this also
 * accepts 0, which has no bits set. */
static bool is_power_of_two(size_t val) {
  /* val & (val - 1) clears the lowest set bit of val. */
  const size_t without_lowest_bit = val & (val - 1);
  return without_lowest_bit == 0;
}
3885static uint32_t upb_msglayout_place(upb_msglayout *l, size_t size) { 3886 uint32_t ret; 3887 3888 l->size = align_up(l->size, size); 3889 ret = l->size; 3890 l->size += size; 3891 return ret; 3892} 3893 3894/* This function is the dynamic equivalent of message_layout.{cc,h} in upbc. 3895 * It computes a dynamic layout for all of the fields in |m|. */ 3896static bool make_layout(const upb_symtab *symtab, const upb_msgdef *m) { 3897 upb_msglayout *l = (upb_msglayout*)m->layout; 3898 upb_msg_field_iter it; 3899 upb_msg_oneof_iter oit; 3900 size_t hasbit; 3901 size_t submsg_count = m->submsg_field_count; 3902 const upb_msglayout **submsgs; 3903 upb_msglayout_field *fields; 3904 upb_alloc *alloc = upb_arena_alloc(symtab->arena); 3905 3906 memset(l, 0, sizeof(*l)); 3907 3908 fields = upb_malloc(alloc, upb_msgdef_numfields(m) * sizeof(*fields)); 3909 submsgs = upb_malloc(alloc, submsg_count * sizeof(*submsgs)); 3910 3911 if ((!fields && upb_msgdef_numfields(m)) || 3912 (!submsgs && submsg_count)) { 3913 /* OOM. */ 3914 return false; 3915 } 3916 3917 l->field_count = upb_msgdef_numfields(m); 3918 l->fields = fields; 3919 l->submsgs = submsgs; 3920 3921 if (upb_msgdef_mapentry(m)) { 3922 /* TODO(haberman): refactor this method so this special case is more 3923 * elegant. 
*/ 3924 const upb_fielddef *key = upb_msgdef_itof(m, 1); 3925 const upb_fielddef *val = upb_msgdef_itof(m, 2); 3926 fields[0].number = 1; 3927 fields[1].number = 2; 3928 fields[0].label = UPB_LABEL_OPTIONAL; 3929 fields[1].label = UPB_LABEL_OPTIONAL; 3930 fields[0].presence = 0; 3931 fields[1].presence = 0; 3932 fields[0].descriptortype = upb_fielddef_descriptortype(key); 3933 fields[1].descriptortype = upb_fielddef_descriptortype(val); 3934 fields[0].offset = 0; 3935 fields[1].offset = sizeof(upb_strview); 3936 fields[1].submsg_index = 0; 3937 3938 if (upb_fielddef_type(val) == UPB_TYPE_MESSAGE) { 3939 submsgs[0] = upb_fielddef_msgsubdef(val)->layout; 3940 } 3941 3942 l->field_count = 2; 3943 l->size = 2 * sizeof(upb_strview);align_up(l->size, 8); 3944 return true; 3945 } 3946 3947 /* Allocate data offsets in three stages: 3948 * 3949 * 1. hasbits. 3950 * 2. regular fields. 3951 * 3. oneof fields. 3952 * 3953 * OPT: There is a lot of room for optimization here to minimize the size. 3954 */ 3955 3956 /* Allocate hasbits and set basic field attributes. */ 3957 submsg_count = 0; 3958 for (upb_msg_field_begin(&it, m), hasbit = 0; 3959 !upb_msg_field_done(&it); 3960 upb_msg_field_next(&it)) { 3961 upb_fielddef* f = upb_msg_iter_field(&it); 3962 upb_msglayout_field *field = &fields[upb_fielddef_index(f)]; 3963 3964 field->number = upb_fielddef_number(f); 3965 field->descriptortype = upb_fielddef_descriptortype(f); 3966 field->label = upb_fielddef_label(f); 3967 3968 if (upb_fielddef_ismap(f)) { 3969 field->label = _UPB_LABEL_MAP; 3970 } else if (upb_fielddef_packed(f)) { 3971 field->label = _UPB_LABEL_PACKED; 3972 } 3973 3974 /* TODO: we probably should sort the fields by field number to match the 3975 * output of upbc, and to improve search speed for the table parser. 
*/ 3976 f->layout_index = f->index_; 3977 3978 if (upb_fielddef_issubmsg(f)) { 3979 const upb_msgdef *subm = upb_fielddef_msgsubdef(f); 3980 field->submsg_index = submsg_count++; 3981 submsgs[field->submsg_index] = subm->layout; 3982 } 3983 3984 if (upb_fielddef_haspresence(f) && !upb_fielddef_realcontainingoneof(f)) { 3985 /* We don't use hasbit 0, so that 0 can indicate "no presence" in the 3986 * table. This wastes one hasbit, but we don't worry about it for now. */ 3987 field->presence = ++hasbit; 3988 } else { 3989 field->presence = 0; 3990 } 3991 } 3992 3993 /* Account for space used by hasbits. */ 3994 l->size = div_round_up(hasbit, 8); 3995 3996 /* Allocate non-oneof fields. */ 3997 for (upb_msg_field_begin(&it, m); !upb_msg_field_done(&it); 3998 upb_msg_field_next(&it)) { 3999 const upb_fielddef* f = upb_msg_iter_field(&it); 4000 size_t field_size = upb_msg_fielddefsize(f); 4001 size_t index = upb_fielddef_index(f); 4002 4003 if (upb_fielddef_realcontainingoneof(f)) { 4004 /* Oneofs are handled separately below. */ 4005 continue; 4006 } 4007 4008 fields[index].offset = upb_msglayout_place(l, field_size); 4009 } 4010 4011 /* Allocate oneof fields. Each oneof field consists of a uint32 for the case 4012 * and space for the actual data. */ 4013 for (upb_msg_oneof_begin(&oit, m); !upb_msg_oneof_done(&oit); 4014 upb_msg_oneof_next(&oit)) { 4015 const upb_oneofdef* o = upb_msg_iter_oneof(&oit); 4016 upb_oneof_iter fit; 4017 4018 if (upb_oneofdef_issynthetic(o)) continue; 4019 4020 size_t case_size = sizeof(uint32_t); /* Could potentially optimize this. */ 4021 size_t field_size = 0; 4022 uint32_t case_offset; 4023 uint32_t data_offset; 4024 4025 /* Calculate field size: the max of all field sizes. */ 4026 for (upb_oneof_begin(&fit, o); 4027 !upb_oneof_done(&fit); 4028 upb_oneof_next(&fit)) { 4029 const upb_fielddef* f = upb_oneof_iter_field(&fit); 4030 field_size = UPB_MAX(field_size, upb_msg_fielddefsize(f)); 4031 } 4032 4033 /* Align and allocate case offset. 
*/ 4034 case_offset = upb_msglayout_place(l, case_size); 4035 data_offset = upb_msglayout_place(l, field_size); 4036 4037 for (upb_oneof_begin(&fit, o); 4038 !upb_oneof_done(&fit); 4039 upb_oneof_next(&fit)) { 4040 const upb_fielddef* f = upb_oneof_iter_field(&fit); 4041 fields[upb_fielddef_index(f)].offset = data_offset; 4042 fields[upb_fielddef_index(f)].presence = ~case_offset; 4043 } 4044 } 4045 4046 /* Size of the entire structure should be a multiple of its greatest 4047 * alignment. TODO: track overall alignment for real? */ 4048 l->size = align_up(l->size, 8); 4049 4050 return true; 4051} 4052 4053/* Code to build defs from descriptor protos. *********************************/ 4054 4055/* There is a question of how much validation to do here. It will be difficult 4056 * to perfectly match the amount of validation performed by proto2. But since 4057 * this code is used to directly build defs from Ruby (for example) we do need 4058 * to validate important constraints like uniqueness of names and numbers. */ 4059 4060#define CHK(x) if (!(x)) { return false; } 4061#define CHK_OOM(x) if (!(x)) { upb_status_setoom(ctx->status); return false; } 4062 4063typedef struct { 4064 const upb_symtab *symtab; 4065 upb_filedef *file; /* File we are building. */ 4066 upb_alloc *alloc; /* Allocate defs here. */ 4067 upb_alloc *tmp; /* Alloc for addtab and any other tmp data. */ 4068 upb_strtable *addtab; /* full_name -> packed def ptr for new defs */ 4069 const upb_msglayout **layouts; /* NULL if we should build layouts. */ 4070 upb_status *status; /* Record errors here. 
/* Writes the JSON name derived from |name| into |buf| and returns the number
 * of bytes the complete result needs, including the terminating NUL.
 *
 * At most |len| bytes are written.  If the result does not fit, it is
 * truncated and still NUL-terminated (when len > 0); the return value is the
 * untruncated size, so calling with len == 0 measures the required buffer
 * (|buf| is not touched in that case).
 *
 * If |name| is NULL, an empty string is written (when len > 0) and 0 is
 * returned. */
size_t getjsonname(const char *name, char *buf, size_t len) {
  size_t src, dst = 0;
  bool ucase_next = false;

/* Appends one byte: stored while it fits, replaced by the terminator at the
 * last slot, and only counted once the buffer is full. */
#define WRITE(byte) \
  do { \
    ++dst; \
    if (dst < len) buf[dst - 1] = (byte); \
    else if (dst == len) buf[dst - 1] = '\0'; \
  } while (0)

  if (!name) {
    WRITE('\0');
    return 0;
  }

  /* Implement the transformation as described in the spec:
   *   1. upper case all letters after an underscore.
   *   2. remove all underscores.
   */
  for (src = 0; name[src]; src++) {
    if (name[src] == '_') {
      ucase_next = true;
      continue;
    }

    if (ucase_next) {
      /* toupper() is only defined for EOF and unsigned char values, so a
       * plain char that may be negative has to be converted first. */
      WRITE((char)toupper((unsigned char)name[src]));
      ucase_next = false;
    } else {
      WRITE(name[src]);
    }
  }

  WRITE('\0');
  return dst;

#undef WRITE
}
*/ 4188 (void)base; 4189 UPB_ASSERT(false); 4190 return false; 4191 } 4192} 4193 4194const void *symtab_resolve(const symtab_addctx *ctx, const upb_fielddef *f, 4195 const char *base, upb_strview sym, 4196 upb_deftype_t type) { 4197 const void *ret; 4198 if (!resolvename(ctx->addtab, f, base, sym, type, ctx->status, &ret) && 4199 !resolvename(&ctx->symtab->syms, f, base, sym, type, ctx->status, &ret)) { 4200 if (upb_ok(ctx->status)) { 4201 upb_status_seterrf(ctx->status, "couldn't resolve name '%s'", sym.data); 4202 } 4203 return false; 4204 } 4205 return ret; 4206} 4207 4208static bool create_oneofdef( 4209 const symtab_addctx *ctx, upb_msgdef *m, 4210 const google_protobuf_OneofDescriptorProto *oneof_proto) { 4211 upb_oneofdef *o; 4212 upb_strview name = google_protobuf_OneofDescriptorProto_name(oneof_proto); 4213 upb_value v; 4214 4215 o = (upb_oneofdef*)&m->oneofs[m->oneof_count++]; 4216 o->parent = m; 4217 o->full_name = makefullname(ctx, m->full_name, name); 4218 4219 v = pack_def(o, UPB_DEFTYPE_ONEOF); 4220 CHK_OOM(symtab_add(ctx, o->full_name, v)); 4221 CHK_OOM(upb_strtable_insert3(&m->ntof, name.data, name.size, v, ctx->alloc)); 4222 4223 CHK_OOM(upb_inttable_init2(&o->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); 4224 CHK_OOM(upb_strtable_init2(&o->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); 4225 4226 return true; 4227} 4228 4229static bool parse_default(const symtab_addctx *ctx, const char *str, size_t len, 4230 upb_fielddef *f) { 4231 char *end; 4232 char nullz[64]; 4233 errno = 0; 4234 4235 switch (upb_fielddef_type(f)) { 4236 case UPB_TYPE_INT32: 4237 case UPB_TYPE_INT64: 4238 case UPB_TYPE_UINT32: 4239 case UPB_TYPE_UINT64: 4240 case UPB_TYPE_DOUBLE: 4241 case UPB_TYPE_FLOAT: 4242 /* Standard C number parsing functions expect null-terminated strings. 
*/ 4243 if (len >= sizeof(nullz) - 1) { 4244 return false; 4245 } 4246 memcpy(nullz, str, len); 4247 nullz[len] = '\0'; 4248 str = nullz; 4249 break; 4250 default: 4251 break; 4252 } 4253 4254 switch (upb_fielddef_type(f)) { 4255 case UPB_TYPE_INT32: { 4256 long val = strtol(str, &end, 0); 4257 CHK(val <= INT32_MAX && val >= INT32_MIN && errno != ERANGE && !*end); 4258 f->defaultval.sint = val; 4259 break; 4260 } 4261 case UPB_TYPE_ENUM: { 4262 const upb_enumdef *e = f->sub.enumdef; 4263 int32_t val; 4264 CHK(upb_enumdef_ntoi(e, str, len, &val)); 4265 f->defaultval.sint = val; 4266 break; 4267 } 4268 case UPB_TYPE_INT64: { 4269 /* XXX: Need to write our own strtoll, since it's not available in c89. */ 4270 int64_t val = strtol(str, &end, 0); 4271 CHK(val <= INT64_MAX && val >= INT64_MIN && errno != ERANGE && !*end); 4272 f->defaultval.sint = val; 4273 break; 4274 } 4275 case UPB_TYPE_UINT32: { 4276 unsigned long val = strtoul(str, &end, 0); 4277 CHK(val <= UINT32_MAX && errno != ERANGE && !*end); 4278 f->defaultval.uint = val; 4279 break; 4280 } 4281 case UPB_TYPE_UINT64: { 4282 /* XXX: Need to write our own strtoull, since it's not available in c89. */ 4283 uint64_t val = strtoul(str, &end, 0); 4284 CHK(val <= UINT64_MAX && errno != ERANGE && !*end); 4285 f->defaultval.uint = val; 4286 break; 4287 } 4288 case UPB_TYPE_DOUBLE: { 4289 double val = strtod(str, &end); 4290 CHK(errno != ERANGE && !*end); 4291 f->defaultval.dbl = val; 4292 break; 4293 } 4294 case UPB_TYPE_FLOAT: { 4295 /* XXX: Need to write our own strtof, since it's not available in c89. 
*/ 4296 float val = strtod(str, &end); 4297 CHK(errno != ERANGE && !*end); 4298 f->defaultval.flt = val; 4299 break; 4300 } 4301 case UPB_TYPE_BOOL: { 4302 if (streql2(str, len, "false")) { 4303 f->defaultval.boolean = false; 4304 } else if (streql2(str, len, "true")) { 4305 f->defaultval.boolean = true; 4306 } else { 4307 return false; 4308 } 4309 break; 4310 } 4311 case UPB_TYPE_STRING: 4312 f->defaultval.str = newstr(ctx->alloc, str, len); 4313 break; 4314 case UPB_TYPE_BYTES: 4315 /* XXX: need to interpret the C-escaped value. */ 4316 f->defaultval.str = newstr(ctx->alloc, str, len); 4317 break; 4318 case UPB_TYPE_MESSAGE: 4319 /* Should not have a default value. */ 4320 return false; 4321 } 4322 return true; 4323} 4324 4325static void set_default_default(const symtab_addctx *ctx, upb_fielddef *f) { 4326 switch (upb_fielddef_type(f)) { 4327 case UPB_TYPE_INT32: 4328 case UPB_TYPE_INT64: 4329 case UPB_TYPE_ENUM: 4330 f->defaultval.sint = 0; 4331 break; 4332 case UPB_TYPE_UINT64: 4333 case UPB_TYPE_UINT32: 4334 f->defaultval.uint = 0; 4335 break; 4336 case UPB_TYPE_DOUBLE: 4337 case UPB_TYPE_FLOAT: 4338 f->defaultval.dbl = 0; 4339 break; 4340 case UPB_TYPE_STRING: 4341 case UPB_TYPE_BYTES: 4342 f->defaultval.str = newstr(ctx->alloc, NULL, 0); 4343 break; 4344 case UPB_TYPE_BOOL: 4345 f->defaultval.boolean = false; 4346 break; 4347 case UPB_TYPE_MESSAGE: 4348 break; 4349 } 4350} 4351 4352static bool create_fielddef( 4353 const symtab_addctx *ctx, const char *prefix, upb_msgdef *m, 4354 const google_protobuf_FieldDescriptorProto *field_proto) { 4355 upb_alloc *alloc = ctx->alloc; 4356 upb_fielddef *f; 4357 const google_protobuf_FieldOptions *options; 4358 upb_strview name; 4359 const char *full_name; 4360 const char *json_name; 4361 const char *shortname; 4362 uint32_t field_number; 4363 4364 if (!google_protobuf_FieldDescriptorProto_has_name(field_proto)) { 4365 upb_status_seterrmsg(ctx->status, "field has no name"); 4366 return false; 4367 } 4368 4369 name = 
google_protobuf_FieldDescriptorProto_name(field_proto); 4370 CHK(upb_isident(name, false, ctx->status)); 4371 full_name = makefullname(ctx, prefix, name); 4372 shortname = shortdefname(full_name); 4373 4374 if (google_protobuf_FieldDescriptorProto_has_json_name(field_proto)) { 4375 json_name = strviewdup( 4376 ctx, google_protobuf_FieldDescriptorProto_json_name(field_proto)); 4377 } else { 4378 json_name = makejsonname(shortname, ctx->alloc); 4379 } 4380 4381 field_number = google_protobuf_FieldDescriptorProto_number(field_proto); 4382 4383 if (field_number == 0 || field_number > UPB_MAX_FIELDNUMBER) { 4384 upb_status_seterrf(ctx->status, "invalid field number (%u)", field_number); 4385 return false; 4386 } 4387 4388 if (m) { 4389 /* direct message field. */ 4390 upb_value v, field_v, json_v; 4391 size_t json_size; 4392 4393 f = (upb_fielddef*)&m->fields[m->field_count++]; 4394 f->msgdef = m; 4395 f->is_extension_ = false; 4396 4397 if (upb_strtable_lookup(&m->ntof, shortname, NULL)) { 4398 upb_status_seterrf(ctx->status, "duplicate field name (%s)", shortname); 4399 return false; 4400 } 4401 4402 if (upb_strtable_lookup(&m->ntof, json_name, NULL)) { 4403 upb_status_seterrf(ctx->status, "duplicate json_name (%s)", json_name); 4404 return false; 4405 } 4406 4407 if (upb_inttable_lookup(&m->itof, field_number, NULL)) { 4408 upb_status_seterrf(ctx->status, "duplicate field number (%u)", 4409 field_number); 4410 return false; 4411 } 4412 4413 field_v = pack_def(f, UPB_DEFTYPE_FIELD); 4414 json_v = pack_def(f, UPB_DEFTYPE_FIELD_JSONNAME); 4415 v = upb_value_constptr(f); 4416 json_size = strlen(json_name); 4417 4418 CHK_OOM( 4419 upb_strtable_insert3(&m->ntof, name.data, name.size, field_v, alloc)); 4420 CHK_OOM(upb_inttable_insert2(&m->itof, field_number, v, alloc)); 4421 4422 if (strcmp(shortname, json_name) != 0) { 4423 upb_strtable_insert3(&m->ntof, json_name, json_size, json_v, alloc); 4424 } 4425 4426 if (ctx->layouts) { 4427 const upb_msglayout_field *fields = 
m->layout->fields; 4428 int count = m->layout->field_count; 4429 bool found = false; 4430 int i; 4431 for (i = 0; i < count; i++) { 4432 if (fields[i].number == field_number) { 4433 f->layout_index = i; 4434 found = true; 4435 break; 4436 } 4437 } 4438 UPB_ASSERT(found); 4439 } 4440 } else { 4441 /* extension field. */ 4442 f = (upb_fielddef*)&ctx->file->exts[ctx->file->ext_count++]; 4443 f->is_extension_ = true; 4444 CHK_OOM(symtab_add(ctx, full_name, pack_def(f, UPB_DEFTYPE_FIELD))); 4445 } 4446 4447 f->full_name = full_name; 4448 f->json_name = json_name; 4449 f->file = ctx->file; 4450 f->type_ = (int)google_protobuf_FieldDescriptorProto_type(field_proto); 4451 f->label_ = (int)google_protobuf_FieldDescriptorProto_label(field_proto); 4452 f->number_ = field_number; 4453 f->oneof = NULL; 4454 f->proto3_optional_ = 4455 google_protobuf_FieldDescriptorProto_proto3_optional(field_proto); 4456 4457 /* We can't resolve the subdef or (in the case of extensions) the containing 4458 * message yet, because it may not have been defined yet. We stash a pointer 4459 * to the field_proto until later when we can properly resolve it. 
*/ 4460 f->sub.unresolved = field_proto; 4461 4462 if (f->label_ == UPB_LABEL_REQUIRED && f->file->syntax == UPB_SYNTAX_PROTO3) { 4463 upb_status_seterrf(ctx->status, "proto3 fields cannot be required (%s)", 4464 f->full_name); 4465 return false; 4466 } 4467 4468 if (google_protobuf_FieldDescriptorProto_has_oneof_index(field_proto)) { 4469 int oneof_index = 4470 google_protobuf_FieldDescriptorProto_oneof_index(field_proto); 4471 upb_oneofdef *oneof; 4472 upb_value v = upb_value_constptr(f); 4473 4474 if (upb_fielddef_label(f) != UPB_LABEL_OPTIONAL) { 4475 upb_status_seterrf(ctx->status, 4476 "fields in oneof must have OPTIONAL label (%s)", 4477 f->full_name); 4478 return false; 4479 } 4480 4481 if (!m) { 4482 upb_status_seterrf(ctx->status, 4483 "oneof_index provided for extension field (%s)", 4484 f->full_name); 4485 return false; 4486 } 4487 4488 if (oneof_index >= m->oneof_count) { 4489 upb_status_seterrf(ctx->status, "oneof_index out of range (%s)", 4490 f->full_name); 4491 return false; 4492 } 4493 4494 oneof = (upb_oneofdef*)&m->oneofs[oneof_index]; 4495 f->oneof = oneof; 4496 4497 CHK(upb_inttable_insert2(&oneof->itof, f->number_, v, alloc)); 4498 CHK(upb_strtable_insert3(&oneof->ntof, name.data, name.size, v, alloc)); 4499 } else { 4500 f->oneof = NULL; 4501 } 4502 4503 if (google_protobuf_FieldDescriptorProto_has_options(field_proto)) { 4504 options = google_protobuf_FieldDescriptorProto_options(field_proto); 4505 f->lazy_ = google_protobuf_FieldOptions_lazy(options); 4506 f->packed_ = google_protobuf_FieldOptions_packed(options); 4507 } else { 4508 f->lazy_ = false; 4509 f->packed_ = false; 4510 } 4511 4512 return true; 4513} 4514 4515static bool create_enumdef( 4516 const symtab_addctx *ctx, const char *prefix, 4517 const google_protobuf_EnumDescriptorProto *enum_proto) { 4518 upb_enumdef *e; 4519 const google_protobuf_EnumValueDescriptorProto *const *values; 4520 upb_strview name; 4521 size_t i, n; 4522 4523 name = 
google_protobuf_EnumDescriptorProto_name(enum_proto); 4524 CHK(upb_isident(name, false, ctx->status)); 4525 4526 e = (upb_enumdef*)&ctx->file->enums[ctx->file->enum_count++]; 4527 e->full_name = makefullname(ctx, prefix, name); 4528 CHK_OOM(symtab_add(ctx, e->full_name, pack_def(e, UPB_DEFTYPE_ENUM))); 4529 4530 CHK_OOM(upb_strtable_init2(&e->ntoi, UPB_CTYPE_INT32, ctx->alloc)); 4531 CHK_OOM(upb_inttable_init2(&e->iton, UPB_CTYPE_CSTR, ctx->alloc)); 4532 4533 e->file = ctx->file; 4534 e->defaultval = 0; 4535 4536 values = google_protobuf_EnumDescriptorProto_value(enum_proto, &n); 4537 4538 if (n == 0) { 4539 upb_status_seterrf(ctx->status, 4540 "enums must contain at least one value (%s)", 4541 e->full_name); 4542 return false; 4543 } 4544 4545 for (i = 0; i < n; i++) { 4546 const google_protobuf_EnumValueDescriptorProto *value = values[i]; 4547 upb_strview name = google_protobuf_EnumValueDescriptorProto_name(value); 4548 char *name2 = strviewdup(ctx, name); 4549 int32_t num = google_protobuf_EnumValueDescriptorProto_number(value); 4550 upb_value v = upb_value_int32(num); 4551 4552 if (i == 0 && e->file->syntax == UPB_SYNTAX_PROTO3 && num != 0) { 4553 upb_status_seterrf(ctx->status, 4554 "for proto3, the first enum value must be zero (%s)", 4555 e->full_name); 4556 return false; 4557 } 4558 4559 if (upb_strtable_lookup(&e->ntoi, name2, NULL)) { 4560 upb_status_seterrf(ctx->status, "duplicate enum label '%s'", name2); 4561 return false; 4562 } 4563 4564 CHK_OOM(name2) 4565 CHK_OOM( 4566 upb_strtable_insert3(&e->ntoi, name2, strlen(name2), v, ctx->alloc)); 4567 4568 if (!upb_inttable_lookup(&e->iton, num, NULL)) { 4569 upb_value v = upb_value_cstr(name2); 4570 CHK_OOM(upb_inttable_insert2(&e->iton, num, v, ctx->alloc)); 4571 } 4572 } 4573 4574 upb_inttable_compact2(&e->iton, ctx->alloc); 4575 4576 return true; 4577} 4578 4579static bool create_msgdef(symtab_addctx *ctx, const char *prefix, 4580 const google_protobuf_DescriptorProto *msg_proto) { 4581 upb_msgdef *m; 
4582 const google_protobuf_MessageOptions *options; 4583 const google_protobuf_OneofDescriptorProto *const *oneofs; 4584 const google_protobuf_FieldDescriptorProto *const *fields; 4585 const google_protobuf_EnumDescriptorProto *const *enums; 4586 const google_protobuf_DescriptorProto *const *msgs; 4587 size_t i, n; 4588 upb_strview name; 4589 4590 name = google_protobuf_DescriptorProto_name(msg_proto); 4591 CHK(upb_isident(name, false, ctx->status)); 4592 4593 m = (upb_msgdef*)&ctx->file->msgs[ctx->file->msg_count++]; 4594 m->full_name = makefullname(ctx, prefix, name); 4595 CHK_OOM(symtab_add(ctx, m->full_name, pack_def(m, UPB_DEFTYPE_MSG))); 4596 4597 CHK_OOM(upb_inttable_init2(&m->itof, UPB_CTYPE_CONSTPTR, ctx->alloc)); 4598 CHK_OOM(upb_strtable_init2(&m->ntof, UPB_CTYPE_CONSTPTR, ctx->alloc)); 4599 4600 m->file = ctx->file; 4601 m->map_entry = false; 4602 4603 options = google_protobuf_DescriptorProto_options(msg_proto); 4604 4605 if (options) { 4606 m->map_entry = google_protobuf_MessageOptions_map_entry(options); 4607 } 4608 4609 if (ctx->layouts) { 4610 m->layout = *ctx->layouts; 4611 ctx->layouts++; 4612 } else { 4613 /* Allocate now (to allow cross-linking), populate later. */ 4614 m->layout = upb_malloc(ctx->alloc, sizeof(*m->layout)); 4615 } 4616 4617 oneofs = google_protobuf_DescriptorProto_oneof_decl(msg_proto, &n); 4618 m->oneof_count = 0; 4619 m->oneofs = upb_malloc(ctx->alloc, sizeof(*m->oneofs) * n); 4620 for (i = 0; i < n; i++) { 4621 CHK(create_oneofdef(ctx, m, oneofs[i])); 4622 } 4623 4624 fields = google_protobuf_DescriptorProto_field(msg_proto, &n); 4625 m->field_count = 0; 4626 m->fields = upb_malloc(ctx->alloc, sizeof(*m->fields) * n); 4627 for (i = 0; i < n; i++) { 4628 CHK(create_fielddef(ctx, m->full_name, m, fields[i])); 4629 } 4630 4631 CHK(assign_msg_indices(m, ctx->status)); 4632 CHK(check_oneofs(m, ctx->status)); 4633 assign_msg_wellknowntype(m); 4634 upb_inttable_compact2(&m->itof, ctx->alloc); 4635 4636 /* This message is built. 
Now build nested messages and enums. */ 4637 4638 enums = google_protobuf_DescriptorProto_enum_type(msg_proto, &n); 4639 for (i = 0; i < n; i++) { 4640 CHK(create_enumdef(ctx, m->full_name, enums[i])); 4641 } 4642 4643 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); 4644 for (i = 0; i < n; i++) { 4645 CHK(create_msgdef(ctx, m->full_name, msgs[i])); 4646 } 4647 4648 return true; 4649} 4650 4651typedef struct { 4652 int msg_count; 4653 int enum_count; 4654 int ext_count; 4655} decl_counts; 4656 4657static void count_types_in_msg(const google_protobuf_DescriptorProto *msg_proto, 4658 decl_counts *counts) { 4659 const google_protobuf_DescriptorProto *const *msgs; 4660 size_t i, n; 4661 4662 counts->msg_count++; 4663 4664 msgs = google_protobuf_DescriptorProto_nested_type(msg_proto, &n); 4665 for (i = 0; i < n; i++) { 4666 count_types_in_msg(msgs[i], counts); 4667 } 4668 4669 google_protobuf_DescriptorProto_enum_type(msg_proto, &n); 4670 counts->enum_count += n; 4671 4672 google_protobuf_DescriptorProto_extension(msg_proto, &n); 4673 counts->ext_count += n; 4674} 4675 4676static void count_types_in_file( 4677 const google_protobuf_FileDescriptorProto *file_proto, 4678 decl_counts *counts) { 4679 const google_protobuf_DescriptorProto *const *msgs; 4680 size_t i, n; 4681 4682 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); 4683 for (i = 0; i < n; i++) { 4684 count_types_in_msg(msgs[i], counts); 4685 } 4686 4687 google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); 4688 counts->enum_count += n; 4689 4690 google_protobuf_FileDescriptorProto_extension(file_proto, &n); 4691 counts->ext_count += n; 4692} 4693 4694static bool resolve_fielddef(const symtab_addctx *ctx, const char *prefix, 4695 upb_fielddef *f) { 4696 upb_strview name; 4697 const google_protobuf_FieldDescriptorProto *field_proto = f->sub.unresolved; 4698 4699 if (f->is_extension_) { 4700 if (!google_protobuf_FieldDescriptorProto_has_extendee(field_proto)) { 
4701 upb_status_seterrf(ctx->status, 4702 "extension for field '%s' had no extendee", 4703 f->full_name); 4704 return false; 4705 } 4706 4707 name = google_protobuf_FieldDescriptorProto_extendee(field_proto); 4708 f->msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); 4709 CHK(f->msgdef); 4710 } 4711 4712 if ((upb_fielddef_issubmsg(f) || f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) && 4713 !google_protobuf_FieldDescriptorProto_has_type_name(field_proto)) { 4714 upb_status_seterrf(ctx->status, "field '%s' is missing type name", 4715 f->full_name); 4716 return false; 4717 } 4718 4719 name = google_protobuf_FieldDescriptorProto_type_name(field_proto); 4720 4721 if (upb_fielddef_issubmsg(f)) { 4722 f->sub.msgdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_MSG); 4723 CHK(f->sub.msgdef); 4724 } else if (f->type_ == UPB_DESCRIPTOR_TYPE_ENUM) { 4725 f->sub.enumdef = symtab_resolve(ctx, f, prefix, name, UPB_DEFTYPE_ENUM); 4726 CHK(f->sub.enumdef); 4727 } 4728 4729 /* Have to delay resolving of the default value until now because of the enum 4730 * case, since enum defaults are specified with a label. 
*/ 4731 if (google_protobuf_FieldDescriptorProto_has_default_value(field_proto)) { 4732 upb_strview defaultval = 4733 google_protobuf_FieldDescriptorProto_default_value(field_proto); 4734 4735 if (f->file->syntax == UPB_SYNTAX_PROTO3) { 4736 upb_status_seterrf(ctx->status, 4737 "proto3 fields cannot have explicit defaults (%s)", 4738 f->full_name); 4739 return false; 4740 } 4741 4742 if (upb_fielddef_issubmsg(f)) { 4743 upb_status_seterrf(ctx->status, 4744 "message fields cannot have explicit defaults (%s)", 4745 f->full_name); 4746 return false; 4747 } 4748 4749 if (!parse_default(ctx, defaultval.data, defaultval.size, f)) { 4750 upb_status_seterrf(ctx->status, 4751 "couldn't parse default '" UPB_STRVIEW_FORMAT 4752 "' for field (%s)", 4753 UPB_STRVIEW_ARGS(defaultval), f->full_name); 4754 return false; 4755 } 4756 } else { 4757 set_default_default(ctx, f); 4758 } 4759 4760 return true; 4761} 4762 4763static bool build_filedef( 4764 symtab_addctx *ctx, upb_filedef *file, 4765 const google_protobuf_FileDescriptorProto *file_proto) { 4766 upb_alloc *alloc = ctx->alloc; 4767 const google_protobuf_FileOptions *file_options_proto; 4768 const google_protobuf_DescriptorProto *const *msgs; 4769 const google_protobuf_EnumDescriptorProto *const *enums; 4770 const google_protobuf_FieldDescriptorProto *const *exts; 4771 const upb_strview* strs; 4772 size_t i, n; 4773 decl_counts counts = {0}; 4774 4775 count_types_in_file(file_proto, &counts); 4776 4777 file->msgs = upb_malloc(alloc, sizeof(*file->msgs) * counts.msg_count); 4778 file->enums = upb_malloc(alloc, sizeof(*file->enums) * counts.enum_count); 4779 file->exts = upb_malloc(alloc, sizeof(*file->exts) * counts.ext_count); 4780 4781 CHK_OOM(counts.msg_count == 0 || file->msgs); 4782 CHK_OOM(counts.enum_count == 0 || file->enums); 4783 CHK_OOM(counts.ext_count == 0 || file->exts); 4784 4785 /* We increment these as defs are added. 
*/ 4786 file->msg_count = 0; 4787 file->enum_count = 0; 4788 file->ext_count = 0; 4789 4790 if (!google_protobuf_FileDescriptorProto_has_name(file_proto)) { 4791 upb_status_seterrmsg(ctx->status, "File has no name"); 4792 return false; 4793 } 4794 4795 file->name = 4796 strviewdup(ctx, google_protobuf_FileDescriptorProto_name(file_proto)); 4797 file->phpprefix = NULL; 4798 file->phpnamespace = NULL; 4799 4800 if (google_protobuf_FileDescriptorProto_has_package(file_proto)) { 4801 upb_strview package = 4802 google_protobuf_FileDescriptorProto_package(file_proto); 4803 CHK(upb_isident(package, true, ctx->status)); 4804 file->package = strviewdup(ctx, package); 4805 } else { 4806 file->package = NULL; 4807 } 4808 4809 if (google_protobuf_FileDescriptorProto_has_syntax(file_proto)) { 4810 upb_strview syntax = 4811 google_protobuf_FileDescriptorProto_syntax(file_proto); 4812 4813 if (streql_view(syntax, "proto2")) { 4814 file->syntax = UPB_SYNTAX_PROTO2; 4815 } else if (streql_view(syntax, "proto3")) { 4816 file->syntax = UPB_SYNTAX_PROTO3; 4817 } else { 4818 upb_status_seterrf(ctx->status, "Invalid syntax '" UPB_STRVIEW_FORMAT "'", 4819 UPB_STRVIEW_ARGS(syntax)); 4820 return false; 4821 } 4822 } else { 4823 file->syntax = UPB_SYNTAX_PROTO2; 4824 } 4825 4826 /* Read options. */ 4827 file_options_proto = google_protobuf_FileDescriptorProto_options(file_proto); 4828 if (file_options_proto) { 4829 if (google_protobuf_FileOptions_has_php_class_prefix(file_options_proto)) { 4830 file->phpprefix = strviewdup( 4831 ctx, 4832 google_protobuf_FileOptions_php_class_prefix(file_options_proto)); 4833 } 4834 if (google_protobuf_FileOptions_has_php_namespace(file_options_proto)) { 4835 file->phpnamespace = strviewdup( 4836 ctx, google_protobuf_FileOptions_php_namespace(file_options_proto)); 4837 } 4838 } 4839 4840 /* Verify dependencies. 
*/ 4841 strs = google_protobuf_FileDescriptorProto_dependency(file_proto, &n); 4842 file->deps = upb_malloc(alloc, sizeof(*file->deps) * n) ; 4843 CHK_OOM(n == 0 || file->deps); 4844 4845 for (i = 0; i < n; i++) { 4846 upb_strview dep_name = strs[i]; 4847 upb_value v; 4848 if (!upb_strtable_lookup2(&ctx->symtab->files, dep_name.data, 4849 dep_name.size, &v)) { 4850 upb_status_seterrf(ctx->status, 4851 "Depends on file '" UPB_STRVIEW_FORMAT 4852 "', but it has not been loaded", 4853 UPB_STRVIEW_ARGS(dep_name)); 4854 return false; 4855 } 4856 file->deps[i] = upb_value_getconstptr(v); 4857 } 4858 4859 /* Create messages. */ 4860 msgs = google_protobuf_FileDescriptorProto_message_type(file_proto, &n); 4861 for (i = 0; i < n; i++) { 4862 CHK(create_msgdef(ctx, file->package, msgs[i])); 4863 } 4864 4865 /* Create enums. */ 4866 enums = google_protobuf_FileDescriptorProto_enum_type(file_proto, &n); 4867 for (i = 0; i < n; i++) { 4868 CHK(create_enumdef(ctx, file->package, enums[i])); 4869 } 4870 4871 /* Create extensions. */ 4872 exts = google_protobuf_FileDescriptorProto_extension(file_proto, &n); 4873 file->exts = upb_malloc(alloc, sizeof(*file->exts) * n); 4874 CHK_OOM(n == 0 || file->exts); 4875 for (i = 0; i < n; i++) { 4876 CHK(create_fielddef(ctx, file->package, NULL, exts[i])); 4877 } 4878 4879 /* Now that all names are in the table, build layouts and resolve refs. 
*/ 4880 for (i = 0; i < file->ext_count; i++) { 4881 CHK(resolve_fielddef(ctx, file->package, (upb_fielddef*)&file->exts[i])); 4882 } 4883 4884 for (i = 0; i < file->msg_count; i++) { 4885 const upb_msgdef *m = &file->msgs[i]; 4886 int j; 4887 for (j = 0; j < m->field_count; j++) { 4888 CHK(resolve_fielddef(ctx, m->full_name, (upb_fielddef*)&m->fields[j])); 4889 } 4890 } 4891 4892 if (!ctx->layouts) { 4893 for (i = 0; i < file->msg_count; i++) { 4894 const upb_msgdef *m = &file->msgs[i]; 4895 make_layout(ctx->symtab, m); 4896 } 4897 } 4898 4899 return true; 4900 } 4901 4902static bool upb_symtab_addtotabs(upb_symtab *s, symtab_addctx *ctx, 4903 upb_status *status) { 4904 const upb_filedef *file = ctx->file; 4905 upb_alloc *alloc = upb_arena_alloc(s->arena); 4906 upb_strtable_iter iter; 4907 4908 CHK_OOM(upb_strtable_insert3(&s->files, file->name, strlen(file->name), 4909 upb_value_constptr(file), alloc)); 4910 4911 upb_strtable_begin(&iter, ctx->addtab); 4912 for (; !upb_strtable_done(&iter); upb_strtable_next(&iter)) { 4913 upb_strview key = upb_strtable_iter_key(&iter); 4914 upb_value value = upb_strtable_iter_value(&iter); 4915 CHK_OOM(upb_strtable_insert3(&s->syms, key.data, key.size, value, alloc)); 4916 } 4917 4918 return true; 4919} 4920 4921/* upb_filedef ****************************************************************/ 4922 4923const char *upb_filedef_name(const upb_filedef *f) { 4924 return f->name; 4925} 4926 4927const char *upb_filedef_package(const upb_filedef *f) { 4928 return f->package; 4929} 4930 4931const char *upb_filedef_phpprefix(const upb_filedef *f) { 4932 return f->phpprefix; 4933} 4934 4935const char *upb_filedef_phpnamespace(const upb_filedef *f) { 4936 return f->phpnamespace; 4937} 4938 4939upb_syntax_t upb_filedef_syntax(const upb_filedef *f) { 4940 return f->syntax; 4941} 4942 4943int upb_filedef_msgcount(const upb_filedef *f) { 4944 return f->msg_count; 4945} 4946 4947int upb_filedef_depcount(const upb_filedef *f) { 4948 return 
f->dep_count; 4949} 4950 4951int upb_filedef_enumcount(const upb_filedef *f) { 4952 return f->enum_count; 4953} 4954 4955const upb_filedef *upb_filedef_dep(const upb_filedef *f, int i) { 4956 return i < 0 || i >= f->dep_count ? NULL : f->deps[i]; 4957} 4958 4959const upb_msgdef *upb_filedef_msg(const upb_filedef *f, int i) { 4960 return i < 0 || i >= f->msg_count ? NULL : &f->msgs[i]; 4961} 4962 4963const upb_enumdef *upb_filedef_enum(const upb_filedef *f, int i) { 4964 return i < 0 || i >= f->enum_count ? NULL : &f->enums[i]; 4965} 4966 4967void upb_symtab_free(upb_symtab *s) { 4968 upb_arena_free(s->arena); 4969 upb_gfree(s); 4970} 4971 4972upb_symtab *upb_symtab_new(void) { 4973 upb_symtab *s = upb_gmalloc(sizeof(*s)); 4974 upb_alloc *alloc; 4975 4976 if (!s) { 4977 return NULL; 4978 } 4979 4980 s->arena = upb_arena_new(); 4981 alloc = upb_arena_alloc(s->arena); 4982 4983 if (!upb_strtable_init2(&s->syms, UPB_CTYPE_CONSTPTR, alloc) || 4984 !upb_strtable_init2(&s->files, UPB_CTYPE_CONSTPTR, alloc)) { 4985 upb_arena_free(s->arena); 4986 upb_gfree(s); 4987 s = NULL; 4988 } 4989 return s; 4990} 4991 4992const upb_msgdef *upb_symtab_lookupmsg(const upb_symtab *s, const char *sym) { 4993 upb_value v; 4994 return upb_strtable_lookup(&s->syms, sym, &v) ? 4995 unpack_def(v, UPB_DEFTYPE_MSG) : NULL; 4996} 4997 4998const upb_msgdef *upb_symtab_lookupmsg2(const upb_symtab *s, const char *sym, 4999 size_t len) { 5000 upb_value v; 5001 return upb_strtable_lookup2(&s->syms, sym, len, &v) ? 5002 unpack_def(v, UPB_DEFTYPE_MSG) : NULL; 5003} 5004 5005const upb_enumdef *upb_symtab_lookupenum(const upb_symtab *s, const char *sym) { 5006 upb_value v; 5007 return upb_strtable_lookup(&s->syms, sym, &v) ? 5008 unpack_def(v, UPB_DEFTYPE_ENUM) : NULL; 5009} 5010 5011const upb_filedef *upb_symtab_lookupfile(const upb_symtab *s, const char *name) { 5012 upb_value v; 5013 return upb_strtable_lookup(&s->files, name, &v) ? 
upb_value_getconstptr(v) 5014 : NULL; 5015} 5016 5017int upb_symtab_filecount(const upb_symtab *s) { 5018 return (int)upb_strtable_count(&s->files); 5019} 5020 5021static const upb_filedef *_upb_symtab_addfile( 5022 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, 5023 const upb_msglayout **layouts, upb_status *status) { 5024 upb_arena *tmparena = upb_arena_new(); 5025 upb_strtable addtab; 5026 upb_alloc *alloc = upb_arena_alloc(s->arena); 5027 upb_filedef *file = upb_malloc(alloc, sizeof(*file)); 5028 bool ok; 5029 symtab_addctx ctx; 5030 5031 ctx.file = file; 5032 ctx.symtab = s; 5033 ctx.alloc = alloc; 5034 ctx.tmp = upb_arena_alloc(tmparena); 5035 ctx.addtab = &addtab; 5036 ctx.layouts = layouts; 5037 ctx.status = status; 5038 5039 ok = file && 5040 upb_strtable_init2(&addtab, UPB_CTYPE_CONSTPTR, ctx.tmp) && 5041 build_filedef(&ctx, file, file_proto) && 5042 upb_symtab_addtotabs(s, &ctx, status); 5043 5044 upb_arena_free(tmparena); 5045 return ok ? file : NULL; 5046} 5047 5048const upb_filedef *upb_symtab_addfile( 5049 upb_symtab *s, const google_protobuf_FileDescriptorProto *file_proto, 5050 upb_status *status) { 5051 return _upb_symtab_addfile(s, file_proto, NULL, status); 5052} 5053 5054/* Include here since we want most of this file to be stdio-free. */ 5055#include <stdio.h> 5056 5057bool _upb_symtab_loaddefinit(upb_symtab *s, const upb_def_init *init) { 5058 /* Since this function should never fail (it would indicate a bug in upb) we 5059 * print errors to stderr instead of returning error status to the user. 
/* Size in bytes of one stored (non-repeated) field value.
 *
 * Indexed by the field's descriptor type: _upb_msg_getraw() and upb_msg_set()
 * look up field_size[field->descriptortype] to decide how many bytes to
 * memcpy.  Entry order must therefore match the numeric values of the
 * UPB_DESCRIPTOR_TYPE_* constants named in the per-entry comments; slot 0 is
 * a placeholder. */
static char field_size[] = {
  0,/* 0 */
  8, /* UPB_DESCRIPTOR_TYPE_DOUBLE */
  4, /* UPB_DESCRIPTOR_TYPE_FLOAT */
  8, /* UPB_DESCRIPTOR_TYPE_INT64 */
  8, /* UPB_DESCRIPTOR_TYPE_UINT64 */
  4, /* UPB_DESCRIPTOR_TYPE_INT32 */
  8, /* UPB_DESCRIPTOR_TYPE_FIXED64 */
  4, /* UPB_DESCRIPTOR_TYPE_FIXED32 */
  1, /* UPB_DESCRIPTOR_TYPE_BOOL */
  sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_STRING */
  sizeof(void*), /* UPB_DESCRIPTOR_TYPE_GROUP */
  sizeof(void*), /* UPB_DESCRIPTOR_TYPE_MESSAGE */
  sizeof(upb_strview), /* UPB_DESCRIPTOR_TYPE_BYTES */
  4, /* UPB_DESCRIPTOR_TYPE_UINT32 */
  4, /* UPB_DESCRIPTOR_TYPE_ENUM */
  4, /* UPB_DESCRIPTOR_TYPE_SFIXED32 */
  8, /* UPB_DESCRIPTOR_TYPE_SFIXED64 */
  4, /* UPB_DESCRIPTOR_TYPE_SINT32 */
  8, /* UPB_DESCRIPTOR_TYPE_SINT64 */
};
maps. */ 5131static char _upb_fieldtype_to_mapsize[12] = { 5132 0, 5133 1, /* UPB_TYPE_BOOL */ 5134 4, /* UPB_TYPE_FLOAT */ 5135 4, /* UPB_TYPE_INT32 */ 5136 4, /* UPB_TYPE_UINT32 */ 5137 4, /* UPB_TYPE_ENUM */ 5138 sizeof(void*), /* UPB_TYPE_MESSAGE */ 5139 8, /* UPB_TYPE_DOUBLE */ 5140 8, /* UPB_TYPE_INT64 */ 5141 8, /* UPB_TYPE_UINT64 */ 5142 0, /* UPB_TYPE_STRING */ 5143 0, /* UPB_TYPE_BYTES */ 5144}; 5145 5146/** upb_msg *******************************************************************/ 5147 5148upb_msg *upb_msg_new(const upb_msgdef *m, upb_arena *a) { 5149 return _upb_msg_new(upb_msgdef_layout(m), a); 5150} 5151 5152static bool in_oneof(const upb_msglayout_field *field) { 5153 return field->presence < 0; 5154} 5155 5156static uint32_t *oneofcase(const upb_msg *msg, 5157 const upb_msglayout_field *field) { 5158 UPB_ASSERT(in_oneof(field)); 5159 return UPB_PTR_AT(msg, -field->presence, uint32_t); 5160} 5161 5162static upb_msgval _upb_msg_getraw(const upb_msg *msg, const upb_fielddef *f) { 5163 const upb_msglayout_field *field = upb_fielddef_layout(f); 5164 const char *mem = UPB_PTR_AT(msg, field->offset, char); 5165 upb_msgval val = {0}; 5166 int size = upb_fielddef_isseq(f) ? 
sizeof(void *) 5167 : field_size[field->descriptortype]; 5168 memcpy(&val, mem, size); 5169 return val; 5170} 5171 5172bool upb_msg_has(const upb_msg *msg, const upb_fielddef *f) { 5173 const upb_msglayout_field *field = upb_fielddef_layout(f); 5174 if (in_oneof(field)) { 5175 return *oneofcase(msg, field) == field->number; 5176 } else if (field->presence > 0) { 5177 uint32_t hasbit = field->presence; 5178 return *UPB_PTR_AT(msg, hasbit / 8, uint8_t) & (1 << (hasbit % 8)); 5179 } else { 5180 UPB_ASSERT(field->descriptortype == UPB_DESCRIPTOR_TYPE_MESSAGE || 5181 field->descriptortype == UPB_DESCRIPTOR_TYPE_GROUP); 5182 return _upb_msg_getraw(msg, f).msg_val != NULL; 5183 } 5184} 5185 5186bool upb_msg_hasoneof(const upb_msg *msg, const upb_oneofdef *o) { 5187 upb_oneof_iter i; 5188 const upb_fielddef *f; 5189 const upb_msglayout_field *field; 5190 5191 upb_oneof_begin(&i, o); 5192 if (upb_oneof_done(&i)) return false; 5193 f = upb_oneof_iter_field(&i); 5194 field = upb_fielddef_layout(f); 5195 return *oneofcase(msg, field) != 0; 5196} 5197 5198upb_msgval upb_msg_get(const upb_msg *msg, const upb_fielddef *f) { 5199 if (!upb_fielddef_haspresence(f) || upb_msg_has(msg, f)) { 5200 return _upb_msg_getraw(msg, f); 5201 } else { 5202 /* TODO(haberman): change upb_fielddef to not require this switch(). 
*/ 5203 upb_msgval val = {0}; 5204 switch (upb_fielddef_type(f)) { 5205 case UPB_TYPE_INT32: 5206 case UPB_TYPE_ENUM: 5207 val.int32_val = upb_fielddef_defaultint32(f); 5208 break; 5209 case UPB_TYPE_INT64: 5210 val.int64_val = upb_fielddef_defaultint64(f); 5211 break; 5212 case UPB_TYPE_UINT32: 5213 val.uint32_val = upb_fielddef_defaultuint32(f); 5214 break; 5215 case UPB_TYPE_UINT64: 5216 val.uint64_val = upb_fielddef_defaultuint64(f); 5217 break; 5218 case UPB_TYPE_FLOAT: 5219 val.float_val = upb_fielddef_defaultfloat(f); 5220 break; 5221 case UPB_TYPE_DOUBLE: 5222 val.double_val = upb_fielddef_defaultdouble(f); 5223 break; 5224 case UPB_TYPE_BOOL: 5225 val.double_val = upb_fielddef_defaultbool(f); 5226 break; 5227 case UPB_TYPE_STRING: 5228 case UPB_TYPE_BYTES: 5229 val.str_val.data = upb_fielddef_defaultstr(f, &val.str_val.size); 5230 break; 5231 case UPB_TYPE_MESSAGE: 5232 val.msg_val = NULL; 5233 break; 5234 } 5235 return val; 5236 } 5237} 5238 5239upb_mutmsgval upb_msg_mutable(upb_msg *msg, const upb_fielddef *f, 5240 upb_arena *a) { 5241 const upb_msglayout_field *field = upb_fielddef_layout(f); 5242 upb_mutmsgval ret; 5243 char *mem = UPB_PTR_AT(msg, field->offset, char); 5244 bool wrong_oneof = in_oneof(field) && *oneofcase(msg, field) != field->number; 5245 5246 memcpy(&ret, mem, sizeof(void*)); 5247 5248 if (a && (!ret.msg || wrong_oneof)) { 5249 if (upb_fielddef_ismap(f)) { 5250 const upb_msgdef *entry = upb_fielddef_msgsubdef(f); 5251 const upb_fielddef *key = upb_msgdef_itof(entry, UPB_MAPENTRY_KEY); 5252 const upb_fielddef *value = upb_msgdef_itof(entry, UPB_MAPENTRY_VALUE); 5253 ret.map = upb_map_new(a, upb_fielddef_type(key), upb_fielddef_type(value)); 5254 } else if (upb_fielddef_isseq(f)) { 5255 ret.array = upb_array_new(a, upb_fielddef_type(f)); 5256 } else { 5257 UPB_ASSERT(upb_fielddef_issubmsg(f)); 5258 ret.msg = upb_msg_new(upb_fielddef_msgsubdef(f), a); 5259 } 5260 5261 memcpy(mem, &ret, sizeof(void*)); 5262 5263 if (wrong_oneof) { 5264 
*oneofcase(msg, field) = field->number; 5265 } 5266 } 5267 return ret; 5268} 5269 5270void upb_msg_set(upb_msg *msg, const upb_fielddef *f, upb_msgval val, 5271 upb_arena *a) { 5272 const upb_msglayout_field *field = upb_fielddef_layout(f); 5273 char *mem = UPB_PTR_AT(msg, field->offset, char); 5274 int size = upb_fielddef_isseq(f) ? sizeof(void *) 5275 : field_size[field->descriptortype]; 5276 memcpy(mem, &val, size); 5277 if (in_oneof(field)) { 5278 *oneofcase(msg, field) = field->number; 5279 } 5280} 5281 5282bool upb_msg_next(const upb_msg *msg, const upb_msgdef *m, 5283 const upb_symtab *ext_pool, const upb_fielddef **out_f, 5284 upb_msgval *out_val, size_t *iter) { 5285 size_t i = *iter; 5286 const upb_msgval zero = {0}; 5287 const upb_fielddef *f; 5288 while ((f = _upb_msgdef_field(m, (int)++i)) != NULL) { 5289 upb_msgval val = _upb_msg_getraw(msg, f); 5290 5291 /* Skip field if unset or empty. */ 5292 if (upb_fielddef_haspresence(f)) { 5293 if (!upb_msg_has(msg, f)) continue; 5294 } else { 5295 upb_msgval test = val; 5296 if (upb_fielddef_isstring(f) && !upb_fielddef_isseq(f)) { 5297 /* Clear string pointer, only size matters (ptr could be non-NULL). */ 5298 test.str_val.data = NULL; 5299 } 5300 /* Continue if NULL or 0. */ 5301 if (memcmp(&test, &zero, sizeof(test)) == 0) continue; 5302 5303 /* Continue on empty array or map. 
*/ 5304 if (upb_fielddef_ismap(f)) { 5305 if (upb_map_size(test.map_val) == 0) continue; 5306 } else if (upb_fielddef_isseq(f)) { 5307 if (upb_array_size(test.array_val) == 0) continue; 5308 } 5309 } 5310 5311 *out_val = val; 5312 *out_f = f; 5313 *iter = i; 5314 return true; 5315 } 5316 *iter = i; 5317 return false; 5318} 5319 5320/** upb_array *****************************************************************/ 5321 5322upb_array *upb_array_new(upb_arena *a, upb_fieldtype_t type) { 5323 return _upb_array_new(a, type); 5324} 5325 5326size_t upb_array_size(const upb_array *arr) { 5327 return arr->len; 5328} 5329 5330upb_msgval upb_array_get(const upb_array *arr, size_t i) { 5331 upb_msgval ret; 5332 const char* data = _upb_array_constptr(arr); 5333 int lg2 = arr->data & 7; 5334 UPB_ASSERT(i < arr->len); 5335 memcpy(&ret, data + (i << lg2), 1 << lg2); 5336 return ret; 5337} 5338 5339void upb_array_set(upb_array *arr, size_t i, upb_msgval val) { 5340 char* data = _upb_array_ptr(arr); 5341 int lg2 = arr->data & 7; 5342 UPB_ASSERT(i < arr->len); 5343 memcpy(data + (i << lg2), &val, 1 << lg2); 5344} 5345 5346bool upb_array_append(upb_array *arr, upb_msgval val, upb_arena *arena) { 5347 if (!_upb_array_realloc(arr, arr->len + 1, arena)) { 5348 return false; 5349 } 5350 arr->len++; 5351 upb_array_set(arr, arr->len - 1, val); 5352 return true; 5353} 5354 5355/* Resizes the array to the given size, reallocating if necessary, and returns a 5356 * pointer to the new array elements. 
*/ 5357bool upb_array_resize(upb_array *arr, size_t size, upb_arena *arena) { 5358 return _upb_array_realloc(arr, size, arena); 5359} 5360 5361/** upb_map *******************************************************************/ 5362 5363upb_map *upb_map_new(upb_arena *a, upb_fieldtype_t key_type, 5364 upb_fieldtype_t value_type) { 5365 return _upb_map_new(a, _upb_fieldtype_to_mapsize[key_type], 5366 _upb_fieldtype_to_mapsize[value_type]); 5367} 5368 5369size_t upb_map_size(const upb_map *map) { 5370 return _upb_map_size(map); 5371} 5372 5373bool upb_map_get(const upb_map *map, upb_msgval key, upb_msgval *val) { 5374 return _upb_map_get(map, &key, map->key_size, val, map->val_size); 5375} 5376 5377bool upb_map_set(upb_map *map, upb_msgval key, upb_msgval val, 5378 upb_arena *arena) { 5379 return _upb_map_set(map, &key, map->key_size, &val, map->val_size, arena); 5380} 5381 5382bool upb_map_delete(upb_map *map, upb_msgval key) { 5383 return _upb_map_delete(map, &key, map->key_size); 5384} 5385 5386bool upb_mapiter_next(const upb_map *map, size_t *iter) { 5387 return _upb_map_next(map, iter); 5388} 5389 5390/* Returns the key and value for this entry of the map. */ 5391upb_msgval upb_mapiter_key(const upb_map *map, size_t iter) { 5392 upb_strtable_iter i; 5393 upb_msgval ret; 5394 i.t = &map->table; 5395 i.index = iter; 5396 _upb_map_fromkey(upb_strtable_iter_key(&i), &ret, map->key_size); 5397 return ret; 5398} 5399 5400upb_msgval upb_mapiter_value(const upb_map *map, size_t iter) { 5401 upb_strtable_iter i; 5402 upb_msgval ret; 5403 i.t = &map->table; 5404 i.index = iter; 5405 _upb_map_fromvalue(upb_strtable_iter_value(&i), &ret, map->val_size); 5406 return ret; 5407} 5408 5409/* void upb_mapiter_setvalue(upb_map *map, size_t iter, upb_msgval value); */ 5410/* 5411** TODO(haberman): it's unclear whether a lot of the consistency checks should 5412** UPB_ASSERT() or return false. 
5413*/ 5414 5415 5416#include <string.h> 5417 5418 5419 5420struct upb_handlers { 5421 upb_handlercache *cache; 5422 const upb_msgdef *msg; 5423 const upb_handlers **sub; 5424 const void *top_closure_type; 5425 upb_handlers_tabent table[1]; /* Dynamically-sized field handler array. */ 5426}; 5427 5428static void *upb_calloc(upb_arena *arena, size_t size) { 5429 void *mem = upb_malloc(upb_arena_alloc(arena), size); 5430 if (mem) { 5431 memset(mem, 0, size); 5432 } 5433 return mem; 5434} 5435 5436/* Defined for the sole purpose of having a unique pointer value for 5437 * UPB_NO_CLOSURE. */ 5438char _upb_noclosure; 5439 5440/* Given a selector for a STARTSUBMSG handler, resolves to a pointer to the 5441 * subhandlers for this submessage field. */ 5442#define SUBH(h, selector) (h->sub[selector]) 5443 5444/* The selector for a submessage field is the field index. */ 5445#define SUBH_F(h, f) SUBH(h, upb_fielddef_index(f)) 5446 5447static int32_t trygetsel(upb_handlers *h, const upb_fielddef *f, 5448 upb_handlertype_t type) { 5449 upb_selector_t sel; 5450 bool ok; 5451 5452 ok = upb_handlers_getselector(f, type, &sel); 5453 5454 UPB_ASSERT(upb_handlers_msgdef(h) == upb_fielddef_containingtype(f)); 5455 UPB_ASSERT(ok); 5456 5457 return sel; 5458} 5459 5460static upb_selector_t handlers_getsel(upb_handlers *h, const upb_fielddef *f, 5461 upb_handlertype_t type) { 5462 int32_t sel = trygetsel(h, f, type); 5463 UPB_ASSERT(sel >= 0); 5464 return sel; 5465} 5466 5467static const void **returntype(upb_handlers *h, const upb_fielddef *f, 5468 upb_handlertype_t type) { 5469 return &h->table[handlers_getsel(h, f, type)].attr.return_closure_type; 5470} 5471 5472static bool doset(upb_handlers *h, int32_t sel, const upb_fielddef *f, 5473 upb_handlertype_t type, upb_func *func, 5474 const upb_handlerattr *attr) { 5475 upb_handlerattr set_attr = UPB_HANDLERATTR_INIT; 5476 const void *closure_type; 5477 const void **context_closure_type; 5478 5479 UPB_ASSERT(!h->table[sel].func); 5480 
5481 if (attr) { 5482 set_attr = *attr; 5483 } 5484 5485 /* Check that the given closure type matches the closure type that has been 5486 * established for this context (if any). */ 5487 closure_type = set_attr.closure_type; 5488 5489 if (type == UPB_HANDLER_STRING) { 5490 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSTR); 5491 } else if (f && upb_fielddef_isseq(f) && 5492 type != UPB_HANDLER_STARTSEQ && 5493 type != UPB_HANDLER_ENDSEQ) { 5494 context_closure_type = returntype(h, f, UPB_HANDLER_STARTSEQ); 5495 } else { 5496 context_closure_type = &h->top_closure_type; 5497 } 5498 5499 if (closure_type && *context_closure_type && 5500 closure_type != *context_closure_type) { 5501 return false; 5502 } 5503 5504 if (closure_type) 5505 *context_closure_type = closure_type; 5506 5507 /* If this is a STARTSEQ or STARTSTR handler, check that the returned pointer 5508 * matches any pre-existing expectations about what type is expected. */ 5509 if (type == UPB_HANDLER_STARTSEQ || type == UPB_HANDLER_STARTSTR) { 5510 const void *return_type = set_attr.return_closure_type; 5511 const void *table_return_type = h->table[sel].attr.return_closure_type; 5512 if (return_type && table_return_type && return_type != table_return_type) { 5513 return false; 5514 } 5515 5516 if (table_return_type && !return_type) { 5517 set_attr.return_closure_type = table_return_type; 5518 } 5519 } 5520 5521 h->table[sel].func = (upb_func*)func; 5522 h->table[sel].attr = set_attr; 5523 return true; 5524} 5525 5526/* Returns the effective closure type for this handler (which will propagate 5527 * from outer frames if this frame has no START* handler). Not implemented for 5528 * UPB_HANDLER_STRING at the moment since this is not needed. Returns NULL is 5529 * the effective closure type is unspecified (either no handler was registered 5530 * to specify it or the handler that was registered did not specify the closure 5531 * type). 
*/
const void *effective_closure_type(upb_handlers *h, const upb_fielddef *f,
                                   upb_handlertype_t type) {
  const void *result;

  UPB_ASSERT(type != UPB_HANDLER_STRING);

  /* Start from the message-level closure type... */
  result = h->top_closure_type;

  /* ...and let a registered STARTSEQ handler override it for handlers that
   * run inside the sequence. */
  if (upb_fielddef_isseq(f) &&
      type != UPB_HANDLER_STARTSEQ &&
      type != UPB_HANDLER_ENDSEQ) {
    upb_selector_t seq_sel = handlers_getsel(h, f, UPB_HANDLER_STARTSEQ);
    if (h->table[seq_sel].func) {
      result = h->table[seq_sel].attr.return_closure_type;
    }
  }

  /* Same for a registered STARTSTR handler.  Only reachable when the assertion
   * above is compiled out. */
  if (type == UPB_HANDLER_STRING) {
    upb_selector_t str_sel = handlers_getsel(h, f, UPB_HANDLER_STARTSTR);
    if (h->table[str_sel].func) {
      result = h->table[str_sel].attr.return_closure_type;
    }
  }

  /* The effective type of the submessage; not used yet.
   * if (type == SUBMESSAGE &&
   *     h->table[sel = handlers_getsel(h, f, UPB_HANDLER_STARTSUBMSG)].func) {
   *   ret = h->table[sel].attr.return_closure_type;
   * } */

  return result;
}

/* Checks whether the START* handler specified by f & type is missing even
 * though it is required to convert the established type of an outer frame
 * ("closure_type") into the established type of an inner frame (represented in
 * the return closure type of this handler's attr).
*/ 5565bool checkstart(upb_handlers *h, const upb_fielddef *f, upb_handlertype_t type, 5566 upb_status *status) { 5567 const void *closure_type; 5568 const upb_handlerattr *attr; 5569 const void *return_closure_type; 5570 5571 upb_selector_t sel = handlers_getsel(h, f, type); 5572 if (h->table[sel].func) return true; 5573 closure_type = effective_closure_type(h, f, type); 5574 attr = &h->table[sel].attr; 5575 return_closure_type = attr->return_closure_type; 5576 if (closure_type && return_closure_type && 5577 closure_type != return_closure_type) { 5578 return false; 5579 } 5580 return true; 5581} 5582 5583static upb_handlers *upb_handlers_new(const upb_msgdef *md, 5584 upb_handlercache *cache, 5585 upb_arena *arena) { 5586 int extra; 5587 upb_handlers *h; 5588 5589 extra = 5590 (int)(sizeof(upb_handlers_tabent) * (upb_msgdef_selectorcount(md) - 1)); 5591 h = upb_calloc(arena, sizeof(*h) + extra); 5592 if (!h) return NULL; 5593 5594 h->cache = cache; 5595 h->msg = md; 5596 5597 if (upb_msgdef_submsgfieldcount(md) > 0) { 5598 size_t bytes = upb_msgdef_submsgfieldcount(md) * sizeof(*h->sub); 5599 h->sub = upb_calloc(arena, bytes); 5600 if (!h->sub) return NULL; 5601 } else { 5602 h->sub = 0; 5603 } 5604 5605 /* calloc() above initialized all handlers to NULL. 
*/ 5606 return h; 5607} 5608 5609/* Public interface ***********************************************************/ 5610 5611#define SETTER(name, handlerctype, handlertype) \ 5612 bool upb_handlers_set##name(upb_handlers *h, const upb_fielddef *f, \ 5613 handlerctype func, \ 5614 const upb_handlerattr *attr) { \ 5615 int32_t sel = trygetsel(h, f, handlertype); \ 5616 return doset(h, sel, f, handlertype, (upb_func *)func, attr); \ 5617 } 5618 5619SETTER(int32, upb_int32_handlerfunc*, UPB_HANDLER_INT32) 5620SETTER(int64, upb_int64_handlerfunc*, UPB_HANDLER_INT64) 5621SETTER(uint32, upb_uint32_handlerfunc*, UPB_HANDLER_UINT32) 5622SETTER(uint64, upb_uint64_handlerfunc*, UPB_HANDLER_UINT64) 5623SETTER(float, upb_float_handlerfunc*, UPB_HANDLER_FLOAT) 5624SETTER(double, upb_double_handlerfunc*, UPB_HANDLER_DOUBLE) 5625SETTER(bool, upb_bool_handlerfunc*, UPB_HANDLER_BOOL) 5626SETTER(startstr, upb_startstr_handlerfunc*, UPB_HANDLER_STARTSTR) 5627SETTER(string, upb_string_handlerfunc*, UPB_HANDLER_STRING) 5628SETTER(endstr, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSTR) 5629SETTER(startseq, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSEQ) 5630SETTER(startsubmsg, upb_startfield_handlerfunc*, UPB_HANDLER_STARTSUBMSG) 5631SETTER(endsubmsg, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSUBMSG) 5632SETTER(endseq, upb_endfield_handlerfunc*, UPB_HANDLER_ENDSEQ) 5633 5634#undef SETTER 5635 5636bool upb_handlers_setunknown(upb_handlers *h, upb_unknown_handlerfunc *func, 5637 const upb_handlerattr *attr) { 5638 return doset(h, UPB_UNKNOWN_SELECTOR, NULL, UPB_HANDLER_INT32, 5639 (upb_func *)func, attr); 5640} 5641 5642bool upb_handlers_setstartmsg(upb_handlers *h, upb_startmsg_handlerfunc *func, 5643 const upb_handlerattr *attr) { 5644 return doset(h, UPB_STARTMSG_SELECTOR, NULL, UPB_HANDLER_INT32, 5645 (upb_func *)func, attr); 5646} 5647 5648bool upb_handlers_setendmsg(upb_handlers *h, upb_endmsg_handlerfunc *func, 5649 const upb_handlerattr *attr) { 5650 return doset(h, 
UPB_ENDMSG_SELECTOR, NULL, UPB_HANDLER_INT32, 5651 (upb_func *)func, attr); 5652} 5653 5654bool upb_handlers_setsubhandlers(upb_handlers *h, const upb_fielddef *f, 5655 const upb_handlers *sub) { 5656 UPB_ASSERT(sub); 5657 UPB_ASSERT(upb_fielddef_issubmsg(f)); 5658 if (SUBH_F(h, f)) return false; /* Can't reset. */ 5659 if (upb_handlers_msgdef(sub) != upb_fielddef_msgsubdef(f)) { 5660 return false; 5661 } 5662 SUBH_F(h, f) = sub; 5663 return true; 5664} 5665 5666const upb_handlers *upb_handlers_getsubhandlers(const upb_handlers *h, 5667 const upb_fielddef *f) { 5668 UPB_ASSERT(upb_fielddef_issubmsg(f)); 5669 return SUBH_F(h, f); 5670} 5671 5672upb_func *upb_handlers_gethandler(const upb_handlers *h, upb_selector_t s, 5673 const void **handler_data) { 5674 upb_func *ret = (upb_func *)h->table[s].func; 5675 if (ret && handler_data) { 5676 *handler_data = h->table[s].attr.handler_data; 5677 } 5678 return ret; 5679} 5680 5681bool upb_handlers_getattr(const upb_handlers *h, upb_selector_t sel, 5682 upb_handlerattr *attr) { 5683 if (!upb_handlers_gethandler(h, sel, NULL)) 5684 return false; 5685 *attr = h->table[sel].attr; 5686 return true; 5687} 5688 5689const upb_handlers *upb_handlers_getsubhandlers_sel(const upb_handlers *h, 5690 upb_selector_t sel) { 5691 /* STARTSUBMSG selector in sel is the field's selector base. 
*/ 5692 return SUBH(h, sel - UPB_STATIC_SELECTOR_COUNT); 5693} 5694 5695const upb_msgdef *upb_handlers_msgdef(const upb_handlers *h) { return h->msg; } 5696 5697bool upb_handlers_addcleanup(upb_handlers *h, void *p, upb_handlerfree *func) { 5698 return upb_handlercache_addcleanup(h->cache, p, func); 5699} 5700 5701upb_handlertype_t upb_handlers_getprimitivehandlertype(const upb_fielddef *f) { 5702 switch (upb_fielddef_type(f)) { 5703 case UPB_TYPE_INT32: 5704 case UPB_TYPE_ENUM: return UPB_HANDLER_INT32; 5705 case UPB_TYPE_INT64: return UPB_HANDLER_INT64; 5706 case UPB_TYPE_UINT32: return UPB_HANDLER_UINT32; 5707 case UPB_TYPE_UINT64: return UPB_HANDLER_UINT64; 5708 case UPB_TYPE_FLOAT: return UPB_HANDLER_FLOAT; 5709 case UPB_TYPE_DOUBLE: return UPB_HANDLER_DOUBLE; 5710 case UPB_TYPE_BOOL: return UPB_HANDLER_BOOL; 5711 default: UPB_ASSERT(false); return -1; /* Invalid input. */ 5712 } 5713} 5714 5715bool upb_handlers_getselector(const upb_fielddef *f, upb_handlertype_t type, 5716 upb_selector_t *s) { 5717 uint32_t selector_base = upb_fielddef_selectorbase(f); 5718 switch (type) { 5719 case UPB_HANDLER_INT32: 5720 case UPB_HANDLER_INT64: 5721 case UPB_HANDLER_UINT32: 5722 case UPB_HANDLER_UINT64: 5723 case UPB_HANDLER_FLOAT: 5724 case UPB_HANDLER_DOUBLE: 5725 case UPB_HANDLER_BOOL: 5726 if (!upb_fielddef_isprimitive(f) || 5727 upb_handlers_getprimitivehandlertype(f) != type) 5728 return false; 5729 *s = selector_base; 5730 break; 5731 case UPB_HANDLER_STRING: 5732 if (upb_fielddef_isstring(f)) { 5733 *s = selector_base; 5734 } else if (upb_fielddef_lazy(f)) { 5735 *s = selector_base + 3; 5736 } else { 5737 return false; 5738 } 5739 break; 5740 case UPB_HANDLER_STARTSTR: 5741 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { 5742 *s = selector_base + 1; 5743 } else { 5744 return false; 5745 } 5746 break; 5747 case UPB_HANDLER_ENDSTR: 5748 if (upb_fielddef_isstring(f) || upb_fielddef_lazy(f)) { 5749 *s = selector_base + 2; 5750 } else { 5751 return false; 5752 } 
5753 break; 5754 case UPB_HANDLER_STARTSEQ: 5755 if (!upb_fielddef_isseq(f)) return false; 5756 *s = selector_base - 2; 5757 break; 5758 case UPB_HANDLER_ENDSEQ: 5759 if (!upb_fielddef_isseq(f)) return false; 5760 *s = selector_base - 1; 5761 break; 5762 case UPB_HANDLER_STARTSUBMSG: 5763 if (!upb_fielddef_issubmsg(f)) return false; 5764 /* Selectors for STARTSUBMSG are at the beginning of the table so that the 5765 * selector can also be used as an index into the "sub" array of 5766 * subhandlers. The indexes for the two into these two tables are the 5767 * same, except that in the handler table the static selectors come first. */ 5768 *s = upb_fielddef_index(f) + UPB_STATIC_SELECTOR_COUNT; 5769 break; 5770 case UPB_HANDLER_ENDSUBMSG: 5771 if (!upb_fielddef_issubmsg(f)) return false; 5772 *s = selector_base; 5773 break; 5774 } 5775 UPB_ASSERT((size_t)*s < upb_msgdef_selectorcount(upb_fielddef_containingtype(f))); 5776 return true; 5777} 5778 5779/* upb_handlercache ***********************************************************/ 5780 5781struct upb_handlercache { 5782 upb_arena *arena; 5783 upb_inttable tab; /* maps upb_msgdef* -> upb_handlers*. */ 5784 upb_handlers_callback *callback; 5785 const void *closure; 5786}; 5787 5788const upb_handlers *upb_handlercache_get(upb_handlercache *c, 5789 const upb_msgdef *md) { 5790 upb_msg_field_iter i; 5791 upb_value v; 5792 upb_handlers *h; 5793 5794 if (upb_inttable_lookupptr(&c->tab, md, &v)) { 5795 return upb_value_getptr(v); 5796 } 5797 5798 h = upb_handlers_new(md, c, c->arena); 5799 v = upb_value_ptr(h); 5800 5801 if (!h) return NULL; 5802 if (!upb_inttable_insertptr(&c->tab, md, v)) return NULL; 5803 5804 c->callback(c->closure, h); 5805 5806 /* For each submessage field, get or create a handlers object and set it as 5807 * the subhandlers. 
*/ 5808 for(upb_msg_field_begin(&i, md); 5809 !upb_msg_field_done(&i); 5810 upb_msg_field_next(&i)) { 5811 upb_fielddef *f = upb_msg_iter_field(&i); 5812 5813 if (upb_fielddef_issubmsg(f)) { 5814 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); 5815 const upb_handlers *sub_mh = upb_handlercache_get(c, subdef); 5816 5817 if (!sub_mh) return NULL; 5818 5819 upb_handlers_setsubhandlers(h, f, sub_mh); 5820 } 5821 } 5822 5823 return h; 5824} 5825 5826 5827upb_handlercache *upb_handlercache_new(upb_handlers_callback *callback, 5828 const void *closure) { 5829 upb_handlercache *cache = upb_gmalloc(sizeof(*cache)); 5830 5831 if (!cache) return NULL; 5832 5833 cache->arena = upb_arena_new(); 5834 5835 cache->callback = callback; 5836 cache->closure = closure; 5837 5838 if (!upb_inttable_init(&cache->tab, UPB_CTYPE_PTR)) goto oom; 5839 5840 return cache; 5841 5842oom: 5843 upb_gfree(cache); 5844 return NULL; 5845} 5846 5847void upb_handlercache_free(upb_handlercache *cache) { 5848 upb_inttable_uninit(&cache->tab); 5849 upb_arena_free(cache->arena); 5850 upb_gfree(cache); 5851} 5852 5853bool upb_handlercache_addcleanup(upb_handlercache *c, void *p, 5854 upb_handlerfree *func) { 5855 return upb_arena_addcleanup(c->arena, p, func); 5856} 5857 5858/* upb_byteshandler ***********************************************************/ 5859 5860bool upb_byteshandler_setstartstr(upb_byteshandler *h, 5861 upb_startstr_handlerfunc *func, void *d) { 5862 h->table[UPB_STARTSTR_SELECTOR].func = (upb_func*)func; 5863 h->table[UPB_STARTSTR_SELECTOR].attr.handler_data = d; 5864 return true; 5865} 5866 5867bool upb_byteshandler_setstring(upb_byteshandler *h, 5868 upb_string_handlerfunc *func, void *d) { 5869 h->table[UPB_STRING_SELECTOR].func = (upb_func*)func; 5870 h->table[UPB_STRING_SELECTOR].attr.handler_data = d; 5871 return true; 5872} 5873 5874bool upb_byteshandler_setendstr(upb_byteshandler *h, 5875 upb_endfield_handlerfunc *func, void *d) { 5876 h->table[UPB_ENDSTR_SELECTOR].func = 
(upb_func*)func; 5877 h->table[UPB_ENDSTR_SELECTOR].attr.handler_data = d; 5878 return true; 5879} 5880 5881/** Handlers for upb_msg ******************************************************/ 5882 5883typedef struct { 5884 size_t offset; 5885 int32_t hasbit; 5886} upb_msg_handlerdata; 5887 5888/* Fallback implementation if the handler is not specialized by the producer. */ 5889#define MSG_WRITER(type, ctype) \ 5890 bool upb_msg_set ## type (void *c, const void *hd, ctype val) { \ 5891 uint8_t *m = c; \ 5892 const upb_msg_handlerdata *d = hd; \ 5893 if (d->hasbit > 0) \ 5894 *(uint8_t*)&m[d->hasbit / 8] |= 1 << (d->hasbit % 8); \ 5895 *(ctype*)&m[d->offset] = val; \ 5896 return true; \ 5897 } \ 5898 5899MSG_WRITER(double, double) 5900MSG_WRITER(float, float) 5901MSG_WRITER(int32, int32_t) 5902MSG_WRITER(int64, int64_t) 5903MSG_WRITER(uint32, uint32_t) 5904MSG_WRITER(uint64, uint64_t) 5905MSG_WRITER(bool, bool) 5906 5907bool upb_msg_setscalarhandler(upb_handlers *h, const upb_fielddef *f, 5908 size_t offset, int32_t hasbit) { 5909 upb_handlerattr attr = UPB_HANDLERATTR_INIT; 5910 bool ok; 5911 5912 upb_msg_handlerdata *d = upb_gmalloc(sizeof(*d)); 5913 if (!d) return false; 5914 d->offset = offset; 5915 d->hasbit = hasbit; 5916 5917 attr.handler_data = d; 5918 attr.alwaysok = true; 5919 upb_handlers_addcleanup(h, d, upb_gfree); 5920 5921#define TYPE(u, l) \ 5922 case UPB_TYPE_##u: \ 5923 ok = upb_handlers_set##l(h, f, upb_msg_set##l, &attr); break; 5924 5925 ok = false; 5926 5927 switch (upb_fielddef_type(f)) { 5928 TYPE(INT64, int64); 5929 TYPE(INT32, int32); 5930 TYPE(ENUM, int32); 5931 TYPE(UINT64, uint64); 5932 TYPE(UINT32, uint32); 5933 TYPE(DOUBLE, double); 5934 TYPE(FLOAT, float); 5935 TYPE(BOOL, bool); 5936 default: UPB_ASSERT(false); break; 5937 } 5938#undef TYPE 5939 5940 return ok; 5941} 5942 5943bool upb_msg_getscalarhandlerdata(const upb_handlers *h, 5944 upb_selector_t s, 5945 upb_fieldtype_t *type, 5946 size_t *offset, 5947 int32_t *hasbit) { 5948 const 
upb_msg_handlerdata *d; 5949 const void *p; 5950 upb_func *f = upb_handlers_gethandler(h, s, &p); 5951 5952 if ((upb_int64_handlerfunc*)f == upb_msg_setint64) { 5953 *type = UPB_TYPE_INT64; 5954 } else if ((upb_int32_handlerfunc*)f == upb_msg_setint32) { 5955 *type = UPB_TYPE_INT32; 5956 } else if ((upb_uint64_handlerfunc*)f == upb_msg_setuint64) { 5957 *type = UPB_TYPE_UINT64; 5958 } else if ((upb_uint32_handlerfunc*)f == upb_msg_setuint32) { 5959 *type = UPB_TYPE_UINT32; 5960 } else if ((upb_double_handlerfunc*)f == upb_msg_setdouble) { 5961 *type = UPB_TYPE_DOUBLE; 5962 } else if ((upb_float_handlerfunc*)f == upb_msg_setfloat) { 5963 *type = UPB_TYPE_FLOAT; 5964 } else if ((upb_bool_handlerfunc*)f == upb_msg_setbool) { 5965 *type = UPB_TYPE_BOOL; 5966 } else { 5967 return false; 5968 } 5969 5970 d = p; 5971 *offset = d->offset; 5972 *hasbit = d->hasbit; 5973 return true; 5974} 5975 5976 5977bool upb_bufsrc_putbuf(const char *buf, size_t len, upb_bytessink sink) { 5978 void *subc; 5979 bool ret; 5980 upb_bufhandle handle = UPB_BUFHANDLE_INIT; 5981 handle.buf = buf; 5982 ret = upb_bytessink_start(sink, len, &subc); 5983 if (ret && len != 0) { 5984 ret = (upb_bytessink_putbuf(sink, subc, buf, len, &handle) >= len); 5985 } 5986 if (ret) { 5987 ret = upb_bytessink_end(sink); 5988 } 5989 return ret; 5990} 5991 5992 5993#ifdef UPB_MSVC_VSNPRINTF 5994/* Visual C++ earlier than 2015 doesn't have standard C99 snprintf and 5995 * vsnprintf. To support them, missing functions are manually implemented 5996 * using the existing secure functions. */ 5997int msvc_vsnprintf(char* s, size_t n, const char* format, va_list arg) { 5998 if (!s) { 5999 return _vscprintf(format, arg); 6000 } 6001 int ret = _vsnprintf_s(s, n, _TRUNCATE, format, arg); 6002 if (ret < 0) { 6003 ret = _vscprintf(format, arg); 6004 } 6005 return ret; 6006} 6007 6008int msvc_snprintf(char* s, size_t n, const char* format, ...) 
{ 6009 va_list arg; 6010 va_start(arg, format); 6011 int ret = msvc_vsnprintf(s, n, format, arg); 6012 va_end(arg); 6013 return ret; 6014} 6015#endif 6016/* 6017** protobuf decoder bytecode compiler 6018** 6019** Code to compile a upb::Handlers into bytecode for decoding a protobuf 6020** according to that specific schema and destination handlers. 6021** 6022** Bytecode definition is in decoder.int.h. 6023*/ 6024 6025#include <stdarg.h> 6026 6027#ifdef UPB_DUMP_BYTECODE 6028#include <stdio.h> 6029#endif 6030 6031 6032#define MAXLABEL 5 6033#define EMPTYLABEL -1 6034 6035/* upb_pbdecodermethod ********************************************************/ 6036 6037static void freemethod(upb_pbdecodermethod *method) { 6038 upb_inttable_uninit(&method->dispatch); 6039 upb_gfree(method); 6040} 6041 6042static upb_pbdecodermethod *newmethod(const upb_handlers *dest_handlers, 6043 mgroup *group) { 6044 upb_pbdecodermethod *ret = upb_gmalloc(sizeof(*ret)); 6045 upb_byteshandler_init(&ret->input_handler_); 6046 6047 ret->group = group; 6048 ret->dest_handlers_ = dest_handlers; 6049 upb_inttable_init(&ret->dispatch, UPB_CTYPE_UINT64); 6050 6051 return ret; 6052} 6053 6054const upb_handlers *upb_pbdecodermethod_desthandlers( 6055 const upb_pbdecodermethod *m) { 6056 return m->dest_handlers_; 6057} 6058 6059const upb_byteshandler *upb_pbdecodermethod_inputhandler( 6060 const upb_pbdecodermethod *m) { 6061 return &m->input_handler_; 6062} 6063 6064bool upb_pbdecodermethod_isnative(const upb_pbdecodermethod *m) { 6065 return m->is_native_; 6066} 6067 6068 6069/* mgroup *********************************************************************/ 6070 6071static void freegroup(mgroup *g) { 6072 upb_inttable_iter i; 6073 6074 upb_inttable_begin(&i, &g->methods); 6075 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 6076 freemethod(upb_value_getptr(upb_inttable_iter_value(&i))); 6077 } 6078 6079 upb_inttable_uninit(&g->methods); 6080 upb_gfree(g->bytecode); 6081 upb_gfree(g); 6082} 6083 
6084mgroup *newgroup(void) { 6085 mgroup *g = upb_gmalloc(sizeof(*g)); 6086 upb_inttable_init(&g->methods, UPB_CTYPE_PTR); 6087 g->bytecode = NULL; 6088 g->bytecode_end = NULL; 6089 return g; 6090} 6091 6092 6093/* bytecode compiler **********************************************************/ 6094 6095/* Data used only at compilation time. */ 6096typedef struct { 6097 mgroup *group; 6098 6099 uint32_t *pc; 6100 int fwd_labels[MAXLABEL]; 6101 int back_labels[MAXLABEL]; 6102 6103 /* For fields marked "lazy", parse them lazily or eagerly? */ 6104 bool lazy; 6105} compiler; 6106 6107static compiler *newcompiler(mgroup *group, bool lazy) { 6108 compiler *ret = upb_gmalloc(sizeof(*ret)); 6109 int i; 6110 6111 ret->group = group; 6112 ret->lazy = lazy; 6113 for (i = 0; i < MAXLABEL; i++) { 6114 ret->fwd_labels[i] = EMPTYLABEL; 6115 ret->back_labels[i] = EMPTYLABEL; 6116 } 6117 return ret; 6118} 6119 6120static void freecompiler(compiler *c) { 6121 upb_gfree(c); 6122} 6123 6124const size_t ptr_words = sizeof(void*) / sizeof(uint32_t); 6125 6126/* How many words an instruction is. */ 6127static int instruction_len(uint32_t instr) { 6128 switch (getop(instr)) { 6129 case OP_SETDISPATCH: return 1 + ptr_words; 6130 case OP_TAGN: return 3; 6131 case OP_SETBIGGROUPNUM: return 2; 6132 default: return 1; 6133 } 6134} 6135 6136bool op_has_longofs(int32_t instruction) { 6137 switch (getop(instruction)) { 6138 case OP_CALL: 6139 case OP_BRANCH: 6140 case OP_CHECKDELIM: 6141 return true; 6142 /* The "tag" instructions only have 8 bytes available for the jump target, 6143 * but that is ok because these opcodes only require short jumps. 
*/ 6144 case OP_TAG1: 6145 case OP_TAG2: 6146 case OP_TAGN: 6147 return false; 6148 default: 6149 UPB_ASSERT(false); 6150 return false; 6151 } 6152} 6153 6154static int32_t getofs(uint32_t instruction) { 6155 if (op_has_longofs(instruction)) { 6156 return (int32_t)instruction >> 8; 6157 } else { 6158 return (int8_t)(instruction >> 8); 6159 } 6160} 6161 6162static void setofs(uint32_t *instruction, int32_t ofs) { 6163 if (op_has_longofs(*instruction)) { 6164 *instruction = getop(*instruction) | (uint32_t)ofs << 8; 6165 } else { 6166 *instruction = (*instruction & ~0xff00) | ((ofs & 0xff) << 8); 6167 } 6168 UPB_ASSERT(getofs(*instruction) == ofs); /* Would fail in cases of overflow. */ 6169} 6170 6171static uint32_t pcofs(compiler *c) { 6172 return (uint32_t)(c->pc - c->group->bytecode); 6173} 6174 6175/* Defines a local label at the current PC location. All previous forward 6176 * references are updated to point to this location. The location is noted 6177 * for any future backward references. */ 6178static void label(compiler *c, unsigned int label) { 6179 int val; 6180 uint32_t *codep; 6181 6182 UPB_ASSERT(label < MAXLABEL); 6183 val = c->fwd_labels[label]; 6184 codep = (val == EMPTYLABEL) ? NULL : c->group->bytecode + val; 6185 while (codep) { 6186 int ofs = getofs(*codep); 6187 setofs(codep, (int32_t)(c->pc - codep - instruction_len(*codep))); 6188 codep = ofs ? codep + ofs : NULL; 6189 } 6190 c->fwd_labels[label] = EMPTYLABEL; 6191 c->back_labels[label] = pcofs(c); 6192} 6193 6194/* Creates a reference to a numbered label; either a forward reference 6195 * (positive arg) or backward reference (negative arg). For forward references 6196 * the value returned now is actually a "next" pointer into a linked list of all 6197 * instructions that use this label and will be patched later when the label is 6198 * defined with label(). 6199 * 6200 * The returned value is the offset that should be written into the instruction. 
6201 */ 6202static int32_t labelref(compiler *c, int label) { 6203 UPB_ASSERT(label < MAXLABEL); 6204 if (label == LABEL_DISPATCH) { 6205 /* No resolving required. */ 6206 return 0; 6207 } else if (label < 0) { 6208 /* Backward local label. Relative to the next instruction. */ 6209 uint32_t from = (uint32_t)((c->pc + 1) - c->group->bytecode); 6210 return c->back_labels[-label] - from; 6211 } else { 6212 /* Forward local label: prepend to (possibly-empty) linked list. */ 6213 int *lptr = &c->fwd_labels[label]; 6214 int32_t ret = (*lptr == EMPTYLABEL) ? 0 : *lptr - pcofs(c); 6215 *lptr = pcofs(c); 6216 return ret; 6217 } 6218} 6219 6220static void put32(compiler *c, uint32_t v) { 6221 mgroup *g = c->group; 6222 if (c->pc == g->bytecode_end) { 6223 int ofs = pcofs(c); 6224 size_t oldsize = g->bytecode_end - g->bytecode; 6225 size_t newsize = UPB_MAX(oldsize * 2, 64); 6226 /* TODO(haberman): handle OOM. */ 6227 g->bytecode = upb_grealloc(g->bytecode, oldsize * sizeof(uint32_t), 6228 newsize * sizeof(uint32_t)); 6229 g->bytecode_end = g->bytecode + newsize; 6230 c->pc = g->bytecode + ofs; 6231 } 6232 *c->pc++ = v; 6233} 6234 6235static void putop(compiler *c, int op, ...) 
{ 6236 va_list ap; 6237 va_start(ap, op); 6238 6239 switch (op) { 6240 case OP_SETDISPATCH: { 6241 uintptr_t ptr = (uintptr_t)va_arg(ap, void*); 6242 put32(c, OP_SETDISPATCH); 6243 put32(c, (uint32_t)ptr); 6244 if (sizeof(uintptr_t) > sizeof(uint32_t)) 6245 put32(c, (uint64_t)ptr >> 32); 6246 break; 6247 } 6248 case OP_STARTMSG: 6249 case OP_ENDMSG: 6250 case OP_PUSHLENDELIM: 6251 case OP_POP: 6252 case OP_SETDELIM: 6253 case OP_HALT: 6254 case OP_RET: 6255 case OP_DISPATCH: 6256 put32(c, op); 6257 break; 6258 case OP_PARSE_DOUBLE: 6259 case OP_PARSE_FLOAT: 6260 case OP_PARSE_INT64: 6261 case OP_PARSE_UINT64: 6262 case OP_PARSE_INT32: 6263 case OP_PARSE_FIXED64: 6264 case OP_PARSE_FIXED32: 6265 case OP_PARSE_BOOL: 6266 case OP_PARSE_UINT32: 6267 case OP_PARSE_SFIXED32: 6268 case OP_PARSE_SFIXED64: 6269 case OP_PARSE_SINT32: 6270 case OP_PARSE_SINT64: 6271 case OP_STARTSEQ: 6272 case OP_ENDSEQ: 6273 case OP_STARTSUBMSG: 6274 case OP_ENDSUBMSG: 6275 case OP_STARTSTR: 6276 case OP_STRING: 6277 case OP_ENDSTR: 6278 case OP_PUSHTAGDELIM: 6279 put32(c, op | va_arg(ap, upb_selector_t) << 8); 6280 break; 6281 case OP_SETBIGGROUPNUM: 6282 put32(c, op); 6283 put32(c, va_arg(ap, int)); 6284 break; 6285 case OP_CALL: { 6286 const upb_pbdecodermethod *method = va_arg(ap, upb_pbdecodermethod *); 6287 put32(c, op | (method->code_base.ofs - (pcofs(c) + 1)) << 8); 6288 break; 6289 } 6290 case OP_CHECKDELIM: 6291 case OP_BRANCH: { 6292 uint32_t instruction = op; 6293 int label = va_arg(ap, int); 6294 setofs(&instruction, labelref(c, label)); 6295 put32(c, instruction); 6296 break; 6297 } 6298 case OP_TAG1: 6299 case OP_TAG2: { 6300 int label = va_arg(ap, int); 6301 uint64_t tag = va_arg(ap, uint64_t); 6302 uint32_t instruction = (uint32_t)(op | (tag << 16)); 6303 UPB_ASSERT(tag <= 0xffff); 6304 setofs(&instruction, labelref(c, label)); 6305 put32(c, instruction); 6306 break; 6307 } 6308 case OP_TAGN: { 6309 int label = va_arg(ap, int); 6310 uint64_t tag = va_arg(ap, uint64_t); 6311 
uint32_t instruction = op | (upb_value_size(tag) << 16); 6312 setofs(&instruction, labelref(c, label)); 6313 put32(c, instruction); 6314 put32(c, (uint32_t)tag); 6315 put32(c, tag >> 32); 6316 break; 6317 } 6318 } 6319 6320 va_end(ap); 6321} 6322 6323#if defined(UPB_DUMP_BYTECODE) 6324 6325const char *upb_pbdecoder_getopname(unsigned int op) { 6326#define QUOTE(x) #x 6327#define EXPAND_AND_QUOTE(x) QUOTE(x) 6328#define OPNAME(x) OP_##x 6329#define OP(x) case OPNAME(x): return EXPAND_AND_QUOTE(OPNAME(x)); 6330#define T(x) OP(PARSE_##x) 6331 /* Keep in sync with list in decoder.int.h. */ 6332 switch ((opcode)op) { 6333 T(DOUBLE) T(FLOAT) T(INT64) T(UINT64) T(INT32) T(FIXED64) T(FIXED32) 6334 T(BOOL) T(UINT32) T(SFIXED32) T(SFIXED64) T(SINT32) T(SINT64) 6335 OP(STARTMSG) OP(ENDMSG) OP(STARTSEQ) OP(ENDSEQ) OP(STARTSUBMSG) 6336 OP(ENDSUBMSG) OP(STARTSTR) OP(STRING) OP(ENDSTR) OP(CALL) OP(RET) 6337 OP(PUSHLENDELIM) OP(PUSHTAGDELIM) OP(SETDELIM) OP(CHECKDELIM) 6338 OP(BRANCH) OP(TAG1) OP(TAG2) OP(TAGN) OP(SETDISPATCH) OP(POP) 6339 OP(SETBIGGROUPNUM) OP(DISPATCH) OP(HALT) 6340 } 6341 return "<unknown op>"; 6342#undef OP 6343#undef T 6344} 6345 6346#endif 6347 6348#ifdef UPB_DUMP_BYTECODE 6349 6350static void dumpbc(uint32_t *p, uint32_t *end, FILE *f) { 6351 6352 uint32_t *begin = p; 6353 6354 while (p < end) { 6355 fprintf(f, "%p %8tx", p, p - begin); 6356 uint32_t instr = *p++; 6357 uint8_t op = getop(instr); 6358 fprintf(f, " %s", upb_pbdecoder_getopname(op)); 6359 switch ((opcode)op) { 6360 case OP_SETDISPATCH: { 6361 const upb_inttable *dispatch; 6362 memcpy(&dispatch, p, sizeof(void*)); 6363 p += ptr_words; 6364 const upb_pbdecodermethod *method = 6365 (void *)((char *)dispatch - 6366 offsetof(upb_pbdecodermethod, dispatch)); 6367 fprintf(f, " %s", upb_msgdef_fullname( 6368 upb_handlers_msgdef(method->dest_handlers_))); 6369 break; 6370 } 6371 case OP_DISPATCH: 6372 case OP_STARTMSG: 6373 case OP_ENDMSG: 6374 case OP_PUSHLENDELIM: 6375 case OP_POP: 6376 case 
OP_SETDELIM: 6377 case OP_HALT: 6378 case OP_RET: 6379 break; 6380 case OP_PARSE_DOUBLE: 6381 case OP_PARSE_FLOAT: 6382 case OP_PARSE_INT64: 6383 case OP_PARSE_UINT64: 6384 case OP_PARSE_INT32: 6385 case OP_PARSE_FIXED64: 6386 case OP_PARSE_FIXED32: 6387 case OP_PARSE_BOOL: 6388 case OP_PARSE_UINT32: 6389 case OP_PARSE_SFIXED32: 6390 case OP_PARSE_SFIXED64: 6391 case OP_PARSE_SINT32: 6392 case OP_PARSE_SINT64: 6393 case OP_STARTSEQ: 6394 case OP_ENDSEQ: 6395 case OP_STARTSUBMSG: 6396 case OP_ENDSUBMSG: 6397 case OP_STARTSTR: 6398 case OP_STRING: 6399 case OP_ENDSTR: 6400 case OP_PUSHTAGDELIM: 6401 fprintf(f, " %d", instr >> 8); 6402 break; 6403 case OP_SETBIGGROUPNUM: 6404 fprintf(f, " %d", *p++); 6405 break; 6406 case OP_CHECKDELIM: 6407 case OP_CALL: 6408 case OP_BRANCH: 6409 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6410 break; 6411 case OP_TAG1: 6412 case OP_TAG2: { 6413 fprintf(f, " tag:0x%x", instr >> 16); 6414 if (getofs(instr)) { 6415 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6416 } 6417 break; 6418 } 6419 case OP_TAGN: { 6420 uint64_t tag = *p++; 6421 tag |= (uint64_t)*p++ << 32; 6422 fprintf(f, " tag:0x%llx", (long long)tag); 6423 fprintf(f, " n:%d", instr >> 16); 6424 if (getofs(instr)) { 6425 fprintf(f, " =>0x%tx", p + getofs(instr) - begin); 6426 } 6427 break; 6428 } 6429 } 6430 fputs("\n", f); 6431 } 6432} 6433 6434#endif 6435 6436static uint64_t get_encoded_tag(const upb_fielddef *f, int wire_type) { 6437 uint32_t tag = (upb_fielddef_number(f) << 3) | wire_type; 6438 uint64_t encoded_tag = upb_vencode32(tag); 6439 /* No tag should be greater than 5 bytes. 
*/ 6440 UPB_ASSERT(encoded_tag <= 0xffffffffff); 6441 return encoded_tag; 6442} 6443 6444static void putchecktag(compiler *c, const upb_fielddef *f, 6445 int wire_type, int dest) { 6446 uint64_t tag = get_encoded_tag(f, wire_type); 6447 switch (upb_value_size(tag)) { 6448 case 1: 6449 putop(c, OP_TAG1, dest, tag); 6450 break; 6451 case 2: 6452 putop(c, OP_TAG2, dest, tag); 6453 break; 6454 default: 6455 putop(c, OP_TAGN, dest, tag); 6456 break; 6457 } 6458} 6459 6460static upb_selector_t getsel(const upb_fielddef *f, upb_handlertype_t type) { 6461 upb_selector_t selector; 6462 bool ok = upb_handlers_getselector(f, type, &selector); 6463 UPB_ASSERT(ok); 6464 return selector; 6465} 6466 6467/* Takes an existing, primary dispatch table entry and repacks it with a 6468 * different alternate wire type. Called when we are inserting a secondary 6469 * dispatch table entry for an alternate wire type. */ 6470static uint64_t repack(uint64_t dispatch, int new_wt2) { 6471 uint64_t ofs; 6472 uint8_t wt1; 6473 uint8_t old_wt2; 6474 upb_pbdecoder_unpackdispatch(dispatch, &ofs, &wt1, &old_wt2); 6475 UPB_ASSERT(old_wt2 == NO_WIRE_TYPE); /* wt2 should not be set yet. */ 6476 return upb_pbdecoder_packdispatch(ofs, wt1, new_wt2); 6477} 6478 6479/* Marks the current bytecode position as the dispatch target for this message, 6480 * field, and wire type. */ 6481static void dispatchtarget(compiler *c, upb_pbdecodermethod *method, 6482 const upb_fielddef *f, int wire_type) { 6483 /* Offset is relative to msg base. */ 6484 uint64_t ofs = pcofs(c) - method->code_base.ofs; 6485 uint32_t fn = upb_fielddef_number(f); 6486 upb_inttable *d = &method->dispatch; 6487 upb_value v; 6488 if (upb_inttable_remove(d, fn, &v)) { 6489 /* TODO: prioritize based on packed setting in .proto file. 
*/ 6490 uint64_t repacked = repack(upb_value_getuint64(v), wire_type); 6491 upb_inttable_insert(d, fn, upb_value_uint64(repacked)); 6492 upb_inttable_insert(d, fn + UPB_MAX_FIELDNUMBER, upb_value_uint64(ofs)); 6493 } else { 6494 uint64_t val = upb_pbdecoder_packdispatch(ofs, wire_type, NO_WIRE_TYPE); 6495 upb_inttable_insert(d, fn, upb_value_uint64(val)); 6496 } 6497} 6498 6499static void putpush(compiler *c, const upb_fielddef *f) { 6500 if (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) { 6501 putop(c, OP_PUSHLENDELIM); 6502 } else { 6503 uint32_t fn = upb_fielddef_number(f); 6504 if (fn >= 1 << 24) { 6505 putop(c, OP_PUSHTAGDELIM, 0); 6506 putop(c, OP_SETBIGGROUPNUM, fn); 6507 } else { 6508 putop(c, OP_PUSHTAGDELIM, fn); 6509 } 6510 } 6511} 6512 6513static upb_pbdecodermethod *find_submethod(const compiler *c, 6514 const upb_pbdecodermethod *method, 6515 const upb_fielddef *f) { 6516 const upb_handlers *sub = 6517 upb_handlers_getsubhandlers(method->dest_handlers_, f); 6518 upb_value v; 6519 return upb_inttable_lookupptr(&c->group->methods, sub, &v) 6520 ? upb_value_getptr(v) 6521 : NULL; 6522} 6523 6524static void putsel(compiler *c, opcode op, upb_selector_t sel, 6525 const upb_handlers *h) { 6526 if (upb_handlers_gethandler(h, sel, NULL)) { 6527 putop(c, op, sel); 6528 } 6529} 6530 6531/* Puts an opcode to call a callback, but only if a callback actually exists for 6532 * this field and handler type. 
*/ 6533static void maybeput(compiler *c, opcode op, const upb_handlers *h, 6534 const upb_fielddef *f, upb_handlertype_t type) { 6535 putsel(c, op, getsel(f, type), h); 6536} 6537 6538static bool haslazyhandlers(const upb_handlers *h, const upb_fielddef *f) { 6539 if (!upb_fielddef_lazy(f)) 6540 return false; 6541 6542 return upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STARTSTR), NULL) || 6543 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_STRING), NULL) || 6544 upb_handlers_gethandler(h, getsel(f, UPB_HANDLER_ENDSTR), NULL); 6545} 6546 6547 6548/* bytecode compiler code generation ******************************************/ 6549 6550/* Symbolic names for our local labels. */ 6551#define LABEL_LOOPSTART 1 /* Top of a repeated field loop. */ 6552#define LABEL_LOOPBREAK 2 /* To jump out of a repeated loop */ 6553#define LABEL_FIELD 3 /* Jump backward to find the most recent field. */ 6554#define LABEL_ENDMSG 4 /* To reach the OP_ENDMSG instr for this msg. */ 6555 6556/* Generates bytecode to parse a single non-lazy message field. */ 6557static void generate_msgfield(compiler *c, const upb_fielddef *f, 6558 upb_pbdecodermethod *method) { 6559 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); 6560 const upb_pbdecodermethod *sub_m = find_submethod(c, method, f); 6561 int wire_type; 6562 6563 if (!sub_m) { 6564 /* Don't emit any code for this field at all; it will be parsed as an 6565 * unknown field. 6566 * 6567 * TODO(haberman): we should change this to parse it as a string field 6568 * instead. It will probably be faster, but more importantly, once we 6569 * start vending unknown fields, a field shouldn't be treated as unknown 6570 * just because it doesn't have subhandlers registered. */ 6571 return; 6572 } 6573 6574 label(c, LABEL_FIELD); 6575 6576 wire_type = 6577 (upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_MESSAGE) 6578 ? 
UPB_WIRE_TYPE_DELIMITED 6579 : UPB_WIRE_TYPE_START_GROUP; 6580 6581 if (upb_fielddef_isseq(f)) { 6582 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 6583 putchecktag(c, f, wire_type, LABEL_DISPATCH); 6584 dispatchtarget(c, method, f, wire_type); 6585 putop(c, OP_PUSHTAGDELIM, 0); 6586 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); 6587 label(c, LABEL_LOOPSTART); 6588 putpush(c, f); 6589 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); 6590 putop(c, OP_CALL, sub_m); 6591 putop(c, OP_POP); 6592 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); 6593 if (wire_type == UPB_WIRE_TYPE_DELIMITED) { 6594 putop(c, OP_SETDELIM); 6595 } 6596 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 6597 putchecktag(c, f, wire_type, LABEL_LOOPBREAK); 6598 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 6599 label(c, LABEL_LOOPBREAK); 6600 putop(c, OP_POP); 6601 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); 6602 } else { 6603 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 6604 putchecktag(c, f, wire_type, LABEL_DISPATCH); 6605 dispatchtarget(c, method, f, wire_type); 6606 putpush(c, f); 6607 putop(c, OP_STARTSUBMSG, getsel(f, UPB_HANDLER_STARTSUBMSG)); 6608 putop(c, OP_CALL, sub_m); 6609 putop(c, OP_POP); 6610 maybeput(c, OP_ENDSUBMSG, h, f, UPB_HANDLER_ENDSUBMSG); 6611 if (wire_type == UPB_WIRE_TYPE_DELIMITED) { 6612 putop(c, OP_SETDELIM); 6613 } 6614 } 6615} 6616 6617/* Generates bytecode to parse a single string or lazy submessage field. 
*/
static void generate_delimfield(compiler *c, const upb_fielddef *f,
                                upb_pbdecodermethod *method) {
  const upb_handlers *h = upb_pbdecodermethod_desthandlers(method);
  bool repeated;

  label(c, LABEL_FIELD);
  repeated = upb_fielddef_isseq(f);

  /* Prologue shared by the repeated and singular forms. */
  putop(c, OP_CHECKDELIM, LABEL_ENDMSG);
  putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH);
  dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED);

  if (repeated) {
    putop(c, OP_PUSHTAGDELIM, 0);
    putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ));
    label(c, LABEL_LOOPSTART);
  }

  /* One length-delimited value. */
  putop(c, OP_PUSHLENDELIM);
  putop(c, OP_STARTSTR, getsel(f, UPB_HANDLER_STARTSTR));
  /* Need to emit even if no handler to skip past the string. */
  putop(c, OP_STRING, getsel(f, UPB_HANDLER_STRING));
  maybeput(c, OP_ENDSTR, h, f, UPB_HANDLER_ENDSTR);
  putop(c, OP_POP);
  putop(c, OP_SETDELIM);

  if (repeated) {
    /* Loop for as long as the next tag is this same field. */
    putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK);
    putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_LOOPBREAK);
    putop(c, OP_BRANCH, -LABEL_LOOPSTART);
    label(c, LABEL_LOOPBREAK);
    putop(c, OP_POP);
    maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ);
  }
}

/* Generates bytecode to parse a single primitive field.
*/ 6657static void generate_primitivefield(compiler *c, const upb_fielddef *f, 6658 upb_pbdecodermethod *method) { 6659 const upb_handlers *h = upb_pbdecodermethod_desthandlers(method); 6660 upb_descriptortype_t descriptor_type = upb_fielddef_descriptortype(f); 6661 opcode parse_type; 6662 upb_selector_t sel; 6663 int wire_type; 6664 6665 label(c, LABEL_FIELD); 6666 6667 /* From a decoding perspective, ENUM is the same as INT32. */ 6668 if (descriptor_type == UPB_DESCRIPTOR_TYPE_ENUM) 6669 descriptor_type = UPB_DESCRIPTOR_TYPE_INT32; 6670 6671 parse_type = (opcode)descriptor_type; 6672 6673 /* TODO(haberman): generate packed or non-packed first depending on "packed" 6674 * setting in the fielddef. This will favor (in speed) whichever was 6675 * specified. */ 6676 6677 UPB_ASSERT((int)parse_type >= 0 && parse_type <= OP_MAX); 6678 sel = getsel(f, upb_handlers_getprimitivehandlertype(f)); 6679 wire_type = upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; 6680 if (upb_fielddef_isseq(f)) { 6681 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 6682 putchecktag(c, f, UPB_WIRE_TYPE_DELIMITED, LABEL_DISPATCH); 6683 dispatchtarget(c, method, f, UPB_WIRE_TYPE_DELIMITED); 6684 putop(c, OP_PUSHLENDELIM); 6685 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Packed */ 6686 label(c, LABEL_LOOPSTART); 6687 putop(c, parse_type, sel); 6688 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 6689 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 6690 dispatchtarget(c, method, f, wire_type); 6691 putop(c, OP_PUSHTAGDELIM, 0); 6692 putop(c, OP_STARTSEQ, getsel(f, UPB_HANDLER_STARTSEQ)); /* Non-packed */ 6693 label(c, LABEL_LOOPSTART); 6694 putop(c, parse_type, sel); 6695 putop(c, OP_CHECKDELIM, LABEL_LOOPBREAK); 6696 putchecktag(c, f, wire_type, LABEL_LOOPBREAK); 6697 putop(c, OP_BRANCH, -LABEL_LOOPSTART); 6698 label(c, LABEL_LOOPBREAK); 6699 putop(c, OP_POP); /* Packed and non-packed join. 
*/ 6700 maybeput(c, OP_ENDSEQ, h, f, UPB_HANDLER_ENDSEQ); 6701 putop(c, OP_SETDELIM); /* Could remove for non-packed by dup ENDSEQ. */ 6702 } else { 6703 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 6704 putchecktag(c, f, wire_type, LABEL_DISPATCH); 6705 dispatchtarget(c, method, f, wire_type); 6706 putop(c, parse_type, sel); 6707 } 6708} 6709 6710/* Adds bytecode for parsing the given message to the given decoderplan, 6711 * while adding all dispatch targets to this message's dispatch table. */ 6712static void compile_method(compiler *c, upb_pbdecodermethod *method) { 6713 const upb_handlers *h; 6714 const upb_msgdef *md; 6715 uint32_t* start_pc; 6716 upb_msg_field_iter i; 6717 upb_value val; 6718 6719 UPB_ASSERT(method); 6720 6721 /* Clear all entries in the dispatch table. */ 6722 upb_inttable_uninit(&method->dispatch); 6723 upb_inttable_init(&method->dispatch, UPB_CTYPE_UINT64); 6724 6725 h = upb_pbdecodermethod_desthandlers(method); 6726 md = upb_handlers_msgdef(h); 6727 6728 method->code_base.ofs = pcofs(c); 6729 putop(c, OP_SETDISPATCH, &method->dispatch); 6730 putsel(c, OP_STARTMSG, UPB_STARTMSG_SELECTOR, h); 6731 label(c, LABEL_FIELD); 6732 start_pc = c->pc; 6733 for(upb_msg_field_begin(&i, md); 6734 !upb_msg_field_done(&i); 6735 upb_msg_field_next(&i)) { 6736 const upb_fielddef *f = upb_msg_iter_field(&i); 6737 upb_fieldtype_t type = upb_fielddef_type(f); 6738 6739 if (type == UPB_TYPE_MESSAGE && !(haslazyhandlers(h, f) && c->lazy)) { 6740 generate_msgfield(c, f, method); 6741 } else if (type == UPB_TYPE_STRING || type == UPB_TYPE_BYTES || 6742 type == UPB_TYPE_MESSAGE) { 6743 generate_delimfield(c, f, method); 6744 } else { 6745 generate_primitivefield(c, f, method); 6746 } 6747 } 6748 6749 /* If there were no fields, or if no handlers were defined, we need to 6750 * generate a non-empty loop body so that we can at least dispatch for unknown 6751 * fields and check for the end of the message. */ 6752 if (c->pc == start_pc) { 6753 /* Check for end-of-message. 
*/ 6754 putop(c, OP_CHECKDELIM, LABEL_ENDMSG); 6755 /* Unconditionally dispatch. */ 6756 putop(c, OP_DISPATCH, 0); 6757 } 6758 6759 /* For now we just loop back to the last field of the message (or if none, 6760 * the DISPATCH opcode for the message). */ 6761 putop(c, OP_BRANCH, -LABEL_FIELD); 6762 6763 /* Insert both a label and a dispatch table entry for this end-of-msg. */ 6764 label(c, LABEL_ENDMSG); 6765 val = upb_value_uint64(pcofs(c) - method->code_base.ofs); 6766 upb_inttable_insert(&method->dispatch, DISPATCH_ENDMSG, val); 6767 6768 putsel(c, OP_ENDMSG, UPB_ENDMSG_SELECTOR, h); 6769 putop(c, OP_RET); 6770 6771 upb_inttable_compact(&method->dispatch); 6772} 6773 6774/* Populate "methods" with new upb_pbdecodermethod objects reachable from "h". 6775 * Returns the method for these handlers. 6776 * 6777 * Generates a new method for every destination handlers reachable from "h". */ 6778static void find_methods(compiler *c, const upb_handlers *h) { 6779 upb_value v; 6780 upb_msg_field_iter i; 6781 const upb_msgdef *md; 6782 upb_pbdecodermethod *method; 6783 6784 if (upb_inttable_lookupptr(&c->group->methods, h, &v)) 6785 return; 6786 6787 method = newmethod(h, c->group); 6788 upb_inttable_insertptr(&c->group->methods, h, upb_value_ptr(method)); 6789 6790 /* Find submethods. */ 6791 md = upb_handlers_msgdef(h); 6792 for(upb_msg_field_begin(&i, md); 6793 !upb_msg_field_done(&i); 6794 upb_msg_field_next(&i)) { 6795 const upb_fielddef *f = upb_msg_iter_field(&i); 6796 const upb_handlers *sub_h; 6797 if (upb_fielddef_type(f) == UPB_TYPE_MESSAGE && 6798 (sub_h = upb_handlers_getsubhandlers(h, f)) != NULL) { 6799 /* We only generate a decoder method for submessages with handlers. 6800 * Others will be parsed as unknown fields. */ 6801 find_methods(c, sub_h); 6802 } 6803 } 6804} 6805 6806/* (Re-)compile bytecode for all messages in "msgs." 6807 * Overwrites any existing bytecode in "c". 
*/ 6808static void compile_methods(compiler *c) { 6809 upb_inttable_iter i; 6810 6811 /* Start over at the beginning of the bytecode. */ 6812 c->pc = c->group->bytecode; 6813 6814 upb_inttable_begin(&i, &c->group->methods); 6815 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 6816 upb_pbdecodermethod *method = upb_value_getptr(upb_inttable_iter_value(&i)); 6817 compile_method(c, method); 6818 } 6819} 6820 6821static void set_bytecode_handlers(mgroup *g) { 6822 upb_inttable_iter i; 6823 upb_inttable_begin(&i, &g->methods); 6824 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 6825 upb_pbdecodermethod *m = upb_value_getptr(upb_inttable_iter_value(&i)); 6826 upb_byteshandler *h = &m->input_handler_; 6827 6828 m->code_base.ptr = g->bytecode + m->code_base.ofs; 6829 6830 upb_byteshandler_setstartstr(h, upb_pbdecoder_startbc, m->code_base.ptr); 6831 upb_byteshandler_setstring(h, upb_pbdecoder_decode, g); 6832 upb_byteshandler_setendstr(h, upb_pbdecoder_end, m); 6833 } 6834} 6835 6836 6837/* TODO(haberman): allow this to be constructed for an arbitrary set of dest 6838 * handlers and other mgroups (but verify we have a transitive closure). */ 6839const mgroup *mgroup_new(const upb_handlers *dest, bool lazy) { 6840 mgroup *g; 6841 compiler *c; 6842 6843 g = newgroup(); 6844 c = newcompiler(g, lazy); 6845 find_methods(c, dest); 6846 6847 /* We compile in two passes: 6848 * 1. all messages are assigned relative offsets from the beginning of the 6849 * bytecode (saved in method->code_base). 6850 * 2. forwards OP_CALL instructions can be correctly linked since message 6851 * offsets have been previously assigned. 6852 * 6853 * Could avoid the second pass by linking OP_CALL instructions somehow. 
*/ 6854 compile_methods(c); 6855 compile_methods(c); 6856 g->bytecode_end = c->pc; 6857 freecompiler(c); 6858 6859#ifdef UPB_DUMP_BYTECODE 6860 { 6861 FILE *f = fopen("/tmp/upb-bytecode", "w"); 6862 UPB_ASSERT(f); 6863 dumpbc(g->bytecode, g->bytecode_end, stderr); 6864 dumpbc(g->bytecode, g->bytecode_end, f); 6865 fclose(f); 6866 6867 f = fopen("/tmp/upb-bytecode.bin", "wb"); 6868 UPB_ASSERT(f); 6869 fwrite(g->bytecode, 1, g->bytecode_end - g->bytecode, f); 6870 fclose(f); 6871 } 6872#endif 6873 6874 set_bytecode_handlers(g); 6875 return g; 6876} 6877 6878 6879/* upb_pbcodecache ************************************************************/ 6880 6881upb_pbcodecache *upb_pbcodecache_new(upb_handlercache *dest) { 6882 upb_pbcodecache *c = upb_gmalloc(sizeof(*c)); 6883 6884 if (!c) return NULL; 6885 6886 c->dest = dest; 6887 c->lazy = false; 6888 6889 c->arena = upb_arena_new(); 6890 if (!upb_inttable_init(&c->groups, UPB_CTYPE_CONSTPTR)) return NULL; 6891 6892 return c; 6893} 6894 6895void upb_pbcodecache_free(upb_pbcodecache *c) { 6896 upb_inttable_iter i; 6897 6898 upb_inttable_begin(&i, &c->groups); 6899 for(; !upb_inttable_done(&i); upb_inttable_next(&i)) { 6900 upb_value val = upb_inttable_iter_value(&i); 6901 freegroup((void*)upb_value_getconstptr(val)); 6902 } 6903 6904 upb_inttable_uninit(&c->groups); 6905 upb_arena_free(c->arena); 6906 upb_gfree(c); 6907} 6908 6909void upb_pbdecodermethodopts_setlazy(upb_pbcodecache *c, bool lazy) { 6910 UPB_ASSERT(upb_inttable_count(&c->groups) == 0); 6911 c->lazy = lazy; 6912} 6913 6914const upb_pbdecodermethod *upb_pbcodecache_get(upb_pbcodecache *c, 6915 const upb_msgdef *md) { 6916 upb_value v; 6917 bool ok; 6918 const upb_handlers *h; 6919 const mgroup *g; 6920 6921 h = upb_handlercache_get(c->dest, md); 6922 if (upb_inttable_lookupptr(&c->groups, md, &v)) { 6923 g = upb_value_getconstptr(v); 6924 } else { 6925 g = mgroup_new(h, c->lazy); 6926 ok = upb_inttable_insertptr(&c->groups, md, upb_value_constptr(g)); 6927 
UPB_ASSUME(ok); 6928 } 6929 6930 ok = upb_inttable_lookupptr(&g->methods, h, &v); 6931 UPB_ASSUME(ok); 6932 return upb_value_getptr(v); 6933} 6934/* 6935** upb::Decoder (Bytecode Decoder VM) 6936** 6937** Bytecode must previously have been generated using the bytecode compiler in 6938** compile_decoder.c. This decoder then walks through the bytecode op-by-op to 6939** parse the input. 6940** 6941** Decoding is fully resumable; we just keep a pointer to the current bytecode 6942** instruction and resume from there. A fair amount of the logic here is to 6943** handle the fact that values can span buffer seams and we have to be able to 6944** be capable of suspending/resuming from any byte in the stream. This 6945** sometimes requires keeping a few trailing bytes from the last buffer around 6946** in the "residual" buffer. 6947*/ 6948 6949#include <inttypes.h> 6950#include <stddef.h> 6951 6952#ifdef UPB_DUMP_BYTECODE 6953#include <stdio.h> 6954#endif 6955 6956 6957#define CHECK_SUSPEND(x) if (!(x)) return upb_pbdecoder_suspend(d); 6958 6959/* Error messages that are shared between the bytecode and JIT decoders. */ 6960const char *kPbDecoderStackOverflow = "Nesting too deep."; 6961const char *kPbDecoderSubmessageTooLong = 6962 "Submessage end extends past enclosing submessage."; 6963 6964/* Error messages shared within this file. */ 6965static const char *kUnterminatedVarint = "Unterminated varint."; 6966 6967/* upb_pbdecoder **************************************************************/ 6968 6969static opcode halt = OP_HALT; 6970 6971/* A dummy character we can point to when the user passes us a NULL buffer. 6972 * We need this because in C (NULL + 0) and (NULL - NULL) are undefined 6973 * behavior, which would invalidate functions like curbufleft(). */ 6974static const char dummy_char; 6975 6976/* Whether an op consumes any of the input buffer. 
*/ 6977static bool consumes_input(opcode op) { 6978 switch (op) { 6979 case OP_SETDISPATCH: 6980 case OP_STARTMSG: 6981 case OP_ENDMSG: 6982 case OP_STARTSEQ: 6983 case OP_ENDSEQ: 6984 case OP_STARTSUBMSG: 6985 case OP_ENDSUBMSG: 6986 case OP_STARTSTR: 6987 case OP_ENDSTR: 6988 case OP_PUSHTAGDELIM: 6989 case OP_POP: 6990 case OP_SETDELIM: 6991 case OP_SETBIGGROUPNUM: 6992 case OP_CHECKDELIM: 6993 case OP_CALL: 6994 case OP_RET: 6995 case OP_BRANCH: 6996 return false; 6997 default: 6998 return true; 6999 } 7000} 7001 7002static size_t stacksize(upb_pbdecoder *d, size_t entries) { 7003 UPB_UNUSED(d); 7004 return entries * sizeof(upb_pbdecoder_frame); 7005} 7006 7007static size_t callstacksize(upb_pbdecoder *d, size_t entries) { 7008 UPB_UNUSED(d); 7009 7010 return entries * sizeof(uint32_t*); 7011} 7012 7013 7014static bool in_residual_buf(const upb_pbdecoder *d, const char *p); 7015 7016/* It's unfortunate that we have to micro-manage the compiler with 7017 * UPB_FORCEINLINE and UPB_NOINLINE, especially since this tuning is necessarily 7018 * specific to one hardware configuration. But empirically on a Core i7, 7019 * performance increases 30-50% with these annotations. Every instance where 7020 * these appear, gcc 4.2.1 made the wrong decision and degraded performance in 7021 * benchmarks. */ 7022 7023static void seterr(upb_pbdecoder *d, const char *msg) { 7024 upb_status_seterrmsg(d->status, msg); 7025} 7026 7027void upb_pbdecoder_seterr(upb_pbdecoder *d, const char *msg) { 7028 seterr(d, msg); 7029} 7030 7031 7032/* Buffering ******************************************************************/ 7033 7034/* We operate on one buffer at a time, which is either the user's buffer passed 7035 * to our "decode" callback or some residual bytes from the previous buffer. */ 7036 7037/* How many bytes can be safely read from d->ptr without reading past end-of-buf 7038 * or past the current delimited end. 
*/ 7039static size_t curbufleft(const upb_pbdecoder *d) { 7040 UPB_ASSERT(d->data_end >= d->ptr); 7041 return d->data_end - d->ptr; 7042} 7043 7044/* How many bytes are available before end-of-buffer. */ 7045static size_t bufleft(const upb_pbdecoder *d) { 7046 return d->end - d->ptr; 7047} 7048 7049/* Overall stream offset of d->ptr. */ 7050uint64_t offset(const upb_pbdecoder *d) { 7051 return d->bufstart_ofs + (d->ptr - d->buf); 7052} 7053 7054/* How many bytes are available before the end of this delimited region. */ 7055size_t delim_remaining(const upb_pbdecoder *d) { 7056 return d->top->end_ofs - offset(d); 7057} 7058 7059/* Advances d->ptr. */ 7060static void advance(upb_pbdecoder *d, size_t len) { 7061 UPB_ASSERT(curbufleft(d) >= len); 7062 d->ptr += len; 7063} 7064 7065static bool in_buf(const char *p, const char *buf, const char *end) { 7066 return p >= buf && p <= end; 7067} 7068 7069static bool in_residual_buf(const upb_pbdecoder *d, const char *p) { 7070 return in_buf(p, d->residual, d->residual_end); 7071} 7072 7073/* Calculates the delim_end value, which is affected by both the current buffer 7074 * and the parsing stack, so must be called whenever either is updated. 
*/ 7075static void set_delim_end(upb_pbdecoder *d) { 7076 size_t delim_ofs = d->top->end_ofs - d->bufstart_ofs; 7077 if (delim_ofs <= (size_t)(d->end - d->buf)) { 7078 d->delim_end = d->buf + delim_ofs; 7079 d->data_end = d->delim_end; 7080 } else { 7081 d->data_end = d->end; 7082 d->delim_end = NULL; 7083 } 7084} 7085 7086static void switchtobuf(upb_pbdecoder *d, const char *buf, const char *end) { 7087 d->ptr = buf; 7088 d->buf = buf; 7089 d->end = end; 7090 set_delim_end(d); 7091} 7092 7093static void advancetobuf(upb_pbdecoder *d, const char *buf, size_t len) { 7094 UPB_ASSERT(curbufleft(d) == 0); 7095 d->bufstart_ofs += (d->end - d->buf); 7096 switchtobuf(d, buf, buf + len); 7097} 7098 7099static void checkpoint(upb_pbdecoder *d) { 7100 /* The assertion here is in the interests of efficiency, not correctness. 7101 * We are trying to ensure that we don't checkpoint() more often than 7102 * necessary. */ 7103 UPB_ASSERT(d->checkpoint != d->ptr); 7104 d->checkpoint = d->ptr; 7105} 7106 7107/* Skips "bytes" bytes in the stream, which may be more than available. If we 7108 * skip more bytes than are available, we return a long read count to the caller 7109 * indicating how many bytes can be skipped over before passing actual data 7110 * again. Skipped bytes can pass a NULL buffer and the decoder guarantees they 7111 * won't actually be read. 7112 */ 7113static int32_t skip(upb_pbdecoder *d, size_t bytes) { 7114 UPB_ASSERT(!in_residual_buf(d, d->ptr) || d->size_param == 0); 7115 UPB_ASSERT(d->skip == 0); 7116 if (bytes > delim_remaining(d)) { 7117 seterr(d, "Skipped value extended beyond enclosing submessage."); 7118 return (int32_t)upb_pbdecoder_suspend(d); 7119 } else if (bufleft(d) >= bytes) { 7120 /* Skipped data is all in current buffer, and more is still available. */ 7121 advance(d, bytes); 7122 d->skip = 0; 7123 return DECODE_OK; 7124 } else { 7125 /* Skipped data extends beyond currently available buffers. 
*/ 7126 d->pc = d->last; 7127 d->skip = bytes - curbufleft(d); 7128 d->bufstart_ofs += (d->end - d->buf); 7129 d->residual_end = d->residual; 7130 switchtobuf(d, d->residual, d->residual_end); 7131 return (int32_t)(d->size_param + d->skip); 7132 } 7133} 7134 7135 7136/* Resumes the decoder from an initial state or from a previous suspend. */ 7137int32_t upb_pbdecoder_resume(upb_pbdecoder *d, void *p, const char *buf, 7138 size_t size, const upb_bufhandle *handle) { 7139 UPB_UNUSED(p); /* Useless; just for the benefit of the JIT. */ 7140 7141 /* d->skip and d->residual_end could probably elegantly be represented 7142 * as a single variable, to more easily represent this invariant. */ 7143 UPB_ASSERT(!(d->skip && d->residual_end > d->residual)); 7144 7145 /* We need to remember the original size_param, so that the value we return 7146 * is relative to it, even if we do some skipping first. */ 7147 d->size_param = size; 7148 d->handle = handle; 7149 7150 /* Have to handle this case specially (ie. not with skip()) because the user 7151 * is allowed to pass a NULL buffer here, which won't allow us to safely 7152 * calculate a d->end or use our normal functions like curbufleft(). */ 7153 if (d->skip && d->skip >= size) { 7154 d->skip -= size; 7155 d->bufstart_ofs += size; 7156 buf = &dummy_char; 7157 size = 0; 7158 7159 /* We can't just return now, because we might need to execute some ops 7160 * like CHECKDELIM, which could call some callbacks and pop the stack. */ 7161 } 7162 7163 /* We need to pretend that this was the actual buffer param, since some of the 7164 * calculations assume that d->ptr/d->buf is relative to this. */ 7165 d->buf_param = buf; 7166 7167 if (!buf) { 7168 /* NULL buf is ok if its entire span is covered by the "skip" above, but 7169 * by this point we know that "skip" doesn't cover the buffer. 
*/ 7170 seterr(d, "Passed NULL buffer over non-skippable region."); 7171 return (int32_t)upb_pbdecoder_suspend(d); 7172 } 7173 7174 if (d->residual_end > d->residual) { 7175 /* We have residual bytes from the last buffer. */ 7176 UPB_ASSERT(d->ptr == d->residual); 7177 } else { 7178 switchtobuf(d, buf, buf + size); 7179 } 7180 7181 d->checkpoint = d->ptr; 7182 7183 /* Handle skips that don't cover the whole buffer (as above). */ 7184 if (d->skip) { 7185 size_t skip_bytes = d->skip; 7186 d->skip = 0; 7187 CHECK_RETURN(skip(d, skip_bytes)); 7188 checkpoint(d); 7189 } 7190 7191 /* If we're inside an unknown group, continue to parse unknown values. */ 7192 if (d->top->groupnum < 0) { 7193 CHECK_RETURN(upb_pbdecoder_skipunknown(d, -1, 0)); 7194 checkpoint(d); 7195 } 7196 7197 return DECODE_OK; 7198} 7199 7200/* Suspends the decoder at the last checkpoint, without saving any residual 7201 * bytes. If there are any unconsumed bytes, returns a short byte count. */ 7202size_t upb_pbdecoder_suspend(upb_pbdecoder *d) { 7203 d->pc = d->last; 7204 if (d->checkpoint == d->residual) { 7205 /* Checkpoint was in residual buf; no user bytes were consumed. */ 7206 d->ptr = d->residual; 7207 return 0; 7208 } else { 7209 size_t ret = d->size_param - (d->end - d->checkpoint); 7210 UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); 7211 UPB_ASSERT(d->buf == d->buf_param || d->buf == &dummy_char); 7212 7213 d->bufstart_ofs += (d->checkpoint - d->buf); 7214 d->residual_end = d->residual; 7215 switchtobuf(d, d->residual, d->residual_end); 7216 return ret; 7217 } 7218} 7219 7220/* Suspends the decoder at the last checkpoint, and saves any unconsumed 7221 * bytes in our residual buffer. This is necessary if we need more user 7222 * bytes to form a complete value, which might not be contiguous in the 7223 * user's buffers. Always consumes all user bytes. */ 7224static size_t suspend_save(upb_pbdecoder *d) { 7225 /* We hit end-of-buffer before we could parse a full value. 
7226 * Save any unconsumed bytes (if any) to the residual buffer. */ 7227 d->pc = d->last; 7228 7229 if (d->checkpoint == d->residual) { 7230 /* Checkpoint was in residual buf; append user byte(s) to residual buf. */ 7231 UPB_ASSERT((d->residual_end - d->residual) + d->size_param <= 7232 sizeof(d->residual)); 7233 if (!in_residual_buf(d, d->ptr)) { 7234 d->bufstart_ofs -= (d->residual_end - d->residual); 7235 } 7236 memcpy(d->residual_end, d->buf_param, d->size_param); 7237 d->residual_end += d->size_param; 7238 } else { 7239 /* Checkpoint was in user buf; old residual bytes not needed. */ 7240 size_t save; 7241 UPB_ASSERT(!in_residual_buf(d, d->checkpoint)); 7242 7243 d->ptr = d->checkpoint; 7244 save = curbufleft(d); 7245 UPB_ASSERT(save <= sizeof(d->residual)); 7246 memcpy(d->residual, d->ptr, save); 7247 d->residual_end = d->residual + save; 7248 d->bufstart_ofs = offset(d); 7249 } 7250 7251 switchtobuf(d, d->residual, d->residual_end); 7252 return d->size_param; 7253} 7254 7255/* Copies the next "bytes" bytes into "buf" and advances the stream. 7256 * Requires that this many bytes are available in the current buffer. */ 7257UPB_FORCEINLINE static void consumebytes(upb_pbdecoder *d, void *buf, 7258 size_t bytes) { 7259 UPB_ASSERT(bytes <= curbufleft(d)); 7260 memcpy(buf, d->ptr, bytes); 7261 advance(d, bytes); 7262} 7263 7264/* Slow path for getting the next "bytes" bytes, regardless of whether they are 7265 * available in the current buffer or not. Returns a status code as described 7266 * in decoder.int.h. 
*/ 7267UPB_NOINLINE static int32_t getbytes_slow(upb_pbdecoder *d, void *buf, 7268 size_t bytes) { 7269 const size_t avail = curbufleft(d); 7270 consumebytes(d, buf, avail); 7271 bytes -= avail; 7272 UPB_ASSERT(bytes > 0); 7273 if (in_residual_buf(d, d->ptr)) { 7274 advancetobuf(d, d->buf_param, d->size_param); 7275 } 7276 if (curbufleft(d) >= bytes) { 7277 consumebytes(d, (char *)buf + avail, bytes); 7278 return DECODE_OK; 7279 } else if (d->data_end == d->delim_end) { 7280 seterr(d, "Submessage ended in the middle of a value or group"); 7281 return (int32_t)upb_pbdecoder_suspend(d); 7282 } else { 7283 return (int32_t)suspend_save(d); 7284 } 7285} 7286 7287/* Gets the next "bytes" bytes, regardless of whether they are available in the 7288 * current buffer or not. Returns a status code as described in decoder.int.h. 7289 */ 7290UPB_FORCEINLINE static int32_t getbytes(upb_pbdecoder *d, void *buf, 7291 size_t bytes) { 7292 if (curbufleft(d) >= bytes) { 7293 /* Buffer has enough data to satisfy. */ 7294 consumebytes(d, buf, bytes); 7295 return DECODE_OK; 7296 } else { 7297 return getbytes_slow(d, buf, bytes); 7298 } 7299} 7300 7301UPB_NOINLINE static size_t peekbytes_slow(upb_pbdecoder *d, void *buf, 7302 size_t bytes) { 7303 size_t ret = curbufleft(d); 7304 memcpy(buf, d->ptr, ret); 7305 if (in_residual_buf(d, d->ptr)) { 7306 size_t copy = UPB_MIN(bytes - ret, d->size_param); 7307 memcpy((char *)buf + ret, d->buf_param, copy); 7308 ret += copy; 7309 } 7310 return ret; 7311} 7312 7313UPB_FORCEINLINE static size_t peekbytes(upb_pbdecoder *d, void *buf, 7314 size_t bytes) { 7315 if (curbufleft(d) >= bytes) { 7316 memcpy(buf, d->ptr, bytes); 7317 return bytes; 7318 } else { 7319 return peekbytes_slow(d, buf, bytes); 7320 } 7321} 7322 7323 7324/* Decoding of wire types *****************************************************/ 7325 7326/* Slow path for decoding a varint from the current buffer position. 7327 * Returns a status code as described in decoder.int.h. 
*/ 7328UPB_NOINLINE int32_t upb_pbdecoder_decode_varint_slow(upb_pbdecoder *d, 7329 uint64_t *u64) { 7330 uint8_t byte = 0x80; 7331 int bitpos; 7332 *u64 = 0; 7333 for(bitpos = 0; bitpos < 70 && (byte & 0x80); bitpos += 7) { 7334 CHECK_RETURN(getbytes(d, &byte, 1)); 7335 *u64 |= (uint64_t)(byte & 0x7F) << bitpos; 7336 } 7337 if(bitpos == 70 && (byte & 0x80)) { 7338 seterr(d, kUnterminatedVarint); 7339 return (int32_t)upb_pbdecoder_suspend(d); 7340 } 7341 return DECODE_OK; 7342} 7343 7344/* Decodes a varint from the current buffer position. 7345 * Returns a status code as described in decoder.int.h. */ 7346UPB_FORCEINLINE static int32_t decode_varint(upb_pbdecoder *d, uint64_t *u64) { 7347 if (curbufleft(d) > 0 && !(*d->ptr & 0x80)) { 7348 *u64 = *d->ptr; 7349 advance(d, 1); 7350 return DECODE_OK; 7351 } else if (curbufleft(d) >= 10) { 7352 /* Fast case. */ 7353 upb_decoderet r = upb_vdecode_fast(d->ptr); 7354 if (r.p == NULL) { 7355 seterr(d, kUnterminatedVarint); 7356 return (int32_t)upb_pbdecoder_suspend(d); 7357 } 7358 advance(d, r.p - d->ptr); 7359 *u64 = r.val; 7360 return DECODE_OK; 7361 } else { 7362 /* Slow case -- varint spans buffer seam. */ 7363 return upb_pbdecoder_decode_varint_slow(d, u64); 7364 } 7365} 7366 7367/* Decodes a 32-bit varint from the current buffer position. 7368 * Returns a status code as described in decoder.int.h. */ 7369UPB_FORCEINLINE static int32_t decode_v32(upb_pbdecoder *d, uint32_t *u32) { 7370 uint64_t u64; 7371 int32_t ret = decode_varint(d, &u64); 7372 if (ret >= 0) return ret; 7373 if (u64 > UINT32_MAX) { 7374 seterr(d, "Unterminated 32-bit varint"); 7375 /* TODO(haberman) guarantee that this function return is >= 0 somehow, 7376 * so we know this path will always be treated as error by our caller. 7377 * Right now the size_t -> int32_t can overflow and produce negative values. 
7378 */ 7379 *u32 = 0; 7380 return (int32_t)upb_pbdecoder_suspend(d); 7381 } 7382 *u32 = (uint32_t)u64; 7383 return DECODE_OK; 7384} 7385 7386/* Decodes a fixed32 from the current buffer position. 7387 * Returns a status code as described in decoder.int.h. 7388 * TODO: proper byte swapping for big-endian machines. */ 7389UPB_FORCEINLINE static int32_t decode_fixed32(upb_pbdecoder *d, uint32_t *u32) { 7390 return getbytes(d, u32, 4); 7391} 7392 7393/* Decodes a fixed64 from the current buffer position. 7394 * Returns a status code as described in decoder.int.h. 7395 * TODO: proper byte swapping for big-endian machines. */ 7396UPB_FORCEINLINE static int32_t decode_fixed64(upb_pbdecoder *d, uint64_t *u64) { 7397 return getbytes(d, u64, 8); 7398} 7399 7400/* Non-static versions of the above functions. 7401 * These are called by the JIT for fallback paths. */ 7402int32_t upb_pbdecoder_decode_f32(upb_pbdecoder *d, uint32_t *u32) { 7403 return decode_fixed32(d, u32); 7404} 7405 7406int32_t upb_pbdecoder_decode_f64(upb_pbdecoder *d, uint64_t *u64) { 7407 return decode_fixed64(d, u64); 7408} 7409 7410static double as_double(uint64_t n) { double d; memcpy(&d, &n, 8); return d; } 7411static float as_float(uint32_t n) { float f; memcpy(&f, &n, 4); return f; } 7412 7413/* Pushes a frame onto the decoder stack. 
*/ 7414static bool decoder_push(upb_pbdecoder *d, uint64_t end) { 7415 upb_pbdecoder_frame *fr = d->top; 7416 7417 if (end > fr->end_ofs) { 7418 seterr(d, kPbDecoderSubmessageTooLong); 7419 return false; 7420 } else if (fr == d->limit) { 7421 seterr(d, kPbDecoderStackOverflow); 7422 return false; 7423 } 7424 7425 fr++; 7426 fr->end_ofs = end; 7427 fr->dispatch = NULL; 7428 fr->groupnum = 0; 7429 d->top = fr; 7430 return true; 7431} 7432 7433static bool pushtagdelim(upb_pbdecoder *d, uint32_t arg) { 7434 /* While we expect to see an "end" tag (either ENDGROUP or a non-sequence 7435 * field number) prior to hitting any enclosing submessage end, pushing our 7436 * existing delim end prevents us from continuing to parse values from a 7437 * corrupt proto that doesn't give us an END tag in time. */ 7438 if (!decoder_push(d, d->top->end_ofs)) 7439 return false; 7440 d->top->groupnum = arg; 7441 return true; 7442} 7443 7444/* Pops a frame from the decoder stack. */ 7445static void decoder_pop(upb_pbdecoder *d) { d->top--; } 7446 7447UPB_NOINLINE int32_t upb_pbdecoder_checktag_slow(upb_pbdecoder *d, 7448 uint64_t expected) { 7449 uint64_t data = 0; 7450 size_t bytes = upb_value_size(expected); 7451 size_t read = peekbytes(d, &data, bytes); 7452 if (read == bytes && data == expected) { 7453 /* Advance past matched bytes. 
*/ 7454 int32_t ok = getbytes(d, &data, read); 7455 UPB_ASSERT(ok < 0); 7456 return DECODE_OK; 7457 } else if (read < bytes && memcmp(&data, &expected, read) == 0) { 7458 return (int32_t)suspend_save(d); 7459 } else { 7460 return DECODE_MISMATCH; 7461 } 7462} 7463 7464int32_t upb_pbdecoder_skipunknown(upb_pbdecoder *d, int32_t fieldnum, 7465 uint8_t wire_type) { 7466 if (fieldnum >= 0) 7467 goto have_tag; 7468 7469 while (true) { 7470 uint32_t tag; 7471 CHECK_RETURN(decode_v32(d, &tag)); 7472 wire_type = tag & 0x7; 7473 fieldnum = tag >> 3; 7474 7475have_tag: 7476 if (fieldnum == 0) { 7477 seterr(d, "Saw invalid field number (0)"); 7478 return (int32_t)upb_pbdecoder_suspend(d); 7479 } 7480 7481 switch (wire_type) { 7482 case UPB_WIRE_TYPE_32BIT: 7483 CHECK_RETURN(skip(d, 4)); 7484 break; 7485 case UPB_WIRE_TYPE_64BIT: 7486 CHECK_RETURN(skip(d, 8)); 7487 break; 7488 case UPB_WIRE_TYPE_VARINT: { 7489 uint64_t u64; 7490 CHECK_RETURN(decode_varint(d, &u64)); 7491 break; 7492 } 7493 case UPB_WIRE_TYPE_DELIMITED: { 7494 uint32_t len; 7495 CHECK_RETURN(decode_v32(d, &len)); 7496 CHECK_RETURN(skip(d, len)); 7497 break; 7498 } 7499 case UPB_WIRE_TYPE_START_GROUP: 7500 if (!pushtagdelim(d, -fieldnum)) { 7501 return (int32_t)upb_pbdecoder_suspend(d); 7502 } 7503 break; 7504 case UPB_WIRE_TYPE_END_GROUP: 7505 if (fieldnum == -d->top->groupnum) { 7506 decoder_pop(d); 7507 } else if (fieldnum == d->top->groupnum) { 7508 return DECODE_ENDGROUP; 7509 } else { 7510 seterr(d, "Unmatched ENDGROUP tag."); 7511 return (int32_t)upb_pbdecoder_suspend(d); 7512 } 7513 break; 7514 default: 7515 seterr(d, "Invalid wire type"); 7516 return (int32_t)upb_pbdecoder_suspend(d); 7517 } 7518 7519 if (d->top->groupnum >= 0) { 7520 /* TODO: More code needed for handling unknown groups. */ 7521 upb_sink_putunknown(d->top->sink, d->checkpoint, d->ptr - d->checkpoint); 7522 return DECODE_OK; 7523 } 7524 7525 /* Unknown group -- continue looping over unknown fields. 
*/ 7526 checkpoint(d); 7527 } 7528} 7529 7530static void goto_endmsg(upb_pbdecoder *d) { 7531 upb_value v; 7532 bool found = upb_inttable_lookup32(d->top->dispatch, DISPATCH_ENDMSG, &v); 7533 UPB_ASSERT(found); 7534 d->pc = d->top->base + upb_value_getuint64(v); 7535} 7536 7537/* Parses a tag and jumps to the corresponding bytecode instruction for this 7538 * field. 7539 * 7540 * If the tag is unknown (or the wire type doesn't match), parses the field as 7541 * unknown. If the tag is a valid ENDGROUP tag, jumps to the bytecode 7542 * instruction for the end of message. */ 7543static int32_t dispatch(upb_pbdecoder *d) { 7544 upb_inttable *dispatch = d->top->dispatch; 7545 uint32_t tag; 7546 uint8_t wire_type; 7547 uint32_t fieldnum; 7548 upb_value val; 7549 int32_t retval; 7550 7551 /* Decode tag. */ 7552 CHECK_RETURN(decode_v32(d, &tag)); 7553 wire_type = tag & 0x7; 7554 fieldnum = tag >> 3; 7555 7556 /* Lookup tag. Because of packed/non-packed compatibility, we have to 7557 * check the wire type against two possibilities. */ 7558 if (fieldnum != DISPATCH_ENDMSG && 7559 upb_inttable_lookup32(dispatch, fieldnum, &val)) { 7560 uint64_t v = upb_value_getuint64(val); 7561 if (wire_type == (v & 0xff)) { 7562 d->pc = d->top->base + (v >> 16); 7563 return DECODE_OK; 7564 } else if (wire_type == ((v >> 8) & 0xff)) { 7565 bool found = 7566 upb_inttable_lookup(dispatch, fieldnum + UPB_MAX_FIELDNUMBER, &val); 7567 UPB_ASSERT(found); 7568 d->pc = d->top->base + upb_value_getuint64(val); 7569 return DECODE_OK; 7570 } 7571 } 7572 7573 /* We have some unknown fields (or ENDGROUP) to parse. The DISPATCH or TAG 7574 * bytecode that triggered this is preceded by a CHECKDELIM bytecode which 7575 * we need to back up to, so that when we're done skipping unknown data we 7576 * can re-check the delimited end. */ 7577 d->last--; /* Necessary if we get suspended */ 7578 d->pc = d->last; 7579 UPB_ASSERT(getop(*d->last) == OP_CHECKDELIM); 7580 7581 /* Unknown field or ENDGROUP. 
*/ 7582 retval = upb_pbdecoder_skipunknown(d, fieldnum, wire_type); 7583 7584 CHECK_RETURN(retval); 7585 7586 if (retval == DECODE_ENDGROUP) { 7587 goto_endmsg(d); 7588 return DECODE_OK; 7589 } 7590 7591 return DECODE_OK; 7592} 7593 7594/* Callers know that the stack is more than one deep because the opcodes that 7595 * call this only occur after PUSH operations. */ 7596upb_pbdecoder_frame *outer_frame(upb_pbdecoder *d) { 7597 UPB_ASSERT(d->top != d->stack); 7598 return d->top - 1; 7599} 7600 7601 7602/* The main decoding loop *****************************************************/ 7603 7604/* The main decoder VM function. Uses traditional bytecode dispatch loop with a 7605 * switch() statement. */ 7606size_t run_decoder_vm(upb_pbdecoder *d, const mgroup *group, 7607 const upb_bufhandle* handle) { 7608 7609#define VMCASE(op, code) \ 7610 case op: { code; if (consumes_input(op)) checkpoint(d); break; } 7611#define PRIMITIVE_OP(type, wt, name, convfunc, ctype) \ 7612 VMCASE(OP_PARSE_ ## type, { \ 7613 ctype val; \ 7614 CHECK_RETURN(decode_ ## wt(d, &val)); \ 7615 upb_sink_put ## name(d->top->sink, arg, (convfunc)(val)); \ 7616 }) 7617 7618 while(1) { 7619 int32_t instruction; 7620 opcode op; 7621 uint32_t arg; 7622 int32_t longofs; 7623 7624 d->last = d->pc; 7625 instruction = *d->pc++; 7626 op = getop(instruction); 7627 arg = instruction >> 8; 7628 longofs = arg; 7629 UPB_ASSERT(d->ptr != d->residual_end); 7630 UPB_UNUSED(group); 7631#ifdef UPB_DUMP_BYTECODE 7632 fprintf(stderr, "s_ofs=%d buf_ofs=%d data_rem=%d buf_rem=%d delim_rem=%d " 7633 "%x %s (%d)\n", 7634 (int)offset(d), 7635 (int)(d->ptr - d->buf), 7636 (int)(d->data_end - d->ptr), 7637 (int)(d->end - d->ptr), 7638 (int)((d->top->end_ofs - d->bufstart_ofs) - (d->ptr - d->buf)), 7639 (int)(d->pc - 1 - group->bytecode), 7640 upb_pbdecoder_getopname(op), 7641 arg); 7642#endif 7643 switch (op) { 7644 /* Technically, we are losing data if we see a 32-bit varint that is not 7645 * properly sign-extended. 
We could detect this and error about the data 7646 * loss, but proto2 does not do this, so we pass. */ 7647 PRIMITIVE_OP(INT32, varint, int32, int32_t, uint64_t) 7648 PRIMITIVE_OP(INT64, varint, int64, int64_t, uint64_t) 7649 PRIMITIVE_OP(UINT32, varint, uint32, uint32_t, uint64_t) 7650 PRIMITIVE_OP(UINT64, varint, uint64, uint64_t, uint64_t) 7651 PRIMITIVE_OP(FIXED32, fixed32, uint32, uint32_t, uint32_t) 7652 PRIMITIVE_OP(FIXED64, fixed64, uint64, uint64_t, uint64_t) 7653 PRIMITIVE_OP(SFIXED32, fixed32, int32, int32_t, uint32_t) 7654 PRIMITIVE_OP(SFIXED64, fixed64, int64, int64_t, uint64_t) 7655 PRIMITIVE_OP(BOOL, varint, bool, bool, uint64_t) 7656 PRIMITIVE_OP(DOUBLE, fixed64, double, as_double, uint64_t) 7657 PRIMITIVE_OP(FLOAT, fixed32, float, as_float, uint32_t) 7658 PRIMITIVE_OP(SINT32, varint, int32, upb_zzdec_32, uint64_t) 7659 PRIMITIVE_OP(SINT64, varint, int64, upb_zzdec_64, uint64_t) 7660 7661 VMCASE(OP_SETDISPATCH, 7662 d->top->base = d->pc - 1; 7663 memcpy(&d->top->dispatch, d->pc, sizeof(void*)); 7664 d->pc += sizeof(void*) / sizeof(uint32_t); 7665 ) 7666 VMCASE(OP_STARTMSG, 7667 CHECK_SUSPEND(upb_sink_startmsg(d->top->sink)); 7668 ) 7669 VMCASE(OP_ENDMSG, 7670 CHECK_SUSPEND(upb_sink_endmsg(d->top->sink, d->status)); 7671 ) 7672 VMCASE(OP_STARTSEQ, 7673 upb_pbdecoder_frame *outer = outer_frame(d); 7674 CHECK_SUSPEND(upb_sink_startseq(outer->sink, arg, &d->top->sink)); 7675 ) 7676 VMCASE(OP_ENDSEQ, 7677 CHECK_SUSPEND(upb_sink_endseq(d->top->sink, arg)); 7678 ) 7679 VMCASE(OP_STARTSUBMSG, 7680 upb_pbdecoder_frame *outer = outer_frame(d); 7681 CHECK_SUSPEND(upb_sink_startsubmsg(outer->sink, arg, &d->top->sink)); 7682 ) 7683 VMCASE(OP_ENDSUBMSG, 7684 upb_sink subsink = (d->top + 1)->sink; 7685 CHECK_SUSPEND(upb_sink_endsubmsg(d->top->sink, subsink, arg)); 7686 ) 7687 VMCASE(OP_STARTSTR, 7688 uint32_t len = (uint32_t)delim_remaining(d); 7689 upb_pbdecoder_frame *outer = outer_frame(d); 7690 CHECK_SUSPEND(upb_sink_startstr(outer->sink, arg, len, 
&d->top->sink)); 7691 if (len == 0) { 7692 d->pc++; /* Skip OP_STRING. */ 7693 } 7694 ) 7695 VMCASE(OP_STRING, 7696 uint32_t len = (uint32_t)curbufleft(d); 7697 size_t n = upb_sink_putstring(d->top->sink, arg, d->ptr, len, handle); 7698 if (n > len) { 7699 if (n > delim_remaining(d)) { 7700 seterr(d, "Tried to skip past end of string."); 7701 return upb_pbdecoder_suspend(d); 7702 } else { 7703 int32_t ret = skip(d, n); 7704 /* This shouldn't return DECODE_OK, because n > len. */ 7705 UPB_ASSERT(ret >= 0); 7706 return ret; 7707 } 7708 } 7709 advance(d, n); 7710 if (n < len || d->delim_end == NULL) { 7711 /* We aren't finished with this string yet. */ 7712 d->pc--; /* Repeat OP_STRING. */ 7713 if (n > 0) checkpoint(d); 7714 return upb_pbdecoder_suspend(d); 7715 } 7716 ) 7717 VMCASE(OP_ENDSTR, 7718 CHECK_SUSPEND(upb_sink_endstr(d->top->sink, arg)); 7719 ) 7720 VMCASE(OP_PUSHTAGDELIM, 7721 CHECK_SUSPEND(pushtagdelim(d, arg)); 7722 ) 7723 VMCASE(OP_SETBIGGROUPNUM, 7724 d->top->groupnum = *d->pc++; 7725 ) 7726 VMCASE(OP_POP, 7727 UPB_ASSERT(d->top > d->stack); 7728 decoder_pop(d); 7729 ) 7730 VMCASE(OP_PUSHLENDELIM, 7731 uint32_t len; 7732 CHECK_RETURN(decode_v32(d, &len)); 7733 CHECK_SUSPEND(decoder_push(d, offset(d) + len)); 7734 set_delim_end(d); 7735 ) 7736 VMCASE(OP_SETDELIM, 7737 set_delim_end(d); 7738 ) 7739 VMCASE(OP_CHECKDELIM, 7740 /* We are guaranteed of this assert because we never allow ourselves to 7741 * consume bytes beyond data_end, which covers delim_end when non-NULL. 
7742 */ 7743 UPB_ASSERT(!(d->delim_end && d->ptr > d->delim_end)); 7744 if (d->ptr == d->delim_end) 7745 d->pc += longofs; 7746 ) 7747 VMCASE(OP_CALL, 7748 d->callstack[d->call_len++] = d->pc; 7749 d->pc += longofs; 7750 ) 7751 VMCASE(OP_RET, 7752 UPB_ASSERT(d->call_len > 0); 7753 d->pc = d->callstack[--d->call_len]; 7754 ) 7755 VMCASE(OP_BRANCH, 7756 d->pc += longofs; 7757 ) 7758 VMCASE(OP_TAG1, 7759 uint8_t expected; 7760 CHECK_SUSPEND(curbufleft(d) > 0); 7761 expected = (arg >> 8) & 0xff; 7762 if (*d->ptr == expected) { 7763 advance(d, 1); 7764 } else { 7765 int8_t shortofs; 7766 badtag: 7767 shortofs = arg; 7768 if (shortofs == LABEL_DISPATCH) { 7769 CHECK_RETURN(dispatch(d)); 7770 } else { 7771 d->pc += shortofs; 7772 break; /* Avoid checkpoint(). */ 7773 } 7774 } 7775 ) 7776 VMCASE(OP_TAG2, 7777 uint16_t expected; 7778 CHECK_SUSPEND(curbufleft(d) > 0); 7779 expected = (arg >> 8) & 0xffff; 7780 if (curbufleft(d) >= 2) { 7781 uint16_t actual; 7782 memcpy(&actual, d->ptr, 2); 7783 if (expected == actual) { 7784 advance(d, 2); 7785 } else { 7786 goto badtag; 7787 } 7788 } else { 7789 int32_t result = upb_pbdecoder_checktag_slow(d, expected); 7790 if (result == DECODE_MISMATCH) goto badtag; 7791 if (result >= 0) return result; 7792 } 7793 ) 7794 VMCASE(OP_TAGN, { 7795 uint64_t expected; 7796 int32_t result; 7797 memcpy(&expected, d->pc, 8); 7798 d->pc += 2; 7799 result = upb_pbdecoder_checktag_slow(d, expected); 7800 if (result == DECODE_MISMATCH) goto badtag; 7801 if (result >= 0) return result; 7802 }) 7803 VMCASE(OP_DISPATCH, { 7804 CHECK_RETURN(dispatch(d)); 7805 }) 7806 VMCASE(OP_HALT, { 7807 return d->size_param; 7808 }) 7809 } 7810 } 7811} 7812 7813 7814/* BytesHandler handlers ******************************************************/ 7815 7816void *upb_pbdecoder_startbc(void *closure, const void *pc, size_t size_hint) { 7817 upb_pbdecoder *d = closure; 7818 UPB_UNUSED(size_hint); 7819 d->top->end_ofs = UINT64_MAX; 7820 d->bufstart_ofs = 0; 7821 d->call_len = 
1; 7822 d->callstack[0] = &halt; 7823 d->pc = pc; 7824 d->skip = 0; 7825 return d; 7826} 7827 7828bool upb_pbdecoder_end(void *closure, const void *handler_data) { 7829 upb_pbdecoder *d = closure; 7830 const upb_pbdecodermethod *method = handler_data; 7831 uint64_t end; 7832 char dummy; 7833 7834 if (d->residual_end > d->residual) { 7835 seterr(d, "Unexpected EOF: decoder still has buffered unparsed data"); 7836 return false; 7837 } 7838 7839 if (d->skip) { 7840 seterr(d, "Unexpected EOF inside skipped data"); 7841 return false; 7842 } 7843 7844 if (d->top->end_ofs != UINT64_MAX) { 7845 seterr(d, "Unexpected EOF inside delimited string"); 7846 return false; 7847 } 7848 7849 /* The user's end() call indicates that the message ends here. */ 7850 end = offset(d); 7851 d->top->end_ofs = end; 7852 7853 { 7854 const uint32_t *p = d->pc; 7855 d->stack->end_ofs = end; 7856 /* Check the previous bytecode, but guard against beginning. */ 7857 if (p != method->code_base.ptr) p--; 7858 if (getop(*p) == OP_CHECKDELIM) { 7859 /* Rewind from OP_TAG* to OP_CHECKDELIM. 
*/ 7860 UPB_ASSERT(getop(*d->pc) == OP_TAG1 || 7861 getop(*d->pc) == OP_TAG2 || 7862 getop(*d->pc) == OP_TAGN || 7863 getop(*d->pc) == OP_DISPATCH); 7864 d->pc = p; 7865 } 7866 upb_pbdecoder_decode(closure, handler_data, &dummy, 0, NULL); 7867 } 7868 7869 if (d->call_len != 0) { 7870 seterr(d, "Unexpected EOF inside submessage or group"); 7871 return false; 7872 } 7873 7874 return true; 7875} 7876 7877size_t upb_pbdecoder_decode(void *decoder, const void *group, const char *buf, 7878 size_t size, const upb_bufhandle *handle) { 7879 int32_t result = upb_pbdecoder_resume(decoder, NULL, buf, size, handle); 7880 7881 if (result == DECODE_ENDGROUP) goto_endmsg(decoder); 7882 CHECK_RETURN(result); 7883 7884 return run_decoder_vm(decoder, group, handle); 7885} 7886 7887 7888/* Public API *****************************************************************/ 7889 7890void upb_pbdecoder_reset(upb_pbdecoder *d) { 7891 d->top = d->stack; 7892 d->top->groupnum = 0; 7893 d->ptr = d->residual; 7894 d->buf = d->residual; 7895 d->end = d->residual; 7896 d->residual_end = d->residual; 7897} 7898 7899upb_pbdecoder *upb_pbdecoder_create(upb_arena *a, const upb_pbdecodermethod *m, 7900 upb_sink sink, upb_status *status) { 7901 const size_t default_max_nesting = 64; 7902#ifndef NDEBUG 7903 size_t size_before = upb_arena_bytesallocated(a); 7904#endif 7905 7906 upb_pbdecoder *d = upb_arena_malloc(a, sizeof(upb_pbdecoder)); 7907 if (!d) return NULL; 7908 7909 d->method_ = m; 7910 d->callstack = upb_arena_malloc(a, callstacksize(d, default_max_nesting)); 7911 d->stack = upb_arena_malloc(a, stacksize(d, default_max_nesting)); 7912 if (!d->stack || !d->callstack) { 7913 return NULL; 7914 } 7915 7916 d->arena = a; 7917 d->limit = d->stack + default_max_nesting - 1; 7918 d->stack_size = default_max_nesting; 7919 d->status = status; 7920 7921 upb_pbdecoder_reset(d); 7922 upb_bytessink_reset(&d->input_, &m->input_handler_, d); 7923 7924 if (d->method_->dest_handlers_) { 7925 if (sink.handlers != 
d->method_->dest_handlers_) 7926 return NULL; 7927 } 7928 d->top->sink = sink; 7929 7930 /* If this fails, increase the value in decoder.h. */ 7931 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <= 7932 UPB_PB_DECODER_SIZE); 7933 return d; 7934} 7935 7936uint64_t upb_pbdecoder_bytesparsed(const upb_pbdecoder *d) { 7937 return offset(d); 7938} 7939 7940const upb_pbdecodermethod *upb_pbdecoder_method(const upb_pbdecoder *d) { 7941 return d->method_; 7942} 7943 7944upb_bytessink upb_pbdecoder_input(upb_pbdecoder *d) { 7945 return d->input_; 7946} 7947 7948size_t upb_pbdecoder_maxnesting(const upb_pbdecoder *d) { 7949 return d->stack_size; 7950} 7951 7952bool upb_pbdecoder_setmaxnesting(upb_pbdecoder *d, size_t max) { 7953 UPB_ASSERT(d->top >= d->stack); 7954 7955 if (max < (size_t)(d->top - d->stack)) { 7956 /* Can't set a limit smaller than what we are currently at. */ 7957 return false; 7958 } 7959 7960 if (max > d->stack_size) { 7961 /* Need to reallocate stack and callstack to accommodate. */ 7962 size_t old_size = stacksize(d, d->stack_size); 7963 size_t new_size = stacksize(d, max); 7964 void *p = upb_arena_realloc(d->arena, d->stack, old_size, new_size); 7965 if (!p) { 7966 return false; 7967 } 7968 d->stack = p; 7969 7970 old_size = callstacksize(d, d->stack_size); 7971 new_size = callstacksize(d, max); 7972 p = upb_arena_realloc(d->arena, d->callstack, old_size, new_size); 7973 if (!p) { 7974 return false; 7975 } 7976 d->callstack = p; 7977 7978 d->stack_size = max; 7979 } 7980 7981 d->limit = d->stack + max - 1; 7982 return true; 7983} 7984/* 7985** upb::Encoder 7986** 7987** Since we are implementing pure handlers (ie. without any out-of-band access 7988** to pre-computed lengths), we have to buffer all submessages before we can 7989** emit even their first byte. 7990** 7991** Not knowing the size of submessages also means we can't write a perfect 7992** zero-copy implementation, even with buffering. 
Lengths are stored as 7993** varints, which means that we don't know how many bytes to reserve for the 7994** length until we know what the length is. 7995** 7996** This leaves us with three main choices: 7997** 7998** 1. buffer all submessage data in a temporary buffer, then copy it exactly 7999** once into the output buffer. 8000** 8001** 2. attempt to buffer data directly into the output buffer, estimating how 8002** many bytes each length will take. When our guesses are wrong, use 8003** memmove() to grow or shrink the allotted space. 8004** 8005** 3. buffer directly into the output buffer, allocating a max length 8006** ahead-of-time for each submessage length. If we overallocated, we waste 8007** space, but no memcpy() or memmove() is required. This approach requires 8008** defining a maximum size for submessages and rejecting submessages that 8009** exceed that size. 8010** 8011** (2) and (3) have the potential to have better performance, but they are more 8012** complicated and subtle to implement: 8013** 8014** (3) requires making an arbitrary choice of the maximum message size; it 8015** wastes space when submessages are shorter than this and fails 8016** completely when they are longer. This makes it more finicky and 8017** requires configuration based on the input. It also makes it impossible 8018** to perfectly match the output of reference encoders that always use the 8019** optimal amount of space for each length. 8020** 8021** (2) requires guessing the the size upfront, and if multiple lengths are 8022** guessed wrong the minimum required number of memmove() operations may 8023** be complicated to compute correctly. Implemented properly, it may have 8024** a useful amortized or average cost, but more investigation is required 8025** to determine this and what the optimal algorithm is to achieve it. 8026** 8027** (1) makes you always pay for exactly one copy, but its implementation is 8028** the simplest and its performance is predictable. 
8029** 8030** So for now, we implement (1) only. If we wish to optimize later, we should 8031** be able to do it without affecting users. 8032** 8033** The strategy is to buffer the segments of data that do *not* depend on 8034** unknown lengths in one buffer, and keep a separate buffer of segment pointers 8035** and lengths. When the top-level submessage ends, we can go beginning to end, 8036** alternating the writing of lengths with memcpy() of the rest of the data. 8037** At the top level though, no buffering is required. 8038*/ 8039 8040 8041 8042/* The output buffer is divided into segments; a segment is a string of data 8043 * that is "ready to go" -- it does not need any varint lengths inserted into 8044 * the middle. The seams between segments are where varints will be inserted 8045 * once they are known. 8046 * 8047 * We also use the concept of a "run", which is a range of encoded bytes that 8048 * occur at a single submessage level. Every segment contains one or more runs. 8049 * 8050 * A segment can span messages. Consider: 8051 * 8052 * .--Submessage lengths---------. 8053 * | | | 8054 * | V V 8055 * V | |--------------- | |----------------- 8056 * Submessages: | |----------------------------------------------- 8057 * Top-level msg: ------------------------------------------------------------ 8058 * 8059 * Segments: ----- ------------------- ----------------- 8060 * Runs: *---- *--------------*--- *---------------- 8061 * (* marks the start) 8062 * 8063 * Note that the top-level menssage is not in any segment because it does not 8064 * have any length preceding it. 8065 * 8066 * A segment is only interrupted when another length needs to be inserted. So 8067 * observe how the second segment spans both the inner submessage and part of 8068 * the next enclosing message. */ 8069typedef struct { 8070 uint32_t msglen; /* The length to varint-encode before this segment. */ 8071 uint32_t seglen; /* Length of the segment. 
*/ 8072} upb_pb_encoder_segment; 8073 8074struct upb_pb_encoder { 8075 upb_arena *arena; 8076 8077 /* Our input and output. */ 8078 upb_sink input_; 8079 upb_bytessink output_; 8080 8081 /* The "subclosure" -- used as the inner closure as part of the bytessink 8082 * protocol. */ 8083 void *subc; 8084 8085 /* The output buffer and limit, and our current write position. "buf" 8086 * initially points to "initbuf", but is dynamically allocated if we need to 8087 * grow beyond the initial size. */ 8088 char *buf, *ptr, *limit; 8089 8090 /* The beginning of the current run, or undefined if we are at the top 8091 * level. */ 8092 char *runbegin; 8093 8094 /* The list of segments we are accumulating. */ 8095 upb_pb_encoder_segment *segbuf, *segptr, *seglimit; 8096 8097 /* The stack of enclosing submessages. Each entry in the stack points to the 8098 * segment where this submessage's length is being accumulated. */ 8099 int *stack, *top, *stacklimit; 8100 8101 /* Depth of startmsg/endmsg calls. */ 8102 int depth; 8103}; 8104 8105/* low-level buffering ********************************************************/ 8106 8107/* Low-level functions for interacting with the output buffer. */ 8108 8109/* TODO(haberman): handle pushback */ 8110static void putbuf(upb_pb_encoder *e, const char *buf, size_t len) { 8111 size_t n = upb_bytessink_putbuf(e->output_, e->subc, buf, len, NULL); 8112 UPB_ASSERT(n == len); 8113} 8114 8115static upb_pb_encoder_segment *top(upb_pb_encoder *e) { 8116 return &e->segbuf[*e->top]; 8117} 8118 8119/* Call to ensure that at least "bytes" bytes are available for writing at 8120 * e->ptr. Returns false if the bytes could not be allocated. */ 8121static bool reserve(upb_pb_encoder *e, size_t bytes) { 8122 if ((size_t)(e->limit - e->ptr) < bytes) { 8123 /* Grow buffer. 
*/ 8124 char *new_buf; 8125 size_t needed = bytes + (e->ptr - e->buf); 8126 size_t old_size = e->limit - e->buf; 8127 8128 size_t new_size = old_size; 8129 8130 while (new_size < needed) { 8131 new_size *= 2; 8132 } 8133 8134 new_buf = upb_arena_realloc(e->arena, e->buf, old_size, new_size); 8135 8136 if (new_buf == NULL) { 8137 return false; 8138 } 8139 8140 e->ptr = new_buf + (e->ptr - e->buf); 8141 e->runbegin = new_buf + (e->runbegin - e->buf); 8142 e->limit = new_buf + new_size; 8143 e->buf = new_buf; 8144 } 8145 8146 return true; 8147} 8148 8149/* Call when "bytes" bytes have been writte at e->ptr. The caller *must* have 8150 * previously called reserve() with at least this many bytes. */ 8151static void encoder_advance(upb_pb_encoder *e, size_t bytes) { 8152 UPB_ASSERT((size_t)(e->limit - e->ptr) >= bytes); 8153 e->ptr += bytes; 8154} 8155 8156/* Call when all of the bytes for a handler have been written. Flushes the 8157 * bytes if possible and necessary, returning false if this failed. */ 8158static bool commit(upb_pb_encoder *e) { 8159 if (!e->top) { 8160 /* We aren't inside a delimited region. Flush our accumulated bytes to 8161 * the output. 8162 * 8163 * TODO(haberman): in the future we may want to delay flushing for 8164 * efficiency reasons. */ 8165 putbuf(e, e->buf, e->ptr - e->buf); 8166 e->ptr = e->buf; 8167 } 8168 8169 return true; 8170} 8171 8172/* Writes the given bytes to the buffer, handling reserve/advance. */ 8173static bool encode_bytes(upb_pb_encoder *e, const void *data, size_t len) { 8174 if (!reserve(e, len)) { 8175 return false; 8176 } 8177 8178 memcpy(e->ptr, data, len); 8179 encoder_advance(e, len); 8180 return true; 8181} 8182 8183/* Finish the current run by adding the run totals to the segment and message 8184 * length. 
*/ 8185static void accumulate(upb_pb_encoder *e) { 8186 size_t run_len; 8187 UPB_ASSERT(e->ptr >= e->runbegin); 8188 run_len = e->ptr - e->runbegin; 8189 e->segptr->seglen += run_len; 8190 top(e)->msglen += run_len; 8191 e->runbegin = e->ptr; 8192} 8193 8194/* Call to indicate the start of delimited region for which the full length is 8195 * not yet known. All data will be buffered until the length is known. 8196 * Delimited regions may be nested; their lengths will all be tracked properly. */ 8197static bool start_delim(upb_pb_encoder *e) { 8198 if (e->top) { 8199 /* We are already buffering, advance to the next segment and push it on the 8200 * stack. */ 8201 accumulate(e); 8202 8203 if (++e->top == e->stacklimit) { 8204 /* TODO(haberman): grow stack? */ 8205 return false; 8206 } 8207 8208 if (++e->segptr == e->seglimit) { 8209 /* Grow segment buffer. */ 8210 size_t old_size = 8211 (e->seglimit - e->segbuf) * sizeof(upb_pb_encoder_segment); 8212 size_t new_size = old_size * 2; 8213 upb_pb_encoder_segment *new_buf = 8214 upb_arena_realloc(e->arena, e->segbuf, old_size, new_size); 8215 8216 if (new_buf == NULL) { 8217 return false; 8218 } 8219 8220 e->segptr = new_buf + (e->segptr - e->segbuf); 8221 e->seglimit = new_buf + (new_size / sizeof(upb_pb_encoder_segment)); 8222 e->segbuf = new_buf; 8223 } 8224 } else { 8225 /* We were previously at the top level, start buffering. */ 8226 e->segptr = e->segbuf; 8227 e->top = e->stack; 8228 e->runbegin = e->ptr; 8229 } 8230 8231 *e->top = (int)(e->segptr - e->segbuf); 8232 e->segptr->seglen = 0; 8233 e->segptr->msglen = 0; 8234 8235 return true; 8236} 8237 8238/* Call to indicate the end of a delimited region. We now know the length of 8239 * the delimited region. If we are not nested inside any other delimited 8240 * regions, we can now emit all of the buffered data we accumulated. 
*/ 8241static bool end_delim(upb_pb_encoder *e) { 8242 size_t msglen; 8243 accumulate(e); 8244 msglen = top(e)->msglen; 8245 8246 if (e->top == e->stack) { 8247 /* All lengths are now available, emit all buffered data. */ 8248 char buf[UPB_PB_VARINT_MAX_LEN]; 8249 upb_pb_encoder_segment *s; 8250 const char *ptr = e->buf; 8251 for (s = e->segbuf; s <= e->segptr; s++) { 8252 size_t lenbytes = upb_vencode64(s->msglen, buf); 8253 putbuf(e, buf, lenbytes); 8254 putbuf(e, ptr, s->seglen); 8255 ptr += s->seglen; 8256 } 8257 8258 e->ptr = e->buf; 8259 e->top = NULL; 8260 } else { 8261 /* Need to keep buffering; propagate length info into enclosing 8262 * submessages. */ 8263 --e->top; 8264 top(e)->msglen += msglen + upb_varint_size(msglen); 8265 } 8266 8267 return true; 8268} 8269 8270 8271/* tag_t **********************************************************************/ 8272 8273/* A precomputed (pre-encoded) tag and length. */ 8274 8275typedef struct { 8276 uint8_t bytes; 8277 char tag[7]; 8278} tag_t; 8279 8280/* Allocates a new tag for this field, and sets it in these handlerattr. */ 8281static void new_tag(upb_handlers *h, const upb_fielddef *f, upb_wiretype_t wt, 8282 upb_handlerattr *attr) { 8283 uint32_t n = upb_fielddef_number(f); 8284 8285 tag_t *tag = upb_gmalloc(sizeof(tag_t)); 8286 tag->bytes = upb_vencode64((n << 3) | wt, tag->tag); 8287 8288 attr->handler_data = tag; 8289 upb_handlers_addcleanup(h, tag, upb_gfree); 8290} 8291 8292static bool encode_tag(upb_pb_encoder *e, const tag_t *tag) { 8293 return encode_bytes(e, tag->tag, tag->bytes); 8294} 8295 8296 8297/* encoding of wire types *****************************************************/ 8298 8299static bool encode_fixed64(upb_pb_encoder *e, uint64_t val) { 8300 /* TODO(haberman): byte-swap for big endian. */ 8301 return encode_bytes(e, &val, sizeof(uint64_t)); 8302} 8303 8304static bool encode_fixed32(upb_pb_encoder *e, uint32_t val) { 8305 /* TODO(haberman): byte-swap for big endian. 
*/ 8306 return encode_bytes(e, &val, sizeof(uint32_t)); 8307} 8308 8309static bool encode_varint(upb_pb_encoder *e, uint64_t val) { 8310 if (!reserve(e, UPB_PB_VARINT_MAX_LEN)) { 8311 return false; 8312 } 8313 8314 encoder_advance(e, upb_vencode64(val, e->ptr)); 8315 return true; 8316} 8317 8318static uint64_t dbl2uint64(double d) { 8319 uint64_t ret; 8320 memcpy(&ret, &d, sizeof(uint64_t)); 8321 return ret; 8322} 8323 8324static uint32_t flt2uint32(float d) { 8325 uint32_t ret; 8326 memcpy(&ret, &d, sizeof(uint32_t)); 8327 return ret; 8328} 8329 8330 8331/* encoding of proto types ****************************************************/ 8332 8333static bool startmsg(void *c, const void *hd) { 8334 upb_pb_encoder *e = c; 8335 UPB_UNUSED(hd); 8336 if (e->depth++ == 0) { 8337 upb_bytessink_start(e->output_, 0, &e->subc); 8338 } 8339 return true; 8340} 8341 8342static bool endmsg(void *c, const void *hd, upb_status *status) { 8343 upb_pb_encoder *e = c; 8344 UPB_UNUSED(hd); 8345 UPB_UNUSED(status); 8346 if (--e->depth == 0) { 8347 upb_bytessink_end(e->output_); 8348 } 8349 return true; 8350} 8351 8352static void *encode_startdelimfield(void *c, const void *hd) { 8353 bool ok = encode_tag(c, hd) && commit(c) && start_delim(c); 8354 return ok ? c : UPB_BREAK; 8355} 8356 8357static bool encode_unknown(void *c, const void *hd, const char *buf, 8358 size_t len) { 8359 UPB_UNUSED(hd); 8360 return encode_bytes(c, buf, len) && commit(c); 8361} 8362 8363static bool encode_enddelimfield(void *c, const void *hd) { 8364 UPB_UNUSED(hd); 8365 return end_delim(c); 8366} 8367 8368static void *encode_startgroup(void *c, const void *hd) { 8369 return (encode_tag(c, hd) && commit(c)) ? 
c : UPB_BREAK; 8370} 8371 8372static bool encode_endgroup(void *c, const void *hd) { 8373 return encode_tag(c, hd) && commit(c); 8374} 8375 8376static void *encode_startstr(void *c, const void *hd, size_t size_hint) { 8377 UPB_UNUSED(size_hint); 8378 return encode_startdelimfield(c, hd); 8379} 8380 8381static size_t encode_strbuf(void *c, const void *hd, const char *buf, 8382 size_t len, const upb_bufhandle *h) { 8383 UPB_UNUSED(hd); 8384 UPB_UNUSED(h); 8385 return encode_bytes(c, buf, len) ? len : 0; 8386} 8387 8388#define T(type, ctype, convert, encode) \ 8389 static bool encode_scalar_##type(void *e, const void *hd, ctype val) { \ 8390 return encode_tag(e, hd) && encode(e, (convert)(val)) && commit(e); \ 8391 } \ 8392 static bool encode_packed_##type(void *e, const void *hd, ctype val) { \ 8393 UPB_UNUSED(hd); \ 8394 return encode(e, (convert)(val)); \ 8395 } 8396 8397T(double, double, dbl2uint64, encode_fixed64) 8398T(float, float, flt2uint32, encode_fixed32) 8399T(int64, int64_t, uint64_t, encode_varint) 8400T(int32, int32_t, int64_t, encode_varint) 8401T(fixed64, uint64_t, uint64_t, encode_fixed64) 8402T(fixed32, uint32_t, uint32_t, encode_fixed32) 8403T(bool, bool, bool, encode_varint) 8404T(uint32, uint32_t, uint32_t, encode_varint) 8405T(uint64, uint64_t, uint64_t, encode_varint) 8406T(enum, int32_t, uint32_t, encode_varint) 8407T(sfixed32, int32_t, uint32_t, encode_fixed32) 8408T(sfixed64, int64_t, uint64_t, encode_fixed64) 8409T(sint32, int32_t, upb_zzenc_32, encode_varint) 8410T(sint64, int64_t, upb_zzenc_64, encode_varint) 8411 8412#undef T 8413 8414 8415/* code to build the handlers *************************************************/ 8416 8417#include <stdio.h> 8418static void newhandlers_callback(const void *closure, upb_handlers *h) { 8419 const upb_msgdef *m; 8420 upb_msg_field_iter i; 8421 8422 UPB_UNUSED(closure); 8423 8424 upb_handlers_setstartmsg(h, startmsg, NULL); 8425 upb_handlers_setendmsg(h, endmsg, NULL); 8426 upb_handlers_setunknown(h, 
encode_unknown, NULL); 8427 8428 m = upb_handlers_msgdef(h); 8429 for(upb_msg_field_begin(&i, m); 8430 !upb_msg_field_done(&i); 8431 upb_msg_field_next(&i)) { 8432 const upb_fielddef *f = upb_msg_iter_field(&i); 8433 bool packed = upb_fielddef_isseq(f) && upb_fielddef_isprimitive(f) && 8434 upb_fielddef_packed(f); 8435 upb_handlerattr attr = UPB_HANDLERATTR_INIT; 8436 upb_wiretype_t wt = 8437 packed ? UPB_WIRE_TYPE_DELIMITED 8438 : upb_pb_native_wire_types[upb_fielddef_descriptortype(f)]; 8439 8440 /* Pre-encode the tag for this field. */ 8441 new_tag(h, f, wt, &attr); 8442 8443 if (packed) { 8444 upb_handlers_setstartseq(h, f, encode_startdelimfield, &attr); 8445 upb_handlers_setendseq(h, f, encode_enddelimfield, &attr); 8446 } 8447 8448#define T(upper, lower, upbtype) \ 8449 case UPB_DESCRIPTOR_TYPE_##upper: \ 8450 if (packed) { \ 8451 upb_handlers_set##upbtype(h, f, encode_packed_##lower, &attr); \ 8452 } else { \ 8453 upb_handlers_set##upbtype(h, f, encode_scalar_##lower, &attr); \ 8454 } \ 8455 break; 8456 8457 switch (upb_fielddef_descriptortype(f)) { 8458 T(DOUBLE, double, double); 8459 T(FLOAT, float, float); 8460 T(INT64, int64, int64); 8461 T(INT32, int32, int32); 8462 T(FIXED64, fixed64, uint64); 8463 T(FIXED32, fixed32, uint32); 8464 T(BOOL, bool, bool); 8465 T(UINT32, uint32, uint32); 8466 T(UINT64, uint64, uint64); 8467 T(ENUM, enum, int32); 8468 T(SFIXED32, sfixed32, int32); 8469 T(SFIXED64, sfixed64, int64); 8470 T(SINT32, sint32, int32); 8471 T(SINT64, sint64, int64); 8472 case UPB_DESCRIPTOR_TYPE_STRING: 8473 case UPB_DESCRIPTOR_TYPE_BYTES: 8474 upb_handlers_setstartstr(h, f, encode_startstr, &attr); 8475 upb_handlers_setendstr(h, f, encode_enddelimfield, &attr); 8476 upb_handlers_setstring(h, f, encode_strbuf, &attr); 8477 break; 8478 case UPB_DESCRIPTOR_TYPE_MESSAGE: 8479 upb_handlers_setstartsubmsg(h, f, encode_startdelimfield, &attr); 8480 upb_handlers_setendsubmsg(h, f, encode_enddelimfield, &attr); 8481 break; 8482 case 
UPB_DESCRIPTOR_TYPE_GROUP: { 8483 /* Endgroup takes a different tag (wire_type = END_GROUP). */ 8484 upb_handlerattr attr2 = UPB_HANDLERATTR_INIT; 8485 new_tag(h, f, UPB_WIRE_TYPE_END_GROUP, &attr2); 8486 8487 upb_handlers_setstartsubmsg(h, f, encode_startgroup, &attr); 8488 upb_handlers_setendsubmsg(h, f, encode_endgroup, &attr2); 8489 8490 break; 8491 } 8492 } 8493 8494#undef T 8495 } 8496} 8497 8498void upb_pb_encoder_reset(upb_pb_encoder *e) { 8499 e->segptr = NULL; 8500 e->top = NULL; 8501 e->depth = 0; 8502} 8503 8504 8505/* public API *****************************************************************/ 8506 8507upb_handlercache *upb_pb_encoder_newcache(void) { 8508 return upb_handlercache_new(newhandlers_callback, NULL); 8509} 8510 8511upb_pb_encoder *upb_pb_encoder_create(upb_arena *arena, const upb_handlers *h, 8512 upb_bytessink output) { 8513 const size_t initial_bufsize = 256; 8514 const size_t initial_segbufsize = 16; 8515 /* TODO(haberman): make this configurable. */ 8516 const size_t stack_size = 64; 8517#ifndef NDEBUG 8518 const size_t size_before = upb_arena_bytesallocated(arena); 8519#endif 8520 8521 upb_pb_encoder *e = upb_arena_malloc(arena, sizeof(upb_pb_encoder)); 8522 if (!e) return NULL; 8523 8524 e->buf = upb_arena_malloc(arena, initial_bufsize); 8525 e->segbuf = upb_arena_malloc(arena, initial_segbufsize * sizeof(*e->segbuf)); 8526 e->stack = upb_arena_malloc(arena, stack_size * sizeof(*e->stack)); 8527 8528 if (!e->buf || !e->segbuf || !e->stack) { 8529 return NULL; 8530 } 8531 8532 e->limit = e->buf + initial_bufsize; 8533 e->seglimit = e->segbuf + initial_segbufsize; 8534 e->stacklimit = e->stack + stack_size; 8535 8536 upb_pb_encoder_reset(e); 8537 upb_sink_reset(&e->input_, h, e); 8538 8539 e->arena = arena; 8540 e->output_ = output; 8541 e->subc = output.closure; 8542 e->ptr = e->buf; 8543 8544 /* If this fails, increase the value in encoder.h. 
*/ 8545 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <= 8546 UPB_PB_ENCODER_SIZE); 8547 return e; 8548} 8549 8550upb_sink upb_pb_encoder_input(upb_pb_encoder *e) { return e->input_; } 8551/* 8552 * upb::pb::TextPrinter 8553 * 8554 * OPT: This is not optimized at all. It uses printf() which parses the format 8555 * string every time, and it allocates memory for every put. 8556 */ 8557 8558 8559#include <ctype.h> 8560#include <float.h> 8561#include <inttypes.h> 8562#include <stdarg.h> 8563#include <stdio.h> 8564#include <string.h> 8565 8566 8567 8568struct upb_textprinter { 8569 upb_sink input_; 8570 upb_bytessink output_; 8571 int indent_depth_; 8572 bool single_line_; 8573 void *subc; 8574}; 8575 8576#define CHECK(x) if ((x) < 0) goto err; 8577 8578static const char *shortname(const char *longname) { 8579 const char *last = strrchr(longname, '.'); 8580 return last ? last + 1 : longname; 8581} 8582 8583static int indent(upb_textprinter *p) { 8584 int i; 8585 if (!p->single_line_) 8586 for (i = 0; i < p->indent_depth_; i++) 8587 upb_bytessink_putbuf(p->output_, p->subc, " ", 2, NULL); 8588 return 0; 8589} 8590 8591static int endfield(upb_textprinter *p) { 8592 const char ch = (p->single_line_ ? ' ' : '\n'); 8593 upb_bytessink_putbuf(p->output_, p->subc, &ch, 1, NULL); 8594 return 0; 8595} 8596 8597static int putescaped(upb_textprinter *p, const char *buf, size_t len, 8598 bool preserve_utf8) { 8599 /* Based on CEscapeInternal() from Google's protobuf release. */ 8600 char dstbuf[4096], *dst = dstbuf, *dstend = dstbuf + sizeof(dstbuf); 8601 const char *end = buf + len; 8602 8603 /* I think hex is prettier and more useful, but proto2 uses octal; should 8604 * investigate whether it can parse hex also. 
*/ 8605 const bool use_hex = false; 8606 bool last_hex_escape = false; /* true if last output char was \xNN */ 8607 8608 for (; buf < end; buf++) { 8609 bool is_hex_escape; 8610 8611 if (dstend - dst < 4) { 8612 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); 8613 dst = dstbuf; 8614 } 8615 8616 is_hex_escape = false; 8617 switch (*buf) { 8618 case '\n': *(dst++) = '\\'; *(dst++) = 'n'; break; 8619 case '\r': *(dst++) = '\\'; *(dst++) = 'r'; break; 8620 case '\t': *(dst++) = '\\'; *(dst++) = 't'; break; 8621 case '\"': *(dst++) = '\\'; *(dst++) = '\"'; break; 8622 case '\'': *(dst++) = '\\'; *(dst++) = '\''; break; 8623 case '\\': *(dst++) = '\\'; *(dst++) = '\\'; break; 8624 default: 8625 /* Note that if we emit \xNN and the buf character after that is a hex 8626 * digit then that digit must be escaped too to prevent it being 8627 * interpreted as part of the character code by C. */ 8628 if ((!preserve_utf8 || (uint8_t)*buf < 0x80) && 8629 (!isprint(*buf) || (last_hex_escape && isxdigit(*buf)))) { 8630 sprintf(dst, (use_hex ? "\\x%02x" : "\\%03o"), (uint8_t)*buf); 8631 is_hex_escape = use_hex; 8632 dst += 4; 8633 } else { 8634 *(dst++) = *buf; break; 8635 } 8636 } 8637 last_hex_escape = is_hex_escape; 8638 } 8639 /* Flush remaining data. */ 8640 upb_bytessink_putbuf(p->output_, p->subc, dstbuf, dst - dstbuf, NULL); 8641 return 0; 8642} 8643 8644bool putf(upb_textprinter *p, const char *fmt, ...) { 8645 va_list args; 8646 va_list args_copy; 8647 char *str; 8648 int written; 8649 int len; 8650 bool ok; 8651 8652 va_start(args, fmt); 8653 8654 /* Run once to get the length of the string. */ 8655 _upb_va_copy(args_copy, args); 8656 len = _upb_vsnprintf(NULL, 0, fmt, args_copy); 8657 va_end(args_copy); 8658 8659 /* + 1 for NULL terminator (vsprintf() requires it even if we don't). 
*/ 8660 str = upb_gmalloc(len + 1); 8661 if (!str) return false; 8662 written = vsprintf(str, fmt, args); 8663 va_end(args); 8664 UPB_ASSERT(written == len); 8665 8666 ok = upb_bytessink_putbuf(p->output_, p->subc, str, len, NULL); 8667 upb_gfree(str); 8668 return ok; 8669} 8670 8671 8672/* handlers *******************************************************************/ 8673 8674static bool textprinter_startmsg(void *c, const void *hd) { 8675 upb_textprinter *p = c; 8676 UPB_UNUSED(hd); 8677 if (p->indent_depth_ == 0) { 8678 upb_bytessink_start(p->output_, 0, &p->subc); 8679 } 8680 return true; 8681} 8682 8683static bool textprinter_endmsg(void *c, const void *hd, upb_status *s) { 8684 upb_textprinter *p = c; 8685 UPB_UNUSED(hd); 8686 UPB_UNUSED(s); 8687 if (p->indent_depth_ == 0) { 8688 upb_bytessink_end(p->output_); 8689 } 8690 return true; 8691} 8692 8693#define TYPE(name, ctype, fmt) \ 8694 static bool textprinter_put ## name(void *closure, const void *handler_data, \ 8695 ctype val) { \ 8696 upb_textprinter *p = closure; \ 8697 const upb_fielddef *f = handler_data; \ 8698 CHECK(indent(p)); \ 8699 putf(p, "%s: " fmt, upb_fielddef_name(f), val); \ 8700 CHECK(endfield(p)); \ 8701 return true; \ 8702 err: \ 8703 return false; \ 8704} 8705 8706static bool textprinter_putbool(void *closure, const void *handler_data, 8707 bool val) { 8708 upb_textprinter *p = closure; 8709 const upb_fielddef *f = handler_data; 8710 CHECK(indent(p)); 8711 putf(p, "%s: %s", upb_fielddef_name(f), val ? "true" : "false"); 8712 CHECK(endfield(p)); 8713 return true; 8714err: 8715 return false; 8716} 8717 8718#define STRINGIFY_HELPER(x) #x 8719#define STRINGIFY_MACROVAL(x) STRINGIFY_HELPER(x) 8720 8721TYPE(int32, int32_t, "%" PRId32) 8722TYPE(int64, int64_t, "%" PRId64) 8723TYPE(uint32, uint32_t, "%" PRIu32) 8724TYPE(uint64, uint64_t, "%" PRIu64) 8725TYPE(float, float, "%." STRINGIFY_MACROVAL(FLT_DIG) "g") 8726TYPE(double, double, "%." 
STRINGIFY_MACROVAL(DBL_DIG) "g") 8727 8728#undef TYPE 8729 8730/* Output a symbolic value from the enum if found, else just print as int32. */ 8731static bool textprinter_putenum(void *closure, const void *handler_data, 8732 int32_t val) { 8733 upb_textprinter *p = closure; 8734 const upb_fielddef *f = handler_data; 8735 const upb_enumdef *enum_def = upb_fielddef_enumsubdef(f); 8736 const char *label = upb_enumdef_iton(enum_def, val); 8737 if (label) { 8738 indent(p); 8739 putf(p, "%s: %s", upb_fielddef_name(f), label); 8740 endfield(p); 8741 } else { 8742 if (!textprinter_putint32(closure, handler_data, val)) 8743 return false; 8744 } 8745 return true; 8746} 8747 8748static void *textprinter_startstr(void *closure, const void *handler_data, 8749 size_t size_hint) { 8750 upb_textprinter *p = closure; 8751 const upb_fielddef *f = handler_data; 8752 UPB_UNUSED(size_hint); 8753 indent(p); 8754 putf(p, "%s: \"", upb_fielddef_name(f)); 8755 return p; 8756} 8757 8758static bool textprinter_endstr(void *closure, const void *handler_data) { 8759 upb_textprinter *p = closure; 8760 UPB_UNUSED(handler_data); 8761 putf(p, "\""); 8762 endfield(p); 8763 return true; 8764} 8765 8766static size_t textprinter_putstr(void *closure, const void *hd, const char *buf, 8767 size_t len, const upb_bufhandle *handle) { 8768 upb_textprinter *p = closure; 8769 const upb_fielddef *f = hd; 8770 UPB_UNUSED(handle); 8771 CHECK(putescaped(p, buf, len, upb_fielddef_type(f) == UPB_TYPE_STRING)); 8772 return len; 8773err: 8774 return 0; 8775} 8776 8777static void *textprinter_startsubmsg(void *closure, const void *handler_data) { 8778 upb_textprinter *p = closure; 8779 const char *name = handler_data; 8780 CHECK(indent(p)); 8781 putf(p, "%s {%c", name, p->single_line_ ? 
' ' : '\n'); 8782 p->indent_depth_++; 8783 return p; 8784err: 8785 return UPB_BREAK; 8786} 8787 8788static bool textprinter_endsubmsg(void *closure, const void *handler_data) { 8789 upb_textprinter *p = closure; 8790 UPB_UNUSED(handler_data); 8791 p->indent_depth_--; 8792 CHECK(indent(p)); 8793 upb_bytessink_putbuf(p->output_, p->subc, "}", 1, NULL); 8794 CHECK(endfield(p)); 8795 return true; 8796err: 8797 return false; 8798} 8799 8800static void onmreg(const void *c, upb_handlers *h) { 8801 const upb_msgdef *m = upb_handlers_msgdef(h); 8802 upb_msg_field_iter i; 8803 UPB_UNUSED(c); 8804 8805 upb_handlers_setstartmsg(h, textprinter_startmsg, NULL); 8806 upb_handlers_setendmsg(h, textprinter_endmsg, NULL); 8807 8808 for(upb_msg_field_begin(&i, m); 8809 !upb_msg_field_done(&i); 8810 upb_msg_field_next(&i)) { 8811 upb_fielddef *f = upb_msg_iter_field(&i); 8812 upb_handlerattr attr = UPB_HANDLERATTR_INIT; 8813 attr.handler_data = f; 8814 switch (upb_fielddef_type(f)) { 8815 case UPB_TYPE_INT32: 8816 upb_handlers_setint32(h, f, textprinter_putint32, &attr); 8817 break; 8818 case UPB_TYPE_INT64: 8819 upb_handlers_setint64(h, f, textprinter_putint64, &attr); 8820 break; 8821 case UPB_TYPE_UINT32: 8822 upb_handlers_setuint32(h, f, textprinter_putuint32, &attr); 8823 break; 8824 case UPB_TYPE_UINT64: 8825 upb_handlers_setuint64(h, f, textprinter_putuint64, &attr); 8826 break; 8827 case UPB_TYPE_FLOAT: 8828 upb_handlers_setfloat(h, f, textprinter_putfloat, &attr); 8829 break; 8830 case UPB_TYPE_DOUBLE: 8831 upb_handlers_setdouble(h, f, textprinter_putdouble, &attr); 8832 break; 8833 case UPB_TYPE_BOOL: 8834 upb_handlers_setbool(h, f, textprinter_putbool, &attr); 8835 break; 8836 case UPB_TYPE_STRING: 8837 case UPB_TYPE_BYTES: 8838 upb_handlers_setstartstr(h, f, textprinter_startstr, &attr); 8839 upb_handlers_setstring(h, f, textprinter_putstr, &attr); 8840 upb_handlers_setendstr(h, f, textprinter_endstr, &attr); 8841 break; 8842 case UPB_TYPE_MESSAGE: { 8843 const char *name 
= 8844 upb_fielddef_descriptortype(f) == UPB_DESCRIPTOR_TYPE_GROUP 8845 ? shortname(upb_msgdef_fullname(upb_fielddef_msgsubdef(f))) 8846 : upb_fielddef_name(f); 8847 attr.handler_data = name; 8848 upb_handlers_setstartsubmsg(h, f, textprinter_startsubmsg, &attr); 8849 upb_handlers_setendsubmsg(h, f, textprinter_endsubmsg, &attr); 8850 break; 8851 } 8852 case UPB_TYPE_ENUM: 8853 upb_handlers_setint32(h, f, textprinter_putenum, &attr); 8854 break; 8855 } 8856 } 8857} 8858 8859static void textprinter_reset(upb_textprinter *p, bool single_line) { 8860 p->single_line_ = single_line; 8861 p->indent_depth_ = 0; 8862} 8863 8864 8865/* Public API *****************************************************************/ 8866 8867upb_textprinter *upb_textprinter_create(upb_arena *arena, const upb_handlers *h, 8868 upb_bytessink output) { 8869 upb_textprinter *p = upb_arena_malloc(arena, sizeof(upb_textprinter)); 8870 if (!p) return NULL; 8871 8872 p->output_ = output; 8873 upb_sink_reset(&p->input_, h, p); 8874 textprinter_reset(p, false); 8875 8876 return p; 8877} 8878 8879upb_handlercache *upb_textprinter_newcache(void) { 8880 return upb_handlercache_new(&onmreg, NULL); 8881} 8882 8883upb_sink upb_textprinter_input(upb_textprinter *p) { return p->input_; } 8884 8885void upb_textprinter_setsingleline(upb_textprinter *p, bool single_line) { 8886 p->single_line_ = single_line; 8887} 8888 8889 8890/* Index is descriptor type. 
*/
/* Each entry is annotated with its array index and the descriptor type name
 * it serves. */
const uint8_t upb_pb_native_wire_types[] = {
  /*  0: ENDGROUP */ UPB_WIRE_TYPE_END_GROUP,
  /*  1: DOUBLE   */ UPB_WIRE_TYPE_64BIT,
  /*  2: FLOAT    */ UPB_WIRE_TYPE_32BIT,
  /*  3: INT64    */ UPB_WIRE_TYPE_VARINT,
  /*  4: UINT64   */ UPB_WIRE_TYPE_VARINT,
  /*  5: INT32    */ UPB_WIRE_TYPE_VARINT,
  /*  6: FIXED64  */ UPB_WIRE_TYPE_64BIT,
  /*  7: FIXED32  */ UPB_WIRE_TYPE_32BIT,
  /*  8: BOOL     */ UPB_WIRE_TYPE_VARINT,
  /*  9: STRING   */ UPB_WIRE_TYPE_DELIMITED,
  /* 10: GROUP    */ UPB_WIRE_TYPE_START_GROUP,
  /* 11: MESSAGE  */ UPB_WIRE_TYPE_DELIMITED,
  /* 12: BYTES    */ UPB_WIRE_TYPE_DELIMITED,
  /* 13: UINT32   */ UPB_WIRE_TYPE_VARINT,
  /* 14: ENUM     */ UPB_WIRE_TYPE_VARINT,
  /* 15: SFIXED32 */ UPB_WIRE_TYPE_32BIT,
  /* 16: SFIXED64 */ UPB_WIRE_TYPE_64BIT,
  /* 17: SINT32   */ UPB_WIRE_TYPE_VARINT,
  /* 18: SINT64   */ UPB_WIRE_TYPE_VARINT,
};

/* A basic branch-based decoder, uses 32-bit values to get good performance
 * on 32-bit architectures (but performs well on 64-bits also).
 * This scheme comes from the original Google Protobuf implementation
 * (proto2).
*/ 8917upb_decoderet upb_vdecode_max8_branch32(upb_decoderet r) { 8918 upb_decoderet err = {NULL, 0}; 8919 const char *p = r.p; 8920 uint32_t low = (uint32_t)r.val; 8921 uint32_t high = 0; 8922 uint32_t b; 8923 b = *(p++); low |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; 8924 b = *(p++); low |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; 8925 b = *(p++); low |= (b & 0x7fU) << 28; 8926 high = (b & 0x7fU) >> 4; if (!(b & 0x80)) goto done; 8927 b = *(p++); high |= (b & 0x7fU) << 3; if (!(b & 0x80)) goto done; 8928 b = *(p++); high |= (b & 0x7fU) << 10; if (!(b & 0x80)) goto done; 8929 b = *(p++); high |= (b & 0x7fU) << 17; if (!(b & 0x80)) goto done; 8930 b = *(p++); high |= (b & 0x7fU) << 24; if (!(b & 0x80)) goto done; 8931 b = *(p++); high |= (b & 0x7fU) << 31; if (!(b & 0x80)) goto done; 8932 return err; 8933 8934done: 8935 r.val = ((uint64_t)high << 32) | low; 8936 r.p = p; 8937 return r; 8938} 8939 8940/* Like the previous, but uses 64-bit values. */ 8941upb_decoderet upb_vdecode_max8_branch64(upb_decoderet r) { 8942 const char *p = r.p; 8943 uint64_t val = r.val; 8944 uint64_t b; 8945 upb_decoderet err = {NULL, 0}; 8946 b = *(p++); val |= (b & 0x7fU) << 14; if (!(b & 0x80)) goto done; 8947 b = *(p++); val |= (b & 0x7fU) << 21; if (!(b & 0x80)) goto done; 8948 b = *(p++); val |= (b & 0x7fU) << 28; if (!(b & 0x80)) goto done; 8949 b = *(p++); val |= (b & 0x7fU) << 35; if (!(b & 0x80)) goto done; 8950 b = *(p++); val |= (b & 0x7fU) << 42; if (!(b & 0x80)) goto done; 8951 b = *(p++); val |= (b & 0x7fU) << 49; if (!(b & 0x80)) goto done; 8952 b = *(p++); val |= (b & 0x7fU) << 56; if (!(b & 0x80)) goto done; 8953 b = *(p++); val |= (b & 0x7fU) << 63; if (!(b & 0x80)) goto done; 8954 return err; 8955 8956done: 8957 r.val = val; 8958 r.p = p; 8959 return r; 8960} 8961 8962#line 1 "upb/json/parser.rl" 8963/* 8964** upb::json::Parser (upb_json_parser) 8965** 8966** A parser that uses the Ragel State Machine Compiler to generate 8967** the finite automata. 
8968** 8969** Ragel only natively handles regular languages, but we can manually 8970** program it a bit to handle context-free languages like JSON, by using 8971** the "fcall" and "fret" constructs. 8972** 8973** This parser can handle the basics, but needs several things to be fleshed 8974** out: 8975** 8976** - handling of unicode escape sequences (including high surrogate pairs). 8977** - properly check and report errors for unknown fields, stack overflow, 8978** improper array nesting (or lack of nesting). 8979** - handling of base64 sequences with padding characters. 8980** - handling of push-back (non-success returns from sink functions). 8981** - handling of keys/escape-sequences/etc that span input buffers. 8982*/ 8983 8984#include <ctype.h> 8985#include <errno.h> 8986#include <float.h> 8987#include <math.h> 8988#include <stdint.h> 8989#include <stdio.h> 8990#include <stdlib.h> 8991#include <string.h> 8992 8993#include <time.h> 8994 8995 8996 8997#define UPB_JSON_MAX_DEPTH 64 8998 8999/* Type of value message */ 9000enum { 9001 VALUE_NULLVALUE = 0, 9002 VALUE_NUMBERVALUE = 1, 9003 VALUE_STRINGVALUE = 2, 9004 VALUE_BOOLVALUE = 3, 9005 VALUE_STRUCTVALUE = 4, 9006 VALUE_LISTVALUE = 5 9007}; 9008 9009/* Forward declare */ 9010static bool is_top_level(upb_json_parser *p); 9011static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type); 9012static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type); 9013 9014static bool is_number_wrapper_object(upb_json_parser *p); 9015static bool does_number_wrapper_start(upb_json_parser *p); 9016static bool does_number_wrapper_end(upb_json_parser *p); 9017 9018static bool is_string_wrapper_object(upb_json_parser *p); 9019static bool does_string_wrapper_start(upb_json_parser *p); 9020static bool does_string_wrapper_end(upb_json_parser *p); 9021 9022static bool does_fieldmask_start(upb_json_parser *p); 9023static bool does_fieldmask_end(upb_json_parser *p); 9024static void 
start_fieldmask_object(upb_json_parser *p); 9025static void end_fieldmask_object(upb_json_parser *p); 9026 9027static void start_wrapper_object(upb_json_parser *p); 9028static void end_wrapper_object(upb_json_parser *p); 9029 9030static void start_value_object(upb_json_parser *p, int value_type); 9031static void end_value_object(upb_json_parser *p); 9032 9033static void start_listvalue_object(upb_json_parser *p); 9034static void end_listvalue_object(upb_json_parser *p); 9035 9036static void start_structvalue_object(upb_json_parser *p); 9037static void end_structvalue_object(upb_json_parser *p); 9038 9039static void start_object(upb_json_parser *p); 9040static void end_object(upb_json_parser *p); 9041 9042static void start_any_object(upb_json_parser *p, const char *ptr); 9043static bool end_any_object(upb_json_parser *p, const char *ptr); 9044 9045static bool start_subobject(upb_json_parser *p); 9046static void end_subobject(upb_json_parser *p); 9047 9048static void start_member(upb_json_parser *p); 9049static void end_member(upb_json_parser *p); 9050static bool end_membername(upb_json_parser *p); 9051 9052static void start_any_member(upb_json_parser *p, const char *ptr); 9053static void end_any_member(upb_json_parser *p, const char *ptr); 9054static bool end_any_membername(upb_json_parser *p); 9055 9056size_t parse(void *closure, const void *hd, const char *buf, size_t size, 9057 const upb_bufhandle *handle); 9058static bool end(void *closure, const void *hd); 9059 9060static const char eof_ch = 'e'; 9061 9062/* stringsink */ 9063typedef struct { 9064 upb_byteshandler handler; 9065 upb_bytessink sink; 9066 char *ptr; 9067 size_t len, size; 9068} upb_stringsink; 9069 9070 9071static void *stringsink_start(void *_sink, const void *hd, size_t size_hint) { 9072 upb_stringsink *sink = _sink; 9073 sink->len = 0; 9074 UPB_UNUSED(hd); 9075 UPB_UNUSED(size_hint); 9076 return sink; 9077} 9078 9079static size_t stringsink_string(void *_sink, const void *hd, const char *ptr, 
9080 size_t len, const upb_bufhandle *handle) { 9081 upb_stringsink *sink = _sink; 9082 size_t new_size = sink->size; 9083 9084 UPB_UNUSED(hd); 9085 UPB_UNUSED(handle); 9086 9087 while (sink->len + len > new_size) { 9088 new_size *= 2; 9089 } 9090 9091 if (new_size != sink->size) { 9092 sink->ptr = realloc(sink->ptr, new_size); 9093 sink->size = new_size; 9094 } 9095 9096 memcpy(sink->ptr + sink->len, ptr, len); 9097 sink->len += len; 9098 9099 return len; 9100} 9101 9102void upb_stringsink_init(upb_stringsink *sink) { 9103 upb_byteshandler_init(&sink->handler); 9104 upb_byteshandler_setstartstr(&sink->handler, stringsink_start, NULL); 9105 upb_byteshandler_setstring(&sink->handler, stringsink_string, NULL); 9106 9107 upb_bytessink_reset(&sink->sink, &sink->handler, sink); 9108 9109 sink->size = 32; 9110 sink->ptr = malloc(sink->size); 9111 sink->len = 0; 9112} 9113 9114void upb_stringsink_uninit(upb_stringsink *sink) { free(sink->ptr); } 9115 9116typedef struct { 9117 /* For encoding Any value field in binary format. */ 9118 upb_handlercache *encoder_handlercache; 9119 upb_stringsink stringsink; 9120 9121 /* For decoding Any value field in json format. */ 9122 upb_json_codecache *parser_codecache; 9123 upb_sink sink; 9124 upb_json_parser *parser; 9125 9126 /* Mark the range of uninterpreted values in json input before type url. */ 9127 const char *before_type_url_start; 9128 const char *before_type_url_end; 9129 9130 /* Mark the range of uninterpreted values in json input after type url. */ 9131 const char *after_type_url_start; 9132} upb_jsonparser_any_frame; 9133 9134typedef struct { 9135 upb_sink sink; 9136 9137 /* The current message in which we're parsing, and the field whose value we're 9138 * expecting next. */ 9139 const upb_msgdef *m; 9140 const upb_fielddef *f; 9141 9142 /* The table mapping json name to fielddef for this message. */ 9143 const upb_strtable *name_table; 9144 9145 /* We are in a repeated-field context. 
We need this flag to decide whether to 9146 * handle the array as a normal repeated field or a 9147 * google.protobuf.ListValue/google.protobuf.Value. */ 9148 bool is_repeated; 9149 9150 /* We are in a repeated-field context, ready to emit mapentries as 9151 * submessages. This flag alters the start-of-object (open-brace) behavior to 9152 * begin a sequence of mapentry messages rather than a single submessage. */ 9153 bool is_map; 9154 9155 /* We are in a map-entry message context. This flag is set when parsing the 9156 * value field of a single map entry and indicates to all value-field parsers 9157 * (subobjects, strings, numbers, and bools) that the map-entry submessage 9158 * should end as soon as the value is parsed. */ 9159 bool is_mapentry; 9160 9161 /* If |is_map| or |is_mapentry| is true, |mapfield| refers to the parent 9162 * message's map field that we're currently parsing. This differs from |f| 9163 * because |f| is the field in the *current* message (i.e., the map-entry 9164 * message itself), not the parent's field that leads to this map. */ 9165 const upb_fielddef *mapfield; 9166 9167 /* We are in an Any message context. This flag is set when parsing the Any 9168 * message and indicates to all field parsers (subobjects, strings, numbers, 9169 * and bools) that the parsed field should be serialized as binary data or 9170 * cached (type url not found yet). */ 9171 bool is_any; 9172 9173 /* The type of packed message in Any. */ 9174 upb_jsonparser_any_frame *any_frame; 9175 9176 /* True if the field to be parsed is unknown. 
*/ 9177 bool is_unknown_field; 9178} upb_jsonparser_frame; 9179 9180static void init_frame(upb_jsonparser_frame* frame) { 9181 frame->m = NULL; 9182 frame->f = NULL; 9183 frame->name_table = NULL; 9184 frame->is_repeated = false; 9185 frame->is_map = false; 9186 frame->is_mapentry = false; 9187 frame->mapfield = NULL; 9188 frame->is_any = false; 9189 frame->any_frame = NULL; 9190 frame->is_unknown_field = false; 9191} 9192 9193struct upb_json_parser { 9194 upb_arena *arena; 9195 const upb_json_parsermethod *method; 9196 upb_bytessink input_; 9197 9198 /* Stack to track the JSON scopes we are in. */ 9199 upb_jsonparser_frame stack[UPB_JSON_MAX_DEPTH]; 9200 upb_jsonparser_frame *top; 9201 upb_jsonparser_frame *limit; 9202 9203 upb_status *status; 9204 9205 /* Ragel's internal parsing stack for the parsing state machine. */ 9206 int current_state; 9207 int parser_stack[UPB_JSON_MAX_DEPTH]; 9208 int parser_top; 9209 9210 /* The handle for the current buffer. */ 9211 const upb_bufhandle *handle; 9212 9213 /* Accumulate buffer. See details in parser.rl. */ 9214 const char *accumulated; 9215 size_t accumulated_len; 9216 char *accumulate_buf; 9217 size_t accumulate_buf_size; 9218 9219 /* Multi-part text data. See details in parser.rl. */ 9220 int multipart_state; 9221 upb_selector_t string_selector; 9222 9223 /* Input capture. See details in parser.rl. */ 9224 const char *capture; 9225 9226 /* Intermediate result of parsing a unicode escape sequence. */ 9227 uint32_t digit; 9228 9229 /* For resolve type url in Any. */ 9230 const upb_symtab *symtab; 9231 9232 /* Whether to proceed if unknown field is met. */ 9233 bool ignore_json_unknown; 9234 9235 /* Cache for parsing timestamp due to base and zone are handled in different 9236 * handlers. 
*/ 9237 struct tm tm; 9238}; 9239 9240static upb_jsonparser_frame* start_jsonparser_frame(upb_json_parser *p) { 9241 upb_jsonparser_frame *inner; 9242 inner = p->top + 1; 9243 init_frame(inner); 9244 return inner; 9245} 9246 9247struct upb_json_codecache { 9248 upb_arena *arena; 9249 upb_inttable methods; /* upb_msgdef* -> upb_json_parsermethod* */ 9250}; 9251 9252struct upb_json_parsermethod { 9253 const upb_json_codecache *cache; 9254 upb_byteshandler input_handler_; 9255 9256 /* Maps json_name -> fielddef */ 9257 upb_strtable name_table; 9258}; 9259 9260#define PARSER_CHECK_RETURN(x) if (!(x)) return false 9261 9262static upb_jsonparser_any_frame *json_parser_any_frame_new( 9263 upb_json_parser *p) { 9264 upb_jsonparser_any_frame *frame; 9265 9266 frame = upb_arena_malloc(p->arena, sizeof(upb_jsonparser_any_frame)); 9267 9268 frame->encoder_handlercache = upb_pb_encoder_newcache(); 9269 frame->parser_codecache = upb_json_codecache_new(); 9270 frame->parser = NULL; 9271 frame->before_type_url_start = NULL; 9272 frame->before_type_url_end = NULL; 9273 frame->after_type_url_start = NULL; 9274 9275 upb_stringsink_init(&frame->stringsink); 9276 9277 return frame; 9278} 9279 9280static void json_parser_any_frame_set_payload_type( 9281 upb_json_parser *p, 9282 upb_jsonparser_any_frame *frame, 9283 const upb_msgdef *payload_type) { 9284 const upb_handlers *h; 9285 const upb_json_parsermethod *parser_method; 9286 upb_pb_encoder *encoder; 9287 9288 /* Initialize encoder. */ 9289 h = upb_handlercache_get(frame->encoder_handlercache, payload_type); 9290 encoder = upb_pb_encoder_create(p->arena, h, frame->stringsink.sink); 9291 9292 /* Initialize parser. 
*/ 9293 parser_method = upb_json_codecache_get(frame->parser_codecache, payload_type); 9294 upb_sink_reset(&frame->sink, h, encoder); 9295 frame->parser = 9296 upb_json_parser_create(p->arena, parser_method, p->symtab, frame->sink, 9297 p->status, p->ignore_json_unknown); 9298} 9299 9300static void json_parser_any_frame_free(upb_jsonparser_any_frame *frame) { 9301 upb_handlercache_free(frame->encoder_handlercache); 9302 upb_json_codecache_free(frame->parser_codecache); 9303 upb_stringsink_uninit(&frame->stringsink); 9304} 9305 9306static bool json_parser_any_frame_has_type_url( 9307 upb_jsonparser_any_frame *frame) { 9308 return frame->parser != NULL; 9309} 9310 9311static bool json_parser_any_frame_has_value_before_type_url( 9312 upb_jsonparser_any_frame *frame) { 9313 return frame->before_type_url_start != frame->before_type_url_end; 9314} 9315 9316static bool json_parser_any_frame_has_value_after_type_url( 9317 upb_jsonparser_any_frame *frame) { 9318 return frame->after_type_url_start != NULL; 9319} 9320 9321static bool json_parser_any_frame_has_value( 9322 upb_jsonparser_any_frame *frame) { 9323 return json_parser_any_frame_has_value_before_type_url(frame) || 9324 json_parser_any_frame_has_value_after_type_url(frame); 9325} 9326 9327static void json_parser_any_frame_set_before_type_url_end( 9328 upb_jsonparser_any_frame *frame, 9329 const char *ptr) { 9330 if (frame->parser == NULL) { 9331 frame->before_type_url_end = ptr; 9332 } 9333} 9334 9335static void json_parser_any_frame_set_after_type_url_start_once( 9336 upb_jsonparser_any_frame *frame, 9337 const char *ptr) { 9338 if (json_parser_any_frame_has_type_url(frame) && 9339 frame->after_type_url_start == NULL) { 9340 frame->after_type_url_start = ptr; 9341 } 9342} 9343 9344/* Used to signal that a capture has been suspended. 
*/ 9345static char suspend_capture; 9346 9347static upb_selector_t getsel_for_handlertype(upb_json_parser *p, 9348 upb_handlertype_t type) { 9349 upb_selector_t sel; 9350 bool ok = upb_handlers_getselector(p->top->f, type, &sel); 9351 UPB_ASSUME(ok); 9352 return sel; 9353} 9354 9355static upb_selector_t parser_getsel(upb_json_parser *p) { 9356 return getsel_for_handlertype( 9357 p, upb_handlers_getprimitivehandlertype(p->top->f)); 9358} 9359 9360static bool check_stack(upb_json_parser *p) { 9361 if ((p->top + 1) == p->limit) { 9362 upb_status_seterrmsg(p->status, "Nesting too deep"); 9363 return false; 9364 } 9365 9366 return true; 9367} 9368 9369static void set_name_table(upb_json_parser *p, upb_jsonparser_frame *frame) { 9370 upb_value v; 9371 const upb_json_codecache *cache = p->method->cache; 9372 bool ok; 9373 const upb_json_parsermethod *method; 9374 9375 ok = upb_inttable_lookupptr(&cache->methods, frame->m, &v); 9376 UPB_ASSUME(ok); 9377 method = upb_value_getconstptr(v); 9378 9379 frame->name_table = &method->name_table; 9380} 9381 9382/* There are GCC/Clang built-ins for overflow checking which we could start 9383 * using if there was any performance benefit to it. */ 9384 9385static bool checked_add(size_t a, size_t b, size_t *c) { 9386 if (SIZE_MAX - a < b) return false; 9387 *c = a + b; 9388 return true; 9389} 9390 9391static size_t saturating_multiply(size_t a, size_t b) { 9392 /* size_t is unsigned, so this is defined behavior even on overflow. */ 9393 size_t ret = a * b; 9394 if (b != 0 && ret / b != a) { 9395 ret = SIZE_MAX; 9396 } 9397 return ret; 9398} 9399 9400 9401/* Base64 decoding ************************************************************/ 9402 9403/* TODO(haberman): make this streaming. 
*/ 9404 9405static const signed char b64table[] = { 9406 -1, -1, -1, -1, -1, -1, -1, -1, 9407 -1, -1, -1, -1, -1, -1, -1, -1, 9408 -1, -1, -1, -1, -1, -1, -1, -1, 9409 -1, -1, -1, -1, -1, -1, -1, -1, 9410 -1, -1, -1, -1, -1, -1, -1, -1, 9411 -1, -1, -1, 62/*+*/, -1, -1, -1, 63/*/ */, 9412 52/*0*/, 53/*1*/, 54/*2*/, 55/*3*/, 56/*4*/, 57/*5*/, 58/*6*/, 59/*7*/, 9413 60/*8*/, 61/*9*/, -1, -1, -1, -1, -1, -1, 9414 -1, 0/*A*/, 1/*B*/, 2/*C*/, 3/*D*/, 4/*E*/, 5/*F*/, 6/*G*/, 9415 07/*H*/, 8/*I*/, 9/*J*/, 10/*K*/, 11/*L*/, 12/*M*/, 13/*N*/, 14/*O*/, 9416 15/*P*/, 16/*Q*/, 17/*R*/, 18/*S*/, 19/*T*/, 20/*U*/, 21/*V*/, 22/*W*/, 9417 23/*X*/, 24/*Y*/, 25/*Z*/, -1, -1, -1, -1, -1, 9418 -1, 26/*a*/, 27/*b*/, 28/*c*/, 29/*d*/, 30/*e*/, 31/*f*/, 32/*g*/, 9419 33/*h*/, 34/*i*/, 35/*j*/, 36/*k*/, 37/*l*/, 38/*m*/, 39/*n*/, 40/*o*/, 9420 41/*p*/, 42/*q*/, 43/*r*/, 44/*s*/, 45/*t*/, 46/*u*/, 47/*v*/, 48/*w*/, 9421 49/*x*/, 50/*y*/, 51/*z*/, -1, -1, -1, -1, -1, 9422 -1, -1, -1, -1, -1, -1, -1, -1, 9423 -1, -1, -1, -1, -1, -1, -1, -1, 9424 -1, -1, -1, -1, -1, -1, -1, -1, 9425 -1, -1, -1, -1, -1, -1, -1, -1, 9426 -1, -1, -1, -1, -1, -1, -1, -1, 9427 -1, -1, -1, -1, -1, -1, -1, -1, 9428 -1, -1, -1, -1, -1, -1, -1, -1, 9429 -1, -1, -1, -1, -1, -1, -1, -1, 9430 -1, -1, -1, -1, -1, -1, -1, -1, 9431 -1, -1, -1, -1, -1, -1, -1, -1, 9432 -1, -1, -1, -1, -1, -1, -1, -1, 9433 -1, -1, -1, -1, -1, -1, -1, -1, 9434 -1, -1, -1, -1, -1, -1, -1, -1, 9435 -1, -1, -1, -1, -1, -1, -1, -1, 9436 -1, -1, -1, -1, -1, -1, -1, -1, 9437 -1, -1, -1, -1, -1, -1, -1, -1 9438}; 9439 9440/* Returns the table value sign-extended to 32 bits. Knowing that the upper 9441 * bits will be 1 for unrecognized characters makes it easier to check for 9442 * this error condition later (see below). */ 9443int32_t b64lookup(unsigned char ch) { return b64table[ch]; } 9444 9445/* Returns true if the given character is not a valid base64 character or 9446 * padding. 
*/ 9447bool nonbase64(unsigned char ch) { return b64lookup(ch) == -1 && ch != '='; } 9448 9449static bool base64_push(upb_json_parser *p, upb_selector_t sel, const char *ptr, 9450 size_t len) { 9451 const char *limit = ptr + len; 9452 for (; ptr < limit; ptr += 4) { 9453 uint32_t val; 9454 char output[3]; 9455 9456 if (limit - ptr < 4) { 9457 upb_status_seterrf(p->status, 9458 "Base64 input for bytes field not a multiple of 4: %s", 9459 upb_fielddef_name(p->top->f)); 9460 return false; 9461 } 9462 9463 val = b64lookup(ptr[0]) << 18 | 9464 b64lookup(ptr[1]) << 12 | 9465 b64lookup(ptr[2]) << 6 | 9466 b64lookup(ptr[3]); 9467 9468 /* Test the upper bit; returns true if any of the characters returned -1. */ 9469 if (val & 0x80000000) { 9470 goto otherchar; 9471 } 9472 9473 output[0] = val >> 16; 9474 output[1] = (val >> 8) & 0xff; 9475 output[2] = val & 0xff; 9476 upb_sink_putstring(p->top->sink, sel, output, 3, NULL); 9477 } 9478 return true; 9479 9480otherchar: 9481 if (nonbase64(ptr[0]) || nonbase64(ptr[1]) || nonbase64(ptr[2]) || 9482 nonbase64(ptr[3]) ) { 9483 upb_status_seterrf(p->status, 9484 "Non-base64 characters in bytes field: %s", 9485 upb_fielddef_name(p->top->f)); 9486 return false; 9487 } if (ptr[2] == '=') { 9488 uint32_t val; 9489 char output; 9490 9491 /* Last group contains only two input bytes, one output byte. */ 9492 if (ptr[0] == '=' || ptr[1] == '=' || ptr[3] != '=') { 9493 goto badpadding; 9494 } 9495 9496 val = b64lookup(ptr[0]) << 18 | 9497 b64lookup(ptr[1]) << 12; 9498 9499 UPB_ASSERT(!(val & 0x80000000)); 9500 output = val >> 16; 9501 upb_sink_putstring(p->top->sink, sel, &output, 1, NULL); 9502 return true; 9503 } else { 9504 uint32_t val; 9505 char output[2]; 9506 9507 /* Last group contains only three input bytes, two output bytes. 
*/ 9508 if (ptr[0] == '=' || ptr[1] == '=' || ptr[2] == '=') { 9509 goto badpadding; 9510 } 9511 9512 val = b64lookup(ptr[0]) << 18 | 9513 b64lookup(ptr[1]) << 12 | 9514 b64lookup(ptr[2]) << 6; 9515 9516 output[0] = val >> 16; 9517 output[1] = (val >> 8) & 0xff; 9518 upb_sink_putstring(p->top->sink, sel, output, 2, NULL); 9519 return true; 9520 } 9521 9522badpadding: 9523 upb_status_seterrf(p->status, 9524 "Incorrect base64 padding for field: %s (%.*s)", 9525 upb_fielddef_name(p->top->f), 9526 4, ptr); 9527 return false; 9528} 9529 9530 9531/* Accumulate buffer **********************************************************/ 9532 9533/* Functionality for accumulating a buffer. 9534 * 9535 * Some parts of the parser need an entire value as a contiguous string. For 9536 * example, to look up a member name in a hash table, or to turn a string into 9537 * a number, the relevant library routines need the input string to be in 9538 * contiguous memory, even if the value spanned two or more buffers in the 9539 * input. These routines handle that. 9540 * 9541 * In the common case we can just point to the input buffer to get this 9542 * contiguous string and avoid any actual copy. So we optimistically begin 9543 * this way. But there are a few cases where we must instead copy into a 9544 * separate buffer: 9545 * 9546 * 1. The string was not contiguous in the input (it spanned buffers). 9547 * 9548 * 2. The string included escape sequences that need to be interpreted to get 9549 * the true value in a contiguous buffer. */ 9550 9551static void assert_accumulate_empty(upb_json_parser *p) { 9552 UPB_ASSERT(p->accumulated == NULL); 9553 UPB_ASSERT(p->accumulated_len == 0); 9554} 9555 9556static void accumulate_clear(upb_json_parser *p) { 9557 p->accumulated = NULL; 9558 p->accumulated_len = 0; 9559} 9560 9561/* Used internally by accumulate_append(). 
*/ 9562static bool accumulate_realloc(upb_json_parser *p, size_t need) { 9563 void *mem; 9564 size_t old_size = p->accumulate_buf_size; 9565 size_t new_size = UPB_MAX(old_size, 128); 9566 while (new_size < need) { 9567 new_size = saturating_multiply(new_size, 2); 9568 } 9569 9570 mem = upb_arena_realloc(p->arena, p->accumulate_buf, old_size, new_size); 9571 if (!mem) { 9572 upb_status_seterrmsg(p->status, "Out of memory allocating buffer."); 9573 return false; 9574 } 9575 9576 p->accumulate_buf = mem; 9577 p->accumulate_buf_size = new_size; 9578 return true; 9579} 9580 9581/* Logically appends the given data to the append buffer. 9582 * If "can_alias" is true, we will try to avoid actually copying, but the buffer 9583 * must be valid until the next accumulate_append() call (if any). */ 9584static bool accumulate_append(upb_json_parser *p, const char *buf, size_t len, 9585 bool can_alias) { 9586 size_t need; 9587 9588 if (!p->accumulated && can_alias) { 9589 p->accumulated = buf; 9590 p->accumulated_len = len; 9591 return true; 9592 } 9593 9594 if (!checked_add(p->accumulated_len, len, &need)) { 9595 upb_status_seterrmsg(p->status, "Integer overflow."); 9596 return false; 9597 } 9598 9599 if (need > p->accumulate_buf_size && !accumulate_realloc(p, need)) { 9600 return false; 9601 } 9602 9603 if (p->accumulated != p->accumulate_buf) { 9604 memcpy(p->accumulate_buf, p->accumulated, p->accumulated_len); 9605 p->accumulated = p->accumulate_buf; 9606 } 9607 9608 memcpy(p->accumulate_buf + p->accumulated_len, buf, len); 9609 p->accumulated_len += len; 9610 return true; 9611} 9612 9613/* Returns a pointer to the data accumulated since the last accumulate_clear() 9614 * call, and writes the length to *len. This with point either to the input 9615 * buffer or a temporary accumulate buffer. 
*/ 9616static const char *accumulate_getptr(upb_json_parser *p, size_t *len) { 9617 UPB_ASSERT(p->accumulated); 9618 *len = p->accumulated_len; 9619 return p->accumulated; 9620} 9621 9622 9623/* Mult-part text data ********************************************************/ 9624 9625/* When we have text data in the input, it can often come in multiple segments. 9626 * For example, there may be some raw string data followed by an escape 9627 * sequence. The two segments are processed with different logic. Also buffer 9628 * seams in the input can cause multiple segments. 9629 * 9630 * As we see segments, there are two main cases for how we want to process them: 9631 * 9632 * 1. we want to push the captured input directly to string handlers. 9633 * 9634 * 2. we need to accumulate all the parts into a contiguous buffer for further 9635 * processing (field name lookup, string->number conversion, etc). */ 9636 9637/* This is the set of states for p->multipart_state. */ 9638enum { 9639 /* We are not currently processing multipart data. */ 9640 MULTIPART_INACTIVE = 0, 9641 9642 /* We are processing multipart data by accumulating it into a contiguous 9643 * buffer. */ 9644 MULTIPART_ACCUMULATE = 1, 9645 9646 /* We are processing multipart data by pushing each part directly to the 9647 * current string handlers. */ 9648 MULTIPART_PUSHEAGERLY = 2 9649}; 9650 9651/* Start a multi-part text value where we accumulate the data for processing at 9652 * the end. */ 9653static void multipart_startaccum(upb_json_parser *p) { 9654 assert_accumulate_empty(p); 9655 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); 9656 p->multipart_state = MULTIPART_ACCUMULATE; 9657} 9658 9659/* Start a multi-part text value where we immediately push text data to a string 9660 * value with the given selector. 
*/ 9661static void multipart_start(upb_json_parser *p, upb_selector_t sel) { 9662 assert_accumulate_empty(p); 9663 UPB_ASSERT(p->multipart_state == MULTIPART_INACTIVE); 9664 p->multipart_state = MULTIPART_PUSHEAGERLY; 9665 p->string_selector = sel; 9666} 9667 9668static bool multipart_text(upb_json_parser *p, const char *buf, size_t len, 9669 bool can_alias) { 9670 switch (p->multipart_state) { 9671 case MULTIPART_INACTIVE: 9672 upb_status_seterrmsg( 9673 p->status, "Internal error: unexpected state MULTIPART_INACTIVE"); 9674 return false; 9675 9676 case MULTIPART_ACCUMULATE: 9677 if (!accumulate_append(p, buf, len, can_alias)) { 9678 return false; 9679 } 9680 break; 9681 9682 case MULTIPART_PUSHEAGERLY: { 9683 const upb_bufhandle *handle = can_alias ? p->handle : NULL; 9684 upb_sink_putstring(p->top->sink, p->string_selector, buf, len, handle); 9685 break; 9686 } 9687 } 9688 9689 return true; 9690} 9691 9692/* Note: this invalidates the accumulate buffer! Call only after reading its 9693 * contents. */ 9694static void multipart_end(upb_json_parser *p) { 9695 /* UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); */ 9696 p->multipart_state = MULTIPART_INACTIVE; 9697 accumulate_clear(p); 9698} 9699 9700 9701/* Input capture **************************************************************/ 9702 9703/* Functionality for capturing a region of the input as text. Gracefully 9704 * handles the case where a buffer seam occurs in the middle of the captured 9705 * region. 
*/ 9706 9707static void capture_begin(upb_json_parser *p, const char *ptr) { 9708 UPB_ASSERT(p->multipart_state != MULTIPART_INACTIVE); 9709 UPB_ASSERT(p->capture == NULL); 9710 p->capture = ptr; 9711} 9712 9713static bool capture_end(upb_json_parser *p, const char *ptr) { 9714 UPB_ASSERT(p->capture); 9715 if (multipart_text(p, p->capture, ptr - p->capture, true)) { 9716 p->capture = NULL; 9717 return true; 9718 } else { 9719 return false; 9720 } 9721} 9722 9723/* This is called at the end of each input buffer (ie. when we have hit a 9724 * buffer seam). If we are in the middle of capturing the input, this 9725 * processes the unprocessed capture region. */ 9726static void capture_suspend(upb_json_parser *p, const char **ptr) { 9727 if (!p->capture) return; 9728 9729 if (multipart_text(p, p->capture, *ptr - p->capture, false)) { 9730 /* We use this as a signal that we were in the middle of capturing, and 9731 * that capturing should resume at the beginning of the next buffer. 9732 * 9733 * We can't use *ptr here, because we have no guarantee that this pointer 9734 * will be valid when we resume (if the underlying memory is freed, then 9735 * using the pointer at all, even to compare to NULL, is likely undefined 9736 * behavior). */ 9737 p->capture = &suspend_capture; 9738 } else { 9739 /* Need to back up the pointer to the beginning of the capture, since 9740 * we were not able to actually preserve it. */ 9741 *ptr = p->capture; 9742 } 9743} 9744 9745static void capture_resume(upb_json_parser *p, const char *ptr) { 9746 if (p->capture) { 9747 UPB_ASSERT(p->capture == &suspend_capture); 9748 p->capture = ptr; 9749 } 9750} 9751 9752 9753/* Callbacks from the parser **************************************************/ 9754 9755/* These are the functions called directly from the parser itself. 9756 * We define these in the same order as their declarations in the parser. 
*/ 9757 9758static char escape_char(char in) { 9759 switch (in) { 9760 case 'r': return '\r'; 9761 case 't': return '\t'; 9762 case 'n': return '\n'; 9763 case 'f': return '\f'; 9764 case 'b': return '\b'; 9765 case '/': return '/'; 9766 case '"': return '"'; 9767 case '\\': return '\\'; 9768 default: 9769 UPB_ASSERT(0); 9770 return 'x'; 9771 } 9772} 9773 9774static bool escape(upb_json_parser *p, const char *ptr) { 9775 char ch = escape_char(*ptr); 9776 return multipart_text(p, &ch, 1, false); 9777} 9778 9779static void start_hex(upb_json_parser *p) { 9780 p->digit = 0; 9781} 9782 9783static void hexdigit(upb_json_parser *p, const char *ptr) { 9784 char ch = *ptr; 9785 9786 p->digit <<= 4; 9787 9788 if (ch >= '0' && ch <= '9') { 9789 p->digit += (ch - '0'); 9790 } else if (ch >= 'a' && ch <= 'f') { 9791 p->digit += ((ch - 'a') + 10); 9792 } else { 9793 UPB_ASSERT(ch >= 'A' && ch <= 'F'); 9794 p->digit += ((ch - 'A') + 10); 9795 } 9796} 9797 9798static bool end_hex(upb_json_parser *p) { 9799 uint32_t codepoint = p->digit; 9800 9801 /* emit the codepoint as UTF-8. */ 9802 char utf8[3]; /* support \u0000 -- \uFFFF -- need only three bytes. */ 9803 int length = 0; 9804 if (codepoint <= 0x7F) { 9805 utf8[0] = codepoint; 9806 length = 1; 9807 } else if (codepoint <= 0x07FF) { 9808 utf8[1] = (codepoint & 0x3F) | 0x80; 9809 codepoint >>= 6; 9810 utf8[0] = (codepoint & 0x1F) | 0xC0; 9811 length = 2; 9812 } else /* codepoint <= 0xFFFF */ { 9813 utf8[2] = (codepoint & 0x3F) | 0x80; 9814 codepoint >>= 6; 9815 utf8[1] = (codepoint & 0x3F) | 0x80; 9816 codepoint >>= 6; 9817 utf8[0] = (codepoint & 0x0F) | 0xE0; 9818 length = 3; 9819 } 9820 /* TODO(haberman): Handle high surrogates: if codepoint is a high surrogate 9821 * we have to wait for the next escape to get the full code point). 
*/ 9822 9823 return multipart_text(p, utf8, length, false); 9824} 9825 9826static void start_text(upb_json_parser *p, const char *ptr) { 9827 capture_begin(p, ptr); 9828} 9829 9830static bool end_text(upb_json_parser *p, const char *ptr) { 9831 return capture_end(p, ptr); 9832} 9833 9834static bool start_number(upb_json_parser *p, const char *ptr) { 9835 if (is_top_level(p)) { 9836 if (is_number_wrapper_object(p)) { 9837 start_wrapper_object(p); 9838 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 9839 start_value_object(p, VALUE_NUMBERVALUE); 9840 } else { 9841 return false; 9842 } 9843 } else if (does_number_wrapper_start(p)) { 9844 if (!start_subobject(p)) { 9845 return false; 9846 } 9847 start_wrapper_object(p); 9848 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 9849 if (!start_subobject(p)) { 9850 return false; 9851 } 9852 start_value_object(p, VALUE_NUMBERVALUE); 9853 } 9854 9855 multipart_startaccum(p); 9856 capture_begin(p, ptr); 9857 return true; 9858} 9859 9860static bool parse_number(upb_json_parser *p, bool is_quoted); 9861 9862static bool end_number_nontop(upb_json_parser *p, const char *ptr) { 9863 if (!capture_end(p, ptr)) { 9864 return false; 9865 } 9866 9867 if (p->top->f == NULL) { 9868 multipart_end(p); 9869 return true; 9870 } 9871 9872 return parse_number(p, false); 9873} 9874 9875static bool end_number(upb_json_parser *p, const char *ptr) { 9876 if (!end_number_nontop(p, ptr)) { 9877 return false; 9878 } 9879 9880 if (does_number_wrapper_end(p)) { 9881 end_wrapper_object(p); 9882 if (!is_top_level(p)) { 9883 end_subobject(p); 9884 } 9885 return true; 9886 } 9887 9888 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 9889 end_value_object(p); 9890 if (!is_top_level(p)) { 9891 end_subobject(p); 9892 } 9893 return true; 9894 } 9895 9896 return true; 9897} 9898 9899/* |buf| is NULL-terminated. |buf| itself will never include quotes; 9900 * |is_quoted| tells us whether this text originally appeared inside quotes. 
*/ 9901static bool parse_number_from_buffer(upb_json_parser *p, const char *buf, 9902 bool is_quoted) { 9903 size_t len = strlen(buf); 9904 const char *bufend = buf + len; 9905 char *end; 9906 upb_fieldtype_t type = upb_fielddef_type(p->top->f); 9907 double val; 9908 double dummy; 9909 double inf = UPB_INFINITY; 9910 9911 errno = 0; 9912 9913 if (len == 0 || buf[0] == ' ') { 9914 return false; 9915 } 9916 9917 /* For integer types, first try parsing with integer-specific routines. 9918 * If these succeed, they will be more accurate for int64/uint64 than 9919 * strtod(). 9920 */ 9921 switch (type) { 9922 case UPB_TYPE_ENUM: 9923 case UPB_TYPE_INT32: { 9924 long val = strtol(buf, &end, 0); 9925 if (errno == ERANGE || end != bufend) { 9926 break; 9927 } else if (val > INT32_MAX || val < INT32_MIN) { 9928 return false; 9929 } else { 9930 upb_sink_putint32(p->top->sink, parser_getsel(p), (int32_t)val); 9931 return true; 9932 } 9933 } 9934 case UPB_TYPE_UINT32: { 9935 unsigned long val = strtoul(buf, &end, 0); 9936 if (end != bufend) { 9937 break; 9938 } else if (val > UINT32_MAX || errno == ERANGE) { 9939 return false; 9940 } else { 9941 upb_sink_putuint32(p->top->sink, parser_getsel(p), (uint32_t)val); 9942 return true; 9943 } 9944 } 9945 /* XXX: We can't handle [u]int64 properly on 32-bit machines because 9946 * strto[u]ll isn't in C89. 
*/ 9947 case UPB_TYPE_INT64: { 9948 long val = strtol(buf, &end, 0); 9949 if (errno == ERANGE || end != bufend) { 9950 break; 9951 } else { 9952 upb_sink_putint64(p->top->sink, parser_getsel(p), val); 9953 return true; 9954 } 9955 } 9956 case UPB_TYPE_UINT64: { 9957 unsigned long val = strtoul(p->accumulated, &end, 0); 9958 if (end != bufend) { 9959 break; 9960 } else if (errno == ERANGE) { 9961 return false; 9962 } else { 9963 upb_sink_putuint64(p->top->sink, parser_getsel(p), val); 9964 return true; 9965 } 9966 } 9967 default: 9968 break; 9969 } 9970 9971 if (type != UPB_TYPE_DOUBLE && type != UPB_TYPE_FLOAT && is_quoted) { 9972 /* Quoted numbers for integer types are not allowed to be in double form. */ 9973 return false; 9974 } 9975 9976 if (len == strlen("Infinity") && strcmp(buf, "Infinity") == 0) { 9977 /* C89 does not have an INFINITY macro. */ 9978 val = inf; 9979 } else if (len == strlen("-Infinity") && strcmp(buf, "-Infinity") == 0) { 9980 val = -inf; 9981 } else { 9982 val = strtod(buf, &end); 9983 if (errno == ERANGE || end != bufend) { 9984 return false; 9985 } 9986 } 9987 9988 switch (type) { 9989#define CASE(capitaltype, smalltype, ctype, min, max) \ 9990 case UPB_TYPE_ ## capitaltype: { \ 9991 if (modf(val, &dummy) != 0 || val > max || val < min) { \ 9992 return false; \ 9993 } else { \ 9994 upb_sink_put ## smalltype(p->top->sink, parser_getsel(p), \ 9995 (ctype)val); \ 9996 return true; \ 9997 } \ 9998 break; \ 9999 } 10000 case UPB_TYPE_ENUM: 10001 CASE(INT32, int32, int32_t, INT32_MIN, INT32_MAX); 10002 CASE(INT64, int64, int64_t, INT64_MIN, INT64_MAX); 10003 CASE(UINT32, uint32, uint32_t, 0, UINT32_MAX); 10004 CASE(UINT64, uint64, uint64_t, 0, UINT64_MAX); 10005#undef CASE 10006 10007 case UPB_TYPE_DOUBLE: 10008 upb_sink_putdouble(p->top->sink, parser_getsel(p), val); 10009 return true; 10010 case UPB_TYPE_FLOAT: 10011 if ((val > FLT_MAX || val < -FLT_MAX) && val != inf && val != -inf) { 10012 return false; 10013 } else { 10014 
upb_sink_putfloat(p->top->sink, parser_getsel(p), val); 10015 return true; 10016 } 10017 default: 10018 return false; 10019 } 10020} 10021 10022static bool parse_number(upb_json_parser *p, bool is_quoted) { 10023 size_t len; 10024 const char *buf; 10025 10026 /* strtol() and friends unfortunately do not support specifying the length of 10027 * the input string, so we need to force a copy into a NULL-terminated buffer. */ 10028 if (!multipart_text(p, "\0", 1, false)) { 10029 return false; 10030 } 10031 10032 buf = accumulate_getptr(p, &len); 10033 10034 if (parse_number_from_buffer(p, buf, is_quoted)) { 10035 multipart_end(p); 10036 return true; 10037 } else { 10038 upb_status_seterrf(p->status, "error parsing number: %s", buf); 10039 multipart_end(p); 10040 return false; 10041 } 10042} 10043 10044static bool parser_putbool(upb_json_parser *p, bool val) { 10045 bool ok; 10046 10047 if (p->top->f == NULL) { 10048 return true; 10049 } 10050 10051 if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL) { 10052 upb_status_seterrf(p->status, 10053 "Boolean value specified for non-bool field: %s", 10054 upb_fielddef_name(p->top->f)); 10055 return false; 10056 } 10057 10058 ok = upb_sink_putbool(p->top->sink, parser_getsel(p), val); 10059 UPB_ASSERT(ok); 10060 10061 return true; 10062} 10063 10064static bool end_bool(upb_json_parser *p, bool val) { 10065 if (is_top_level(p)) { 10066 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { 10067 start_wrapper_object(p); 10068 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 10069 start_value_object(p, VALUE_BOOLVALUE); 10070 } else { 10071 return false; 10072 } 10073 } else if (is_wellknown_field(p, UPB_WELLKNOWN_BOOLVALUE)) { 10074 if (!start_subobject(p)) { 10075 return false; 10076 } 10077 start_wrapper_object(p); 10078 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 10079 if (!start_subobject(p)) { 10080 return false; 10081 } 10082 start_value_object(p, VALUE_BOOLVALUE); 10083 } 10084 10085 if 
(p->top->is_unknown_field) { 10086 return true; 10087 } 10088 10089 if (!parser_putbool(p, val)) { 10090 return false; 10091 } 10092 10093 if (is_wellknown_msg(p, UPB_WELLKNOWN_BOOLVALUE)) { 10094 end_wrapper_object(p); 10095 if (!is_top_level(p)) { 10096 end_subobject(p); 10097 } 10098 return true; 10099 } 10100 10101 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 10102 end_value_object(p); 10103 if (!is_top_level(p)) { 10104 end_subobject(p); 10105 } 10106 return true; 10107 } 10108 10109 return true; 10110} 10111 10112static bool end_null(upb_json_parser *p) { 10113 const char *zero_ptr = "0"; 10114 10115 if (is_top_level(p)) { 10116 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 10117 start_value_object(p, VALUE_NULLVALUE); 10118 } else { 10119 return true; 10120 } 10121 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 10122 if (!start_subobject(p)) { 10123 return false; 10124 } 10125 start_value_object(p, VALUE_NULLVALUE); 10126 } else { 10127 return true; 10128 } 10129 10130 /* Fill null_value field. 
*/ 10131 multipart_startaccum(p); 10132 capture_begin(p, zero_ptr); 10133 capture_end(p, zero_ptr + 1); 10134 parse_number(p, false); 10135 10136 end_value_object(p); 10137 if (!is_top_level(p)) { 10138 end_subobject(p); 10139 } 10140 10141 return true; 10142} 10143 10144static bool start_any_stringval(upb_json_parser *p) { 10145 multipart_startaccum(p); 10146 return true; 10147} 10148 10149static bool start_stringval(upb_json_parser *p) { 10150 if (is_top_level(p)) { 10151 if (is_string_wrapper_object(p) || 10152 is_number_wrapper_object(p)) { 10153 start_wrapper_object(p); 10154 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { 10155 start_fieldmask_object(p); 10156 return true; 10157 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 10158 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { 10159 start_object(p); 10160 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 10161 start_value_object(p, VALUE_STRINGVALUE); 10162 } else { 10163 return false; 10164 } 10165 } else if (does_string_wrapper_start(p) || 10166 does_number_wrapper_start(p)) { 10167 if (!start_subobject(p)) { 10168 return false; 10169 } 10170 start_wrapper_object(p); 10171 } else if (does_fieldmask_start(p)) { 10172 if (!start_subobject(p)) { 10173 return false; 10174 } 10175 start_fieldmask_object(p); 10176 return true; 10177 } else if (is_wellknown_field(p, UPB_WELLKNOWN_TIMESTAMP) || 10178 is_wellknown_field(p, UPB_WELLKNOWN_DURATION)) { 10179 if (!start_subobject(p)) { 10180 return false; 10181 } 10182 start_object(p); 10183 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 10184 if (!start_subobject(p)) { 10185 return false; 10186 } 10187 start_value_object(p, VALUE_STRINGVALUE); 10188 } 10189 10190 if (p->top->f == NULL) { 10191 multipart_startaccum(p); 10192 return true; 10193 } 10194 10195 if (p->top->is_any) { 10196 return start_any_stringval(p); 10197 } 10198 10199 if (upb_fielddef_isstring(p->top->f)) { 10200 upb_jsonparser_frame *inner; 10201 upb_selector_t 
sel; 10202 10203 if (!check_stack(p)) return false; 10204 10205 /* Start a new parser frame: parser frames correspond one-to-one with 10206 * handler frames, and string events occur in a sub-frame. */ 10207 inner = start_jsonparser_frame(p); 10208 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10209 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 10210 inner->m = p->top->m; 10211 inner->f = p->top->f; 10212 p->top = inner; 10213 10214 if (upb_fielddef_type(p->top->f) == UPB_TYPE_STRING) { 10215 /* For STRING fields we push data directly to the handlers as it is 10216 * parsed. We don't do this yet for BYTES fields, because our base64 10217 * decoder is not streaming. 10218 * 10219 * TODO(haberman): make base64 decoding streaming also. */ 10220 multipart_start(p, getsel_for_handlertype(p, UPB_HANDLER_STRING)); 10221 return true; 10222 } else { 10223 multipart_startaccum(p); 10224 return true; 10225 } 10226 } else if (upb_fielddef_type(p->top->f) != UPB_TYPE_BOOL && 10227 upb_fielddef_type(p->top->f) != UPB_TYPE_MESSAGE) { 10228 /* No need to push a frame -- numeric values in quotes remain in the 10229 * current parser frame. These values must accmulate so we can convert 10230 * them all at once at the end. 
*/ 10231 multipart_startaccum(p); 10232 return true; 10233 } else { 10234 upb_status_seterrf(p->status, 10235 "String specified for bool or submessage field: %s", 10236 upb_fielddef_name(p->top->f)); 10237 return false; 10238 } 10239} 10240 10241static bool end_any_stringval(upb_json_parser *p) { 10242 size_t len; 10243 const char *buf = accumulate_getptr(p, &len); 10244 10245 /* Set type_url */ 10246 upb_selector_t sel; 10247 upb_jsonparser_frame *inner; 10248 if (!check_stack(p)) return false; 10249 inner = p->top + 1; 10250 10251 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10252 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 10253 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 10254 upb_sink_putstring(inner->sink, sel, buf, len, NULL); 10255 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10256 upb_sink_endstr(inner->sink, sel); 10257 10258 multipart_end(p); 10259 10260 /* Resolve type url */ 10261 if (strncmp(buf, "type.googleapis.com/", 20) == 0 && len > 20) { 10262 const upb_msgdef *payload_type = NULL; 10263 buf += 20; 10264 len -= 20; 10265 10266 payload_type = upb_symtab_lookupmsg2(p->symtab, buf, len); 10267 if (payload_type == NULL) { 10268 upb_status_seterrf( 10269 p->status, "Cannot find packed type: %.*s\n", (int)len, buf); 10270 return false; 10271 } 10272 10273 json_parser_any_frame_set_payload_type(p, p->top->any_frame, payload_type); 10274 10275 return true; 10276 } else { 10277 upb_status_seterrf( 10278 p->status, "Invalid type url: %.*s\n", (int)len, buf); 10279 return false; 10280 } 10281} 10282 10283static bool end_stringval_nontop(upb_json_parser *p) { 10284 bool ok = true; 10285 10286 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 10287 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION)) { 10288 multipart_end(p); 10289 return true; 10290 } 10291 10292 if (p->top->f == NULL) { 10293 multipart_end(p); 10294 return true; 10295 } 10296 10297 if (p->top->is_any) { 10298 return end_any_stringval(p); 10299 } 10300 10301 
switch (upb_fielddef_type(p->top->f)) { 10302 case UPB_TYPE_BYTES: 10303 if (!base64_push(p, getsel_for_handlertype(p, UPB_HANDLER_STRING), 10304 p->accumulated, p->accumulated_len)) { 10305 return false; 10306 } 10307 /* Fall through. */ 10308 10309 case UPB_TYPE_STRING: { 10310 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10311 upb_sink_endstr(p->top->sink, sel); 10312 p->top--; 10313 break; 10314 } 10315 10316 case UPB_TYPE_ENUM: { 10317 /* Resolve enum symbolic name to integer value. */ 10318 const upb_enumdef *enumdef = upb_fielddef_enumsubdef(p->top->f); 10319 10320 size_t len; 10321 const char *buf = accumulate_getptr(p, &len); 10322 10323 int32_t int_val = 0; 10324 ok = upb_enumdef_ntoi(enumdef, buf, len, &int_val); 10325 10326 if (ok) { 10327 upb_selector_t sel = parser_getsel(p); 10328 upb_sink_putint32(p->top->sink, sel, int_val); 10329 } else { 10330 upb_status_seterrf(p->status, "Enum value unknown: '%.*s'", len, buf); 10331 } 10332 10333 break; 10334 } 10335 10336 case UPB_TYPE_INT32: 10337 case UPB_TYPE_INT64: 10338 case UPB_TYPE_UINT32: 10339 case UPB_TYPE_UINT64: 10340 case UPB_TYPE_DOUBLE: 10341 case UPB_TYPE_FLOAT: 10342 ok = parse_number(p, true); 10343 break; 10344 10345 default: 10346 UPB_ASSERT(false); 10347 upb_status_seterrmsg(p->status, "Internal error in JSON decoder"); 10348 ok = false; 10349 break; 10350 } 10351 10352 multipart_end(p); 10353 10354 return ok; 10355} 10356 10357static bool end_stringval(upb_json_parser *p) { 10358 /* FieldMask's stringvals have been ended when handling them. 
Only need to 10359 * close FieldMask here.*/ 10360 if (does_fieldmask_end(p)) { 10361 end_fieldmask_object(p); 10362 if (!is_top_level(p)) { 10363 end_subobject(p); 10364 } 10365 return true; 10366 } 10367 10368 if (!end_stringval_nontop(p)) { 10369 return false; 10370 } 10371 10372 if (does_string_wrapper_end(p) || 10373 does_number_wrapper_end(p)) { 10374 end_wrapper_object(p); 10375 if (!is_top_level(p)) { 10376 end_subobject(p); 10377 } 10378 return true; 10379 } 10380 10381 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 10382 end_value_object(p); 10383 if (!is_top_level(p)) { 10384 end_subobject(p); 10385 } 10386 return true; 10387 } 10388 10389 if (is_wellknown_msg(p, UPB_WELLKNOWN_TIMESTAMP) || 10390 is_wellknown_msg(p, UPB_WELLKNOWN_DURATION) || 10391 is_wellknown_msg(p, UPB_WELLKNOWN_FIELDMASK)) { 10392 end_object(p); 10393 if (!is_top_level(p)) { 10394 end_subobject(p); 10395 } 10396 return true; 10397 } 10398 10399 return true; 10400} 10401 10402static void start_duration_base(upb_json_parser *p, const char *ptr) { 10403 capture_begin(p, ptr); 10404} 10405 10406static bool end_duration_base(upb_json_parser *p, const char *ptr) { 10407 size_t len; 10408 const char *buf; 10409 char seconds_buf[14]; 10410 char nanos_buf[12]; 10411 char *end; 10412 int64_t seconds = 0; 10413 int32_t nanos = 0; 10414 double val = 0.0; 10415 const char *seconds_membername = "seconds"; 10416 const char *nanos_membername = "nanos"; 10417 size_t fraction_start; 10418 10419 if (!capture_end(p, ptr)) { 10420 return false; 10421 } 10422 10423 buf = accumulate_getptr(p, &len); 10424 10425 memset(seconds_buf, 0, 14); 10426 memset(nanos_buf, 0, 12); 10427 10428 /* Find out base end. The maximus duration is 315576000000, which cannot be 10429 * represented by double without losing precision. Thus, we need to handle 10430 * fraction and base separately. 
*/ 10431 for (fraction_start = 0; fraction_start < len && buf[fraction_start] != '.'; 10432 fraction_start++); 10433 10434 /* Parse base */ 10435 memcpy(seconds_buf, buf, fraction_start); 10436 seconds = strtol(seconds_buf, &end, 10); 10437 if (errno == ERANGE || end != seconds_buf + fraction_start) { 10438 upb_status_seterrf(p->status, "error parsing duration: %s", 10439 seconds_buf); 10440 return false; 10441 } 10442 10443 if (seconds > 315576000000) { 10444 upb_status_seterrf(p->status, "error parsing duration: " 10445 "maximum acceptable value is " 10446 "315576000000"); 10447 return false; 10448 } 10449 10450 if (seconds < -315576000000) { 10451 upb_status_seterrf(p->status, "error parsing duration: " 10452 "minimum acceptable value is " 10453 "-315576000000"); 10454 return false; 10455 } 10456 10457 /* Parse fraction */ 10458 nanos_buf[0] = '0'; 10459 memcpy(nanos_buf + 1, buf + fraction_start, len - fraction_start); 10460 val = strtod(nanos_buf, &end); 10461 if (errno == ERANGE || end != nanos_buf + len - fraction_start + 1) { 10462 upb_status_seterrf(p->status, "error parsing duration: %s", 10463 nanos_buf); 10464 return false; 10465 } 10466 10467 nanos = val * 1000000000; 10468 if (seconds < 0) nanos = -nanos; 10469 10470 /* Clean up buffer */ 10471 multipart_end(p); 10472 10473 /* Set seconds */ 10474 start_member(p); 10475 capture_begin(p, seconds_membername); 10476 capture_end(p, seconds_membername + 7); 10477 end_membername(p); 10478 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); 10479 end_member(p); 10480 10481 /* Set nanos */ 10482 start_member(p); 10483 capture_begin(p, nanos_membername); 10484 capture_end(p, nanos_membername + 5); 10485 end_membername(p); 10486 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); 10487 end_member(p); 10488 10489 /* Continue previous arena */ 10490 multipart_startaccum(p); 10491 10492 return true; 10493} 10494 10495static int parse_timestamp_number(upb_json_parser *p) { 10496 size_t len; 10497 
const char *buf; 10498 int val; 10499 10500 /* atoi() and friends unfortunately do not support specifying the length of 10501 * the input string, so we need to force a copy into a NULL-terminated buffer. */ 10502 multipart_text(p, "\0", 1, false); 10503 10504 buf = accumulate_getptr(p, &len); 10505 val = atoi(buf); 10506 multipart_end(p); 10507 multipart_startaccum(p); 10508 10509 return val; 10510} 10511 10512static void start_year(upb_json_parser *p, const char *ptr) { 10513 capture_begin(p, ptr); 10514} 10515 10516static bool end_year(upb_json_parser *p, const char *ptr) { 10517 if (!capture_end(p, ptr)) { 10518 return false; 10519 } 10520 p->tm.tm_year = parse_timestamp_number(p) - 1900; 10521 return true; 10522} 10523 10524static void start_month(upb_json_parser *p, const char *ptr) { 10525 capture_begin(p, ptr); 10526} 10527 10528static bool end_month(upb_json_parser *p, const char *ptr) { 10529 if (!capture_end(p, ptr)) { 10530 return false; 10531 } 10532 p->tm.tm_mon = parse_timestamp_number(p) - 1; 10533 return true; 10534} 10535 10536static void start_day(upb_json_parser *p, const char *ptr) { 10537 capture_begin(p, ptr); 10538} 10539 10540static bool end_day(upb_json_parser *p, const char *ptr) { 10541 if (!capture_end(p, ptr)) { 10542 return false; 10543 } 10544 p->tm.tm_mday = parse_timestamp_number(p); 10545 return true; 10546} 10547 10548static void start_hour(upb_json_parser *p, const char *ptr) { 10549 capture_begin(p, ptr); 10550} 10551 10552static bool end_hour(upb_json_parser *p, const char *ptr) { 10553 if (!capture_end(p, ptr)) { 10554 return false; 10555 } 10556 p->tm.tm_hour = parse_timestamp_number(p); 10557 return true; 10558} 10559 10560static void start_minute(upb_json_parser *p, const char *ptr) { 10561 capture_begin(p, ptr); 10562} 10563 10564static bool end_minute(upb_json_parser *p, const char *ptr) { 10565 if (!capture_end(p, ptr)) { 10566 return false; 10567 } 10568 p->tm.tm_min = parse_timestamp_number(p); 10569 return true; 
10570} 10571 10572static void start_second(upb_json_parser *p, const char *ptr) { 10573 capture_begin(p, ptr); 10574} 10575 10576static bool end_second(upb_json_parser *p, const char *ptr) { 10577 if (!capture_end(p, ptr)) { 10578 return false; 10579 } 10580 p->tm.tm_sec = parse_timestamp_number(p); 10581 return true; 10582} 10583 10584static void start_timestamp_base(upb_json_parser *p) { 10585 memset(&p->tm, 0, sizeof(struct tm)); 10586} 10587 10588static void start_timestamp_fraction(upb_json_parser *p, const char *ptr) { 10589 capture_begin(p, ptr); 10590} 10591 10592static bool end_timestamp_fraction(upb_json_parser *p, const char *ptr) { 10593 size_t len; 10594 const char *buf; 10595 char nanos_buf[12]; 10596 char *end; 10597 double val = 0.0; 10598 int32_t nanos; 10599 const char *nanos_membername = "nanos"; 10600 10601 memset(nanos_buf, 0, 12); 10602 10603 if (!capture_end(p, ptr)) { 10604 return false; 10605 } 10606 10607 buf = accumulate_getptr(p, &len); 10608 10609 if (len > 10) { 10610 upb_status_seterrf(p->status, 10611 "error parsing timestamp: at most 9-digit fraction."); 10612 return false; 10613 } 10614 10615 /* Parse nanos */ 10616 nanos_buf[0] = '0'; 10617 memcpy(nanos_buf + 1, buf, len); 10618 val = strtod(nanos_buf, &end); 10619 10620 if (errno == ERANGE || end != nanos_buf + len + 1) { 10621 upb_status_seterrf(p->status, "error parsing timestamp nanos: %s", 10622 nanos_buf); 10623 return false; 10624 } 10625 10626 nanos = val * 1000000000; 10627 10628 /* Clean up previous environment */ 10629 multipart_end(p); 10630 10631 /* Set nanos */ 10632 start_member(p); 10633 capture_begin(p, nanos_membername); 10634 capture_end(p, nanos_membername + 5); 10635 end_membername(p); 10636 upb_sink_putint32(p->top->sink, parser_getsel(p), nanos); 10637 end_member(p); 10638 10639 /* Continue previous environment */ 10640 multipart_startaccum(p); 10641 10642 return true; 10643} 10644 10645static void start_timestamp_zone(upb_json_parser *p, const char *ptr) { 
/* Integer division of n by d, rounded up.  Only meaningful for n >= 0 and
 * d > 0, which is how epoch_days() uses it. */
static int div_round_up2(int n, int d) {
  return (n + d - 1) / d;
}

/* Returns the number of days between 1970-01-01 and the given proleptic
 * Gregorian calendar date: epoch_days(1970, 1, 1) == 1970-01-01 == 0.
 *
 * |month| is nominally 1..12.  Values outside that range are folded into the
 * year the way timegm() normalizes tm_mon (month 13 is January of the
 * following year, month 0 is December of the preceding year), so the lookup
 * table below is never indexed out of bounds.  |day| is not range-checked;
 * surplus days simply carry forward. */
static int epoch_days(int year, int month, int day) {
  static const uint16_t month_yday[12] = {0,   31,  59,  90,  120, 151,
                                          181, 212, 243, 273, 304, 334};
  int month0 = month - 1; /* zero-based month */
  int febs_since_0;
  int leap_days_since_0;
  int days_since_0;

  /* Normalize month0 into 0..11, carrying whole years. */
  year += month0 / 12;
  month0 %= 12;
  if (month0 < 0) {
    month0 += 12;
    year--;
  }

  /* Number of Februaries that have fully elapsed since year 0; the current
   * year's February only counts from March onwards. */
  febs_since_0 = month0 >= 2 ? year + 1 : year;
  leap_days_since_0 = div_round_up2(febs_since_0, 4) -
                      div_round_up2(febs_since_0, 100) +
                      div_round_up2(febs_since_0, 400);
  days_since_0 =
      365 * year + month_yday[month0] + (day - 1) + leap_days_since_0;

  /* Convert from 0-epoch (0001-01-01 BC) to Unix Epoch (1970-01-01 AD).
   * Since the "BC" system does not have a year zero, 1 BC == year zero. */
  return days_since_0 - 719528;
}

/* Converts a broken-down UTC time to seconds since the Unix epoch, like the
 * non-standard timegm().  Only tm_year, tm_mon, tm_mday, tm_hour, tm_min and
 * tm_sec are read.  Out-of-range hours, minutes and seconds carry over
 * arithmetically, so callers may adjust tm_hour by a zone offset first. */
static int64_t upb_timegm(const struct tm *tp) {
  int64_t ret = epoch_days(tp->tm_year + 1900, tp->tm_mon + 1, tp->tm_mday);
  ret = (ret * 24) + tp->tm_hour;
  ret = (ret * 60) + tp->tm_min;
  ret = (ret * 60) + tp->tm_sec;
  return ret;
}
10710 "0001-01-01T00:00:00Z"); 10711 return false; 10712 } 10713 10714 /* Clean up previous environment */ 10715 multipart_end(p); 10716 10717 /* Set seconds */ 10718 start_member(p); 10719 capture_begin(p, seconds_membername); 10720 capture_end(p, seconds_membername + 7); 10721 end_membername(p); 10722 upb_sink_putint64(p->top->sink, parser_getsel(p), seconds); 10723 end_member(p); 10724 10725 /* Continue previous environment */ 10726 multipart_startaccum(p); 10727 10728 return true; 10729} 10730 10731static void start_fieldmask_path_text(upb_json_parser *p, const char *ptr) { 10732 capture_begin(p, ptr); 10733} 10734 10735static bool end_fieldmask_path_text(upb_json_parser *p, const char *ptr) { 10736 return capture_end(p, ptr); 10737} 10738 10739static bool start_fieldmask_path(upb_json_parser *p) { 10740 upb_jsonparser_frame *inner; 10741 upb_selector_t sel; 10742 10743 if (!check_stack(p)) return false; 10744 10745 /* Start a new parser frame: parser frames correspond one-to-one with 10746 * handler frames, and string events occur in a sub-frame. 
*/ 10747 inner = start_jsonparser_frame(p); 10748 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10749 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 10750 inner->m = p->top->m; 10751 inner->f = p->top->f; 10752 p->top = inner; 10753 10754 multipart_startaccum(p); 10755 return true; 10756} 10757 10758static bool lower_camel_push( 10759 upb_json_parser *p, upb_selector_t sel, const char *ptr, size_t len) { 10760 const char *limit = ptr + len; 10761 bool first = true; 10762 for (;ptr < limit; ptr++) { 10763 if (*ptr >= 'A' && *ptr <= 'Z' && !first) { 10764 char lower = tolower(*ptr); 10765 upb_sink_putstring(p->top->sink, sel, "_", 1, NULL); 10766 upb_sink_putstring(p->top->sink, sel, &lower, 1, NULL); 10767 } else { 10768 upb_sink_putstring(p->top->sink, sel, ptr, 1, NULL); 10769 } 10770 first = false; 10771 } 10772 return true; 10773} 10774 10775static bool end_fieldmask_path(upb_json_parser *p) { 10776 upb_selector_t sel; 10777 10778 if (!lower_camel_push( 10779 p, getsel_for_handlertype(p, UPB_HANDLER_STRING), 10780 p->accumulated, p->accumulated_len)) { 10781 return false; 10782 } 10783 10784 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10785 upb_sink_endstr(p->top->sink, sel); 10786 p->top--; 10787 10788 multipart_end(p); 10789 return true; 10790} 10791 10792static void start_member(upb_json_parser *p) { 10793 UPB_ASSERT(!p->top->f); 10794 multipart_startaccum(p); 10795} 10796 10797/* Helper: invoked during parse_mapentry() to emit the mapentry message's key 10798 * field based on the current contents of the accumulate buffer. */ 10799static bool parse_mapentry_key(upb_json_parser *p) { 10800 10801 size_t len; 10802 const char *buf = accumulate_getptr(p, &len); 10803 10804 /* Emit the key field. We do a bit of ad-hoc parsing here because the 10805 * parser state machine has already decided that this is a string field 10806 * name, and we are reinterpreting it as some arbitrary key type. 
In 10807 * particular, integer and bool keys are quoted, so we need to parse the 10808 * quoted string contents here. */ 10809 10810 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_KEY); 10811 if (p->top->f == NULL) { 10812 upb_status_seterrmsg(p->status, "mapentry message has no key"); 10813 return false; 10814 } 10815 switch (upb_fielddef_type(p->top->f)) { 10816 case UPB_TYPE_INT32: 10817 case UPB_TYPE_INT64: 10818 case UPB_TYPE_UINT32: 10819 case UPB_TYPE_UINT64: 10820 /* Invoke end_number. The accum buffer has the number's text already. */ 10821 if (!parse_number(p, true)) { 10822 return false; 10823 } 10824 break; 10825 case UPB_TYPE_BOOL: 10826 if (len == 4 && !strncmp(buf, "true", 4)) { 10827 if (!parser_putbool(p, true)) { 10828 return false; 10829 } 10830 } else if (len == 5 && !strncmp(buf, "false", 5)) { 10831 if (!parser_putbool(p, false)) { 10832 return false; 10833 } 10834 } else { 10835 upb_status_seterrmsg(p->status, 10836 "Map bool key not 'true' or 'false'"); 10837 return false; 10838 } 10839 multipart_end(p); 10840 break; 10841 case UPB_TYPE_STRING: 10842 case UPB_TYPE_BYTES: { 10843 upb_sink subsink; 10844 upb_selector_t sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 10845 upb_sink_startstr(p->top->sink, sel, len, &subsink); 10846 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 10847 upb_sink_putstring(subsink, sel, buf, len, NULL); 10848 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 10849 upb_sink_endstr(subsink, sel); 10850 multipart_end(p); 10851 break; 10852 } 10853 default: 10854 upb_status_seterrmsg(p->status, "Invalid field type for map key"); 10855 return false; 10856 } 10857 10858 return true; 10859} 10860 10861/* Helper: emit one map entry (as a submessage in the map field sequence). This 10862 * is invoked from end_membername(), at the end of the map entry's key string, 10863 * with the map key in the accumulate buffer. 
It parses the key from that 10864 * buffer, emits the handler calls to start the mapentry submessage (setting up 10865 * its subframe in the process), and sets up state in the subframe so that the 10866 * value parser (invoked next) will emit the mapentry's value field and then 10867 * end the mapentry message. */ 10868 10869static bool handle_mapentry(upb_json_parser *p) { 10870 const upb_fielddef *mapfield; 10871 const upb_msgdef *mapentrymsg; 10872 upb_jsonparser_frame *inner; 10873 upb_selector_t sel; 10874 10875 /* Map entry: p->top->sink is the seq frame, so we need to start a frame 10876 * for the mapentry itself, and then set |f| in that frame so that the map 10877 * value field is parsed, and also set a flag to end the frame after the 10878 * map-entry value is parsed. */ 10879 if (!check_stack(p)) return false; 10880 10881 mapfield = p->top->mapfield; 10882 mapentrymsg = upb_fielddef_msgsubdef(mapfield); 10883 10884 inner = start_jsonparser_frame(p); 10885 p->top->f = mapfield; 10886 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 10887 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); 10888 inner->m = mapentrymsg; 10889 inner->mapfield = mapfield; 10890 10891 /* Don't set this to true *yet* -- we reuse parsing handlers below to push 10892 * the key field value to the sink, and these handlers will pop the frame 10893 * if they see is_mapentry (when invoked by the parser state machine, they 10894 * would have just seen the map-entry value, not key). */ 10895 inner->is_mapentry = false; 10896 p->top = inner; 10897 10898 /* send STARTMSG in submsg frame. */ 10899 upb_sink_startmsg(p->top->sink); 10900 10901 parse_mapentry_key(p); 10902 10903 /* Set up the value field to receive the map-entry value. */ 10904 p->top->f = upb_msgdef_itof(p->top->m, UPB_MAPENTRY_VALUE); 10905 p->top->is_mapentry = true; /* set up to pop frame after value is parsed. 
*/ 10906 p->top->mapfield = mapfield; 10907 if (p->top->f == NULL) { 10908 upb_status_seterrmsg(p->status, "mapentry message has no value"); 10909 return false; 10910 } 10911 10912 return true; 10913} 10914 10915static bool end_membername(upb_json_parser *p) { 10916 UPB_ASSERT(!p->top->f); 10917 10918 if (!p->top->m) { 10919 p->top->is_unknown_field = true; 10920 multipart_end(p); 10921 return true; 10922 } 10923 10924 if (p->top->is_any) { 10925 return end_any_membername(p); 10926 } else if (p->top->is_map) { 10927 return handle_mapentry(p); 10928 } else { 10929 size_t len; 10930 const char *buf = accumulate_getptr(p, &len); 10931 upb_value v; 10932 10933 if (upb_strtable_lookup2(p->top->name_table, buf, len, &v)) { 10934 p->top->f = upb_value_getconstptr(v); 10935 multipart_end(p); 10936 10937 return true; 10938 } else if (p->ignore_json_unknown) { 10939 p->top->is_unknown_field = true; 10940 multipart_end(p); 10941 return true; 10942 } else { 10943 upb_status_seterrf(p->status, "No such field: %.*s\n", (int)len, buf); 10944 return false; 10945 } 10946 } 10947} 10948 10949static bool end_any_membername(upb_json_parser *p) { 10950 size_t len; 10951 const char *buf = accumulate_getptr(p, &len); 10952 upb_value v; 10953 10954 if (len == 5 && strncmp(buf, "@type", len) == 0) { 10955 upb_strtable_lookup2(p->top->name_table, "type_url", 8, &v); 10956 p->top->f = upb_value_getconstptr(v); 10957 multipart_end(p); 10958 return true; 10959 } else { 10960 p->top->is_unknown_field = true; 10961 multipart_end(p); 10962 return true; 10963 } 10964} 10965 10966static void end_member(upb_json_parser *p) { 10967 /* If we just parsed a map-entry value, end that frame too. */ 10968 if (p->top->is_mapentry) { 10969 upb_selector_t sel; 10970 bool ok; 10971 const upb_fielddef *mapfield; 10972 10973 UPB_ASSERT(p->top > p->stack); 10974 /* send ENDMSG on submsg. 
*/ 10975 upb_sink_endmsg(p->top->sink, p->status); 10976 mapfield = p->top->mapfield; 10977 10978 /* send ENDSUBMSG in repeated-field-of-mapentries frame. */ 10979 p->top--; 10980 ok = upb_handlers_getselector(mapfield, UPB_HANDLER_ENDSUBMSG, &sel); 10981 UPB_ASSUME(ok); 10982 upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); 10983 } 10984 10985 p->top->f = NULL; 10986 p->top->is_unknown_field = false; 10987} 10988 10989static void start_any_member(upb_json_parser *p, const char *ptr) { 10990 start_member(p); 10991 json_parser_any_frame_set_after_type_url_start_once(p->top->any_frame, ptr); 10992} 10993 10994static void end_any_member(upb_json_parser *p, const char *ptr) { 10995 json_parser_any_frame_set_before_type_url_end(p->top->any_frame, ptr); 10996 end_member(p); 10997} 10998 10999static bool start_subobject(upb_json_parser *p) { 11000 if (p->top->is_unknown_field) { 11001 if (!check_stack(p)) return false; 11002 11003 p->top = start_jsonparser_frame(p); 11004 return true; 11005 } 11006 11007 if (upb_fielddef_ismap(p->top->f)) { 11008 upb_jsonparser_frame *inner; 11009 upb_selector_t sel; 11010 11011 /* Beginning of a map. Start a new parser frame in a repeated-field 11012 * context. */ 11013 if (!check_stack(p)) return false; 11014 11015 inner = start_jsonparser_frame(p); 11016 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 11017 upb_sink_startseq(p->top->sink, sel, &inner->sink); 11018 inner->m = upb_fielddef_msgsubdef(p->top->f); 11019 inner->mapfield = p->top->f; 11020 inner->is_map = true; 11021 p->top = inner; 11022 11023 return true; 11024 } else if (upb_fielddef_issubmsg(p->top->f)) { 11025 upb_jsonparser_frame *inner; 11026 upb_selector_t sel; 11027 11028 /* Beginning of a subobject. Start a new parser frame in the submsg 11029 * context. 
*/ 11030 if (!check_stack(p)) return false; 11031 11032 inner = start_jsonparser_frame(p); 11033 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSUBMSG); 11034 upb_sink_startsubmsg(p->top->sink, sel, &inner->sink); 11035 inner->m = upb_fielddef_msgsubdef(p->top->f); 11036 set_name_table(p, inner); 11037 p->top = inner; 11038 11039 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { 11040 p->top->is_any = true; 11041 p->top->any_frame = json_parser_any_frame_new(p); 11042 } else { 11043 p->top->is_any = false; 11044 p->top->any_frame = NULL; 11045 } 11046 11047 return true; 11048 } else { 11049 upb_status_seterrf(p->status, 11050 "Object specified for non-message/group field: %s", 11051 upb_fielddef_name(p->top->f)); 11052 return false; 11053 } 11054} 11055 11056static bool start_subobject_full(upb_json_parser *p) { 11057 if (is_top_level(p)) { 11058 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 11059 start_value_object(p, VALUE_STRUCTVALUE); 11060 if (!start_subobject(p)) return false; 11061 start_structvalue_object(p); 11062 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { 11063 start_structvalue_object(p); 11064 } else { 11065 return true; 11066 } 11067 } else if (is_wellknown_field(p, UPB_WELLKNOWN_STRUCT)) { 11068 if (!start_subobject(p)) return false; 11069 start_structvalue_object(p); 11070 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE)) { 11071 if (!start_subobject(p)) return false; 11072 start_value_object(p, VALUE_STRUCTVALUE); 11073 if (!start_subobject(p)) return false; 11074 start_structvalue_object(p); 11075 } 11076 11077 return start_subobject(p); 11078} 11079 11080static void end_subobject(upb_json_parser *p) { 11081 if (is_top_level(p)) { 11082 return; 11083 } 11084 11085 if (p->top->is_map) { 11086 upb_selector_t sel; 11087 p->top--; 11088 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); 11089 upb_sink_endseq(p->top->sink, sel); 11090 } else { 11091 upb_selector_t sel; 11092 bool is_unknown = p->top->m == NULL; 11093 p->top--; 11094 
if (!is_unknown) { 11095 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSUBMSG); 11096 upb_sink_endsubmsg(p->top->sink, (p->top + 1)->sink, sel); 11097 } 11098 } 11099} 11100 11101static void end_subobject_full(upb_json_parser *p) { 11102 end_subobject(p); 11103 11104 if (is_wellknown_msg(p, UPB_WELLKNOWN_STRUCT)) { 11105 end_structvalue_object(p); 11106 if (!is_top_level(p)) { 11107 end_subobject(p); 11108 } 11109 } 11110 11111 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 11112 end_value_object(p); 11113 if (!is_top_level(p)) { 11114 end_subobject(p); 11115 } 11116 } 11117} 11118 11119static bool start_array(upb_json_parser *p) { 11120 upb_jsonparser_frame *inner; 11121 upb_selector_t sel; 11122 11123 if (is_top_level(p)) { 11124 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 11125 start_value_object(p, VALUE_LISTVALUE); 11126 if (!start_subobject(p)) return false; 11127 start_listvalue_object(p); 11128 } else if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { 11129 start_listvalue_object(p); 11130 } else { 11131 return false; 11132 } 11133 } else if (is_wellknown_field(p, UPB_WELLKNOWN_LISTVALUE) && 11134 (!upb_fielddef_isseq(p->top->f) || 11135 p->top->is_repeated)) { 11136 if (!start_subobject(p)) return false; 11137 start_listvalue_object(p); 11138 } else if (is_wellknown_field(p, UPB_WELLKNOWN_VALUE) && 11139 (!upb_fielddef_isseq(p->top->f) || 11140 p->top->is_repeated)) { 11141 if (!start_subobject(p)) return false; 11142 start_value_object(p, VALUE_LISTVALUE); 11143 if (!start_subobject(p)) return false; 11144 start_listvalue_object(p); 11145 } 11146 11147 if (p->top->is_unknown_field) { 11148 inner = start_jsonparser_frame(p); 11149 inner->is_unknown_field = true; 11150 p->top = inner; 11151 11152 return true; 11153 } 11154 11155 if (!upb_fielddef_isseq(p->top->f)) { 11156 upb_status_seterrf(p->status, 11157 "Array specified for non-repeated field: %s", 11158 upb_fielddef_name(p->top->f)); 11159 return false; 11160 } 11161 11162 if (!check_stack(p)) 
return false; 11163 11164 inner = start_jsonparser_frame(p); 11165 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSEQ); 11166 upb_sink_startseq(p->top->sink, sel, &inner->sink); 11167 inner->m = p->top->m; 11168 inner->f = p->top->f; 11169 inner->is_repeated = true; 11170 p->top = inner; 11171 11172 return true; 11173} 11174 11175static void end_array(upb_json_parser *p) { 11176 upb_selector_t sel; 11177 11178 UPB_ASSERT(p->top > p->stack); 11179 11180 p->top--; 11181 11182 if (p->top->is_unknown_field) { 11183 return; 11184 } 11185 11186 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSEQ); 11187 upb_sink_endseq(p->top->sink, sel); 11188 11189 if (is_wellknown_msg(p, UPB_WELLKNOWN_LISTVALUE)) { 11190 end_listvalue_object(p); 11191 if (!is_top_level(p)) { 11192 end_subobject(p); 11193 } 11194 } 11195 11196 if (is_wellknown_msg(p, UPB_WELLKNOWN_VALUE)) { 11197 end_value_object(p); 11198 if (!is_top_level(p)) { 11199 end_subobject(p); 11200 } 11201 } 11202} 11203 11204static void start_object(upb_json_parser *p) { 11205 if (!p->top->is_map && p->top->m != NULL) { 11206 upb_sink_startmsg(p->top->sink); 11207 } 11208} 11209 11210static void end_object(upb_json_parser *p) { 11211 if (!p->top->is_map && p->top->m != NULL) { 11212 upb_sink_endmsg(p->top->sink, p->status); 11213 } 11214} 11215 11216static void start_any_object(upb_json_parser *p, const char *ptr) { 11217 start_object(p); 11218 p->top->any_frame->before_type_url_start = ptr; 11219 p->top->any_frame->before_type_url_end = ptr; 11220} 11221 11222static bool end_any_object(upb_json_parser *p, const char *ptr) { 11223 const char *value_membername = "value"; 11224 bool is_well_known_packed = false; 11225 const char *packed_end = ptr + 1; 11226 upb_selector_t sel; 11227 upb_jsonparser_frame *inner; 11228 11229 if (json_parser_any_frame_has_value(p->top->any_frame) && 11230 !json_parser_any_frame_has_type_url(p->top->any_frame)) { 11231 upb_status_seterrmsg(p->status, "No valid type url"); 11232 return false; 
11233 } 11234 11235 /* Well known types data is represented as value field. */ 11236 if (upb_msgdef_wellknowntype(p->top->any_frame->parser->top->m) != 11237 UPB_WELLKNOWN_UNSPECIFIED) { 11238 is_well_known_packed = true; 11239 11240 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { 11241 p->top->any_frame->before_type_url_start = 11242 memchr(p->top->any_frame->before_type_url_start, ':', 11243 p->top->any_frame->before_type_url_end - 11244 p->top->any_frame->before_type_url_start); 11245 if (p->top->any_frame->before_type_url_start == NULL) { 11246 upb_status_seterrmsg(p->status, "invalid data for well known type."); 11247 return false; 11248 } 11249 p->top->any_frame->before_type_url_start++; 11250 } 11251 11252 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 11253 p->top->any_frame->after_type_url_start = 11254 memchr(p->top->any_frame->after_type_url_start, ':', 11255 (ptr + 1) - 11256 p->top->any_frame->after_type_url_start); 11257 if (p->top->any_frame->after_type_url_start == NULL) { 11258 upb_status_seterrmsg(p->status, "Invalid data for well known type."); 11259 return false; 11260 } 11261 p->top->any_frame->after_type_url_start++; 11262 packed_end = ptr; 11263 } 11264 } 11265 11266 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame)) { 11267 if (!parse(p->top->any_frame->parser, NULL, 11268 p->top->any_frame->before_type_url_start, 11269 p->top->any_frame->before_type_url_end - 11270 p->top->any_frame->before_type_url_start, NULL)) { 11271 return false; 11272 } 11273 } else { 11274 if (!is_well_known_packed) { 11275 if (!parse(p->top->any_frame->parser, NULL, "{", 1, NULL)) { 11276 return false; 11277 } 11278 } 11279 } 11280 11281 if (json_parser_any_frame_has_value_before_type_url(p->top->any_frame) && 11282 json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 11283 if (!parse(p->top->any_frame->parser, NULL, ",", 1, NULL)) { 11284 return false; 11285 } 11286 } 11287 
11288 if (json_parser_any_frame_has_value_after_type_url(p->top->any_frame)) { 11289 if (!parse(p->top->any_frame->parser, NULL, 11290 p->top->any_frame->after_type_url_start, 11291 packed_end - p->top->any_frame->after_type_url_start, NULL)) { 11292 return false; 11293 } 11294 } else { 11295 if (!is_well_known_packed) { 11296 if (!parse(p->top->any_frame->parser, NULL, "}", 1, NULL)) { 11297 return false; 11298 } 11299 } 11300 } 11301 11302 if (!end(p->top->any_frame->parser, NULL)) { 11303 return false; 11304 } 11305 11306 p->top->is_any = false; 11307 11308 /* Set value */ 11309 start_member(p); 11310 capture_begin(p, value_membername); 11311 capture_end(p, value_membername + 5); 11312 end_membername(p); 11313 11314 if (!check_stack(p)) return false; 11315 inner = p->top + 1; 11316 11317 sel = getsel_for_handlertype(p, UPB_HANDLER_STARTSTR); 11318 upb_sink_startstr(p->top->sink, sel, 0, &inner->sink); 11319 sel = getsel_for_handlertype(p, UPB_HANDLER_STRING); 11320 upb_sink_putstring(inner->sink, sel, p->top->any_frame->stringsink.ptr, 11321 p->top->any_frame->stringsink.len, NULL); 11322 sel = getsel_for_handlertype(p, UPB_HANDLER_ENDSTR); 11323 upb_sink_endstr(inner->sink, sel); 11324 11325 end_member(p); 11326 11327 end_object(p); 11328 11329 /* Deallocate any parse frame. 
*/ 11330 json_parser_any_frame_free(p->top->any_frame); 11331 11332 return true; 11333} 11334 11335static bool is_string_wrapper(const upb_msgdef *m) { 11336 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); 11337 return type == UPB_WELLKNOWN_STRINGVALUE || 11338 type == UPB_WELLKNOWN_BYTESVALUE; 11339} 11340 11341static bool is_fieldmask(const upb_msgdef *m) { 11342 upb_wellknowntype_t type = upb_msgdef_wellknowntype(m); 11343 return type == UPB_WELLKNOWN_FIELDMASK; 11344} 11345 11346static void start_fieldmask_object(upb_json_parser *p) { 11347 const char *membername = "paths"; 11348 11349 start_object(p); 11350 11351 /* Set up context for parsing value */ 11352 start_member(p); 11353 capture_begin(p, membername); 11354 capture_end(p, membername + 5); 11355 end_membername(p); 11356 11357 start_array(p); 11358} 11359 11360static void end_fieldmask_object(upb_json_parser *p) { 11361 end_array(p); 11362 end_member(p); 11363 end_object(p); 11364} 11365 11366static void start_wrapper_object(upb_json_parser *p) { 11367 const char *membername = "value"; 11368 11369 start_object(p); 11370 11371 /* Set up context for parsing value */ 11372 start_member(p); 11373 capture_begin(p, membername); 11374 capture_end(p, membername + 5); 11375 end_membername(p); 11376} 11377 11378static void end_wrapper_object(upb_json_parser *p) { 11379 end_member(p); 11380 end_object(p); 11381} 11382 11383static void start_value_object(upb_json_parser *p, int value_type) { 11384 const char *nullmember = "null_value"; 11385 const char *numbermember = "number_value"; 11386 const char *stringmember = "string_value"; 11387 const char *boolmember = "bool_value"; 11388 const char *structmember = "struct_value"; 11389 const char *listmember = "list_value"; 11390 const char *membername = ""; 11391 11392 switch (value_type) { 11393 case VALUE_NULLVALUE: 11394 membername = nullmember; 11395 break; 11396 case VALUE_NUMBERVALUE: 11397 membername = numbermember; 11398 break; 11399 case 
VALUE_STRINGVALUE: 11400 membername = stringmember; 11401 break; 11402 case VALUE_BOOLVALUE: 11403 membername = boolmember; 11404 break; 11405 case VALUE_STRUCTVALUE: 11406 membername = structmember; 11407 break; 11408 case VALUE_LISTVALUE: 11409 membername = listmember; 11410 break; 11411 } 11412 11413 start_object(p); 11414 11415 /* Set up context for parsing value */ 11416 start_member(p); 11417 capture_begin(p, membername); 11418 capture_end(p, membername + strlen(membername)); 11419 end_membername(p); 11420} 11421 11422static void end_value_object(upb_json_parser *p) { 11423 end_member(p); 11424 end_object(p); 11425} 11426 11427static void start_listvalue_object(upb_json_parser *p) { 11428 const char *membername = "values"; 11429 11430 start_object(p); 11431 11432 /* Set up context for parsing value */ 11433 start_member(p); 11434 capture_begin(p, membername); 11435 capture_end(p, membername + strlen(membername)); 11436 end_membername(p); 11437} 11438 11439static void end_listvalue_object(upb_json_parser *p) { 11440 end_member(p); 11441 end_object(p); 11442} 11443 11444static void start_structvalue_object(upb_json_parser *p) { 11445 const char *membername = "fields"; 11446 11447 start_object(p); 11448 11449 /* Set up context for parsing value */ 11450 start_member(p); 11451 capture_begin(p, membername); 11452 capture_end(p, membername + strlen(membername)); 11453 end_membername(p); 11454} 11455 11456static void end_structvalue_object(upb_json_parser *p) { 11457 end_member(p); 11458 end_object(p); 11459} 11460 11461static bool is_top_level(upb_json_parser *p) { 11462 return p->top == p->stack && p->top->f == NULL && !p->top->is_unknown_field; 11463} 11464 11465static bool is_wellknown_msg(upb_json_parser *p, upb_wellknowntype_t type) { 11466 return p->top->m != NULL && upb_msgdef_wellknowntype(p->top->m) == type; 11467} 11468 11469static bool is_wellknown_field(upb_json_parser *p, upb_wellknowntype_t type) { 11470 return p->top->f != NULL && 11471 
upb_fielddef_issubmsg(p->top->f) && 11472 (upb_msgdef_wellknowntype(upb_fielddef_msgsubdef(p->top->f)) 11473 == type); 11474} 11475 11476static bool does_number_wrapper_start(upb_json_parser *p) { 11477 return p->top->f != NULL && 11478 upb_fielddef_issubmsg(p->top->f) && 11479 upb_msgdef_isnumberwrapper(upb_fielddef_msgsubdef(p->top->f)); 11480} 11481 11482static bool does_number_wrapper_end(upb_json_parser *p) { 11483 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); 11484} 11485 11486static bool is_number_wrapper_object(upb_json_parser *p) { 11487 return p->top->m != NULL && upb_msgdef_isnumberwrapper(p->top->m); 11488} 11489 11490static bool does_string_wrapper_start(upb_json_parser *p) { 11491 return p->top->f != NULL && 11492 upb_fielddef_issubmsg(p->top->f) && 11493 is_string_wrapper(upb_fielddef_msgsubdef(p->top->f)); 11494} 11495 11496static bool does_string_wrapper_end(upb_json_parser *p) { 11497 return p->top->m != NULL && is_string_wrapper(p->top->m); 11498} 11499 11500static bool is_string_wrapper_object(upb_json_parser *p) { 11501 return p->top->m != NULL && is_string_wrapper(p->top->m); 11502} 11503 11504static bool does_fieldmask_start(upb_json_parser *p) { 11505 return p->top->f != NULL && 11506 upb_fielddef_issubmsg(p->top->f) && 11507 is_fieldmask(upb_fielddef_msgsubdef(p->top->f)); 11508} 11509 11510static bool does_fieldmask_end(upb_json_parser *p) { 11511 return p->top->m != NULL && is_fieldmask(p->top->m); 11512} 11513 11514#define CHECK_RETURN_TOP(x) if (!(x)) goto error 11515 11516 11517/* The actual parser **********************************************************/ 11518 11519/* What follows is the Ragel parser itself. The language is specified in Ragel 11520 * and the actions call our C functions above. 11521 * 11522 * Ragel has an extensive set of functionality, and we use only a small part of 11523 * it. 
There are many action types but we only use a few: 11524 * 11525 * ">" -- transition into a machine 11526 * "%" -- transition out of a machine 11527 * "@" -- transition into a final state of a machine. 11528 * 11529 * "@" transitions are tricky because a machine can transition into a final 11530 * state repeatedly. But in some cases we know this can't happen, for example 11531 * a string which is delimited by a final '"' can only transition into its 11532 * final state once, when the closing '"' is seen. */ 11533 11534 11535#line 2780 "upb/json/parser.rl" 11536 11537 11538 11539#line 2583 "upb/json/parser.c" 11540static const char _json_actions[] = { 11541 0, 1, 0, 1, 1, 1, 3, 1, 11542 4, 1, 6, 1, 7, 1, 8, 1, 11543 9, 1, 11, 1, 12, 1, 13, 1, 11544 14, 1, 15, 1, 16, 1, 17, 1, 11545 18, 1, 19, 1, 20, 1, 22, 1, 11546 23, 1, 24, 1, 35, 1, 37, 1, 11547 39, 1, 40, 1, 42, 1, 43, 1, 11548 44, 1, 46, 1, 48, 1, 49, 1, 11549 50, 1, 51, 1, 53, 1, 54, 2, 11550 4, 9, 2, 5, 6, 2, 7, 3, 11551 2, 7, 9, 2, 21, 26, 2, 25, 11552 10, 2, 27, 28, 2, 29, 30, 2, 11553 32, 34, 2, 33, 31, 2, 38, 36, 11554 2, 40, 42, 2, 45, 2, 2, 46, 11555 54, 2, 47, 36, 2, 49, 54, 2, 11556 50, 54, 2, 51, 54, 2, 52, 41, 11557 2, 53, 54, 3, 32, 34, 35, 4, 11558 21, 26, 27, 28 11559}; 11560 11561static const short _json_key_offsets[] = { 11562 0, 0, 12, 13, 18, 23, 28, 29, 11563 30, 31, 32, 33, 34, 35, 36, 37, 11564 38, 43, 44, 48, 53, 58, 63, 67, 11565 71, 74, 77, 79, 83, 87, 89, 91, 11566 96, 98, 100, 109, 115, 121, 127, 133, 11567 135, 139, 142, 144, 146, 149, 150, 154, 11568 156, 158, 160, 162, 163, 165, 167, 168, 11569 170, 172, 173, 175, 177, 178, 180, 182, 11570 183, 185, 187, 191, 193, 195, 196, 197, 11571 198, 199, 201, 206, 208, 210, 212, 221, 11572 222, 222, 222, 227, 232, 237, 238, 239, 11573 240, 241, 241, 242, 243, 244, 244, 245, 11574 246, 247, 247, 252, 253, 257, 262, 267, 11575 272, 276, 276, 279, 282, 285, 288, 291, 11576 294, 294, 294, 294, 294, 294 11577}; 11578 11579static const char 
_json_trans_keys[] = { 11580 32, 34, 45, 91, 102, 110, 116, 123, 11581 9, 13, 48, 57, 34, 32, 93, 125, 11582 9, 13, 32, 44, 93, 9, 13, 32, 11583 93, 125, 9, 13, 97, 108, 115, 101, 11584 117, 108, 108, 114, 117, 101, 32, 34, 11585 125, 9, 13, 34, 32, 58, 9, 13, 11586 32, 93, 125, 9, 13, 32, 44, 125, 11587 9, 13, 32, 44, 125, 9, 13, 32, 11588 34, 9, 13, 45, 48, 49, 57, 48, 11589 49, 57, 46, 69, 101, 48, 57, 69, 11590 101, 48, 57, 43, 45, 48, 57, 48, 11591 57, 48, 57, 46, 69, 101, 48, 57, 11592 34, 92, 34, 92, 34, 47, 92, 98, 11593 102, 110, 114, 116, 117, 48, 57, 65, 11594 70, 97, 102, 48, 57, 65, 70, 97, 11595 102, 48, 57, 65, 70, 97, 102, 48, 11596 57, 65, 70, 97, 102, 34, 92, 45, 11597 48, 49, 57, 48, 49, 57, 46, 115, 11598 48, 57, 115, 48, 57, 34, 46, 115, 11599 48, 57, 48, 57, 48, 57, 48, 57, 11600 48, 57, 45, 48, 57, 48, 57, 45, 11601 48, 57, 48, 57, 84, 48, 57, 48, 11602 57, 58, 48, 57, 48, 57, 58, 48, 11603 57, 48, 57, 43, 45, 46, 90, 48, 11604 57, 48, 57, 58, 48, 48, 34, 48, 11605 57, 43, 45, 90, 48, 57, 34, 44, 11606 34, 44, 34, 44, 34, 45, 91, 102, 11607 110, 116, 123, 48, 57, 34, 32, 93, 11608 125, 9, 13, 32, 44, 93, 9, 13, 11609 32, 93, 125, 9, 13, 97, 108, 115, 11610 101, 117, 108, 108, 114, 117, 101, 32, 11611 34, 125, 9, 13, 34, 32, 58, 9, 11612 13, 32, 93, 125, 9, 13, 32, 44, 11613 125, 9, 13, 32, 44, 125, 9, 13, 11614 32, 34, 9, 13, 32, 9, 13, 32, 11615 9, 13, 32, 9, 13, 32, 9, 13, 11616 32, 9, 13, 32, 9, 13, 0 11617}; 11618 11619static const char _json_single_lengths[] = { 11620 0, 8, 1, 3, 3, 3, 1, 1, 11621 1, 1, 1, 1, 1, 1, 1, 1, 11622 3, 1, 2, 3, 3, 3, 2, 2, 11623 1, 3, 0, 2, 2, 0, 0, 3, 11624 2, 2, 9, 0, 0, 0, 0, 2, 11625 2, 1, 2, 0, 1, 1, 2, 0, 11626 0, 0, 0, 1, 0, 0, 1, 0, 11627 0, 1, 0, 0, 1, 0, 0, 1, 11628 0, 0, 4, 0, 0, 1, 1, 1, 11629 1, 0, 3, 2, 2, 2, 7, 1, 11630 0, 0, 3, 3, 3, 1, 1, 1, 11631 1, 0, 1, 1, 1, 0, 1, 1, 11632 1, 0, 3, 1, 2, 3, 3, 3, 11633 2, 0, 1, 1, 1, 1, 1, 1, 11634 0, 0, 0, 0, 0, 0 11635}; 11636 11637static const char 
_json_range_lengths[] = { 11638 0, 2, 0, 1, 1, 1, 0, 0, 11639 0, 0, 0, 0, 0, 0, 0, 0, 11640 1, 0, 1, 1, 1, 1, 1, 1, 11641 1, 0, 1, 1, 1, 1, 1, 1, 11642 0, 0, 0, 3, 3, 3, 3, 0, 11643 1, 1, 0, 1, 1, 0, 1, 1, 11644 1, 1, 1, 0, 1, 1, 0, 1, 11645 1, 0, 1, 1, 0, 1, 1, 0, 11646 1, 1, 0, 1, 1, 0, 0, 0, 11647 0, 1, 1, 0, 0, 0, 1, 0, 11648 0, 0, 1, 1, 1, 0, 0, 0, 11649 0, 0, 0, 0, 0, 0, 0, 0, 11650 0, 0, 1, 0, 1, 1, 1, 1, 11651 1, 0, 1, 1, 1, 1, 1, 1, 11652 0, 0, 0, 0, 0, 0 11653}; 11654 11655static const short _json_index_offsets[] = { 11656 0, 0, 11, 13, 18, 23, 28, 30, 11657 32, 34, 36, 38, 40, 42, 44, 46, 11658 48, 53, 55, 59, 64, 69, 74, 78, 11659 82, 85, 89, 91, 95, 99, 101, 103, 11660 108, 111, 114, 124, 128, 132, 136, 140, 11661 143, 147, 150, 153, 155, 158, 160, 164, 11662 166, 168, 170, 172, 174, 176, 178, 180, 11663 182, 184, 186, 188, 190, 192, 194, 196, 11664 198, 200, 202, 207, 209, 211, 213, 215, 11665 217, 219, 221, 226, 229, 232, 235, 244, 11666 246, 247, 248, 253, 258, 263, 265, 267, 11667 269, 271, 272, 274, 276, 278, 279, 281, 11668 283, 285, 286, 291, 293, 297, 302, 307, 11669 312, 316, 317, 320, 323, 326, 329, 332, 11670 335, 336, 337, 338, 339, 340 11671}; 11672 11673static const unsigned char _json_indicies[] = { 11674 0, 2, 3, 4, 5, 6, 7, 8, 11675 0, 3, 1, 9, 1, 11, 12, 1, 11676 11, 10, 13, 14, 12, 13, 1, 14, 11677 1, 1, 14, 10, 15, 1, 16, 1, 11678 17, 1, 18, 1, 19, 1, 20, 1, 11679 21, 1, 22, 1, 23, 1, 24, 1, 11680 25, 26, 27, 25, 1, 28, 1, 29, 11681 30, 29, 1, 30, 1, 1, 30, 31, 11682 32, 33, 34, 32, 1, 35, 36, 27, 11683 35, 1, 36, 26, 36, 1, 37, 38, 11684 39, 1, 38, 39, 1, 41, 42, 42, 11685 40, 43, 1, 42, 42, 43, 40, 44, 11686 44, 45, 1, 45, 1, 45, 40, 41, 11687 42, 42, 39, 40, 47, 48, 46, 50, 11688 51, 49, 52, 52, 52, 52, 52, 52, 11689 52, 52, 53, 1, 54, 54, 54, 1, 11690 55, 55, 55, 1, 56, 56, 56, 1, 11691 57, 57, 57, 1, 59, 60, 58, 61, 11692 62, 63, 1, 64, 65, 1, 66, 67, 11693 1, 68, 1, 67, 68, 1, 69, 1, 11694 66, 67, 65, 1, 70, 1, 71, 1, 11695 
72, 1, 73, 1, 74, 1, 75, 1, 11696 76, 1, 77, 1, 78, 1, 79, 1, 11697 80, 1, 81, 1, 82, 1, 83, 1, 11698 84, 1, 85, 1, 86, 1, 87, 1, 11699 88, 1, 89, 89, 90, 91, 1, 92, 11700 1, 93, 1, 94, 1, 95, 1, 96, 11701 1, 97, 1, 98, 1, 99, 99, 100, 11702 98, 1, 102, 1, 101, 104, 105, 103, 11703 1, 1, 101, 106, 107, 108, 109, 110, 11704 111, 112, 107, 1, 113, 1, 114, 115, 11705 117, 118, 1, 117, 116, 119, 120, 118, 11706 119, 1, 120, 1, 1, 120, 116, 121, 11707 1, 122, 1, 123, 1, 124, 1, 125, 11708 126, 1, 127, 1, 128, 1, 129, 130, 11709 1, 131, 1, 132, 1, 133, 134, 135, 11710 136, 134, 1, 137, 1, 138, 139, 138, 11711 1, 139, 1, 1, 139, 140, 141, 142, 11712 143, 141, 1, 144, 145, 136, 144, 1, 11713 145, 135, 145, 1, 146, 147, 147, 1, 11714 148, 148, 1, 149, 149, 1, 150, 150, 11715 1, 151, 151, 1, 152, 152, 1, 1, 11716 1, 1, 1, 1, 1, 0 11717}; 11718 11719static const char _json_trans_targs[] = { 11720 1, 0, 2, 107, 3, 6, 10, 13, 11721 16, 106, 4, 3, 106, 4, 5, 7, 11722 8, 9, 108, 11, 12, 109, 14, 15, 11723 110, 16, 17, 111, 18, 18, 19, 20, 11724 21, 22, 111, 21, 22, 24, 25, 31, 11725 112, 26, 28, 27, 29, 30, 33, 113, 11726 34, 33, 113, 34, 32, 35, 36, 37, 11727 38, 39, 33, 113, 34, 41, 42, 46, 11728 42, 46, 43, 45, 44, 114, 48, 49, 11729 50, 51, 52, 53, 54, 55, 56, 57, 11730 58, 59, 60, 61, 62, 63, 64, 65, 11731 66, 67, 73, 72, 68, 69, 70, 71, 11732 72, 115, 74, 67, 72, 76, 116, 76, 11733 116, 77, 79, 81, 82, 85, 90, 94, 11734 98, 80, 117, 117, 83, 82, 80, 83, 11735 84, 86, 87, 88, 89, 117, 91, 92, 11736 93, 117, 95, 96, 97, 117, 98, 99, 11737 105, 100, 100, 101, 102, 103, 104, 105, 11738 103, 104, 117, 106, 106, 106, 106, 106, 11739 106 11740}; 11741 11742static const unsigned char _json_trans_actions[] = { 11743 0, 0, 113, 107, 53, 0, 0, 0, 11744 125, 59, 45, 0, 55, 0, 0, 0, 11745 0, 0, 0, 0, 0, 0, 0, 0, 11746 0, 0, 101, 51, 47, 0, 0, 45, 11747 49, 49, 104, 0, 0, 0, 0, 0, 11748 3, 0, 0, 0, 0, 0, 5, 15, 11749 0, 0, 71, 7, 13, 0, 74, 9, 11750 9, 9, 77, 80, 11, 37, 37, 37, 11751 0, 
0, 0, 39, 0, 41, 86, 0, 11752 0, 0, 17, 19, 0, 21, 23, 0, 11753 25, 27, 0, 29, 31, 0, 33, 35, 11754 0, 135, 83, 135, 0, 0, 0, 0, 11755 0, 92, 0, 89, 89, 98, 43, 0, 11756 131, 95, 113, 107, 53, 0, 0, 0, 11757 125, 59, 69, 110, 45, 0, 55, 0, 11758 0, 0, 0, 0, 0, 119, 0, 0, 11759 0, 122, 0, 0, 0, 116, 0, 101, 11760 51, 47, 0, 0, 45, 49, 49, 104, 11761 0, 0, 128, 0, 57, 63, 65, 61, 11762 67 11763}; 11764 11765static const unsigned char _json_eof_actions[] = { 11766 0, 0, 0, 0, 0, 0, 0, 0, 11767 0, 0, 0, 0, 0, 0, 0, 0, 11768 0, 0, 0, 0, 0, 0, 0, 0, 11769 0, 1, 0, 1, 0, 0, 1, 1, 11770 0, 0, 0, 0, 0, 0, 0, 0, 11771 0, 0, 0, 0, 0, 0, 0, 0, 11772 0, 0, 0, 0, 0, 0, 0, 0, 11773 0, 0, 0, 0, 0, 0, 0, 0, 11774 0, 0, 0, 0, 0, 0, 0, 0, 11775 0, 0, 0, 0, 0, 0, 0, 0, 11776 0, 0, 0, 0, 0, 0, 0, 0, 11777 0, 0, 0, 0, 0, 0, 0, 0, 11778 0, 0, 0, 0, 0, 0, 0, 0, 11779 0, 0, 0, 57, 63, 65, 61, 67, 11780 0, 0, 0, 0, 0, 0 11781}; 11782 11783static const int json_start = 1; 11784 11785static const int json_en_number_machine = 23; 11786static const int json_en_string_machine = 32; 11787static const int json_en_duration_machine = 40; 11788static const int json_en_timestamp_machine = 47; 11789static const int json_en_fieldmask_machine = 75; 11790static const int json_en_value_machine = 78; 11791static const int json_en_main = 1; 11792 11793 11794#line 2783 "upb/json/parser.rl" 11795 11796size_t parse(void *closure, const void *hd, const char *buf, size_t size, 11797 const upb_bufhandle *handle) { 11798 upb_json_parser *parser = closure; 11799 11800 /* Variables used by Ragel's generated code. 
*/ 11801 int cs = parser->current_state; 11802 int *stack = parser->parser_stack; 11803 int top = parser->parser_top; 11804 11805 const char *p = buf; 11806 const char *pe = buf + size; 11807 const char *eof = &eof_ch; 11808 11809 parser->handle = handle; 11810 11811 UPB_UNUSED(hd); 11812 UPB_UNUSED(handle); 11813 11814 capture_resume(parser, buf); 11815 11816 11817#line 2861 "upb/json/parser.c" 11818 { 11819 int _klen; 11820 unsigned int _trans; 11821 const char *_acts; 11822 unsigned int _nacts; 11823 const char *_keys; 11824 11825 if ( p == pe ) 11826 goto _test_eof; 11827 if ( cs == 0 ) 11828 goto _out; 11829_resume: 11830 _keys = _json_trans_keys + _json_key_offsets[cs]; 11831 _trans = _json_index_offsets[cs]; 11832 11833 _klen = _json_single_lengths[cs]; 11834 if ( _klen > 0 ) { 11835 const char *_lower = _keys; 11836 const char *_mid; 11837 const char *_upper = _keys + _klen - 1; 11838 while (1) { 11839 if ( _upper < _lower ) 11840 break; 11841 11842 _mid = _lower + ((_upper-_lower) >> 1); 11843 if ( (*p) < *_mid ) 11844 _upper = _mid - 1; 11845 else if ( (*p) > *_mid ) 11846 _lower = _mid + 1; 11847 else { 11848 _trans += (unsigned int)(_mid - _keys); 11849 goto _match; 11850 } 11851 } 11852 _keys += _klen; 11853 _trans += _klen; 11854 } 11855 11856 _klen = _json_range_lengths[cs]; 11857 if ( _klen > 0 ) { 11858 const char *_lower = _keys; 11859 const char *_mid; 11860 const char *_upper = _keys + (_klen<<1) - 2; 11861 while (1) { 11862 if ( _upper < _lower ) 11863 break; 11864 11865 _mid = _lower + (((_upper-_lower) >> 1) & ~1); 11866 if ( (*p) < _mid[0] ) 11867 _upper = _mid - 2; 11868 else if ( (*p) > _mid[1] ) 11869 _lower = _mid + 2; 11870 else { 11871 _trans += (unsigned int)((_mid - _keys)>>1); 11872 goto _match; 11873 } 11874 } 11875 _trans += _klen; 11876 } 11877 11878_match: 11879 _trans = _json_indicies[_trans]; 11880 cs = _json_trans_targs[_trans]; 11881 11882 if ( _json_trans_actions[_trans] == 0 ) 11883 goto _again; 11884 11885 _acts = 
_json_actions + _json_trans_actions[_trans]; 11886 _nacts = (unsigned int) *_acts++; 11887 while ( _nacts-- > 0 ) 11888 { 11889 switch ( *_acts++ ) 11890 { 11891 case 1: 11892#line 2588 "upb/json/parser.rl" 11893 { p--; {cs = stack[--top]; goto _again;} } 11894 break; 11895 case 2: 11896#line 2590 "upb/json/parser.rl" 11897 { p--; {stack[top++] = cs; cs = 23;goto _again;} } 11898 break; 11899 case 3: 11900#line 2594 "upb/json/parser.rl" 11901 { start_text(parser, p); } 11902 break; 11903 case 4: 11904#line 2595 "upb/json/parser.rl" 11905 { CHECK_RETURN_TOP(end_text(parser, p)); } 11906 break; 11907 case 5: 11908#line 2601 "upb/json/parser.rl" 11909 { start_hex(parser); } 11910 break; 11911 case 6: 11912#line 2602 "upb/json/parser.rl" 11913 { hexdigit(parser, p); } 11914 break; 11915 case 7: 11916#line 2603 "upb/json/parser.rl" 11917 { CHECK_RETURN_TOP(end_hex(parser)); } 11918 break; 11919 case 8: 11920#line 2609 "upb/json/parser.rl" 11921 { CHECK_RETURN_TOP(escape(parser, p)); } 11922 break; 11923 case 9: 11924#line 2615 "upb/json/parser.rl" 11925 { p--; {cs = stack[--top]; goto _again;} } 11926 break; 11927 case 10: 11928#line 2620 "upb/json/parser.rl" 11929 { start_year(parser, p); } 11930 break; 11931 case 11: 11932#line 2621 "upb/json/parser.rl" 11933 { CHECK_RETURN_TOP(end_year(parser, p)); } 11934 break; 11935 case 12: 11936#line 2625 "upb/json/parser.rl" 11937 { start_month(parser, p); } 11938 break; 11939 case 13: 11940#line 2626 "upb/json/parser.rl" 11941 { CHECK_RETURN_TOP(end_month(parser, p)); } 11942 break; 11943 case 14: 11944#line 2630 "upb/json/parser.rl" 11945 { start_day(parser, p); } 11946 break; 11947 case 15: 11948#line 2631 "upb/json/parser.rl" 11949 { CHECK_RETURN_TOP(end_day(parser, p)); } 11950 break; 11951 case 16: 11952#line 2635 "upb/json/parser.rl" 11953 { start_hour(parser, p); } 11954 break; 11955 case 17: 11956#line 2636 "upb/json/parser.rl" 11957 { CHECK_RETURN_TOP(end_hour(parser, p)); } 11958 break; 11959 case 18: 11960#line 2640 
"upb/json/parser.rl" 11961 { start_minute(parser, p); } 11962 break; 11963 case 19: 11964#line 2641 "upb/json/parser.rl" 11965 { CHECK_RETURN_TOP(end_minute(parser, p)); } 11966 break; 11967 case 20: 11968#line 2645 "upb/json/parser.rl" 11969 { start_second(parser, p); } 11970 break; 11971 case 21: 11972#line 2646 "upb/json/parser.rl" 11973 { CHECK_RETURN_TOP(end_second(parser, p)); } 11974 break; 11975 case 22: 11976#line 2651 "upb/json/parser.rl" 11977 { start_duration_base(parser, p); } 11978 break; 11979 case 23: 11980#line 2652 "upb/json/parser.rl" 11981 { CHECK_RETURN_TOP(end_duration_base(parser, p)); } 11982 break; 11983 case 24: 11984#line 2654 "upb/json/parser.rl" 11985 { p--; {cs = stack[--top]; goto _again;} } 11986 break; 11987 case 25: 11988#line 2659 "upb/json/parser.rl" 11989 { start_timestamp_base(parser); } 11990 break; 11991 case 26: 11992#line 2661 "upb/json/parser.rl" 11993 { start_timestamp_fraction(parser, p); } 11994 break; 11995 case 27: 11996#line 2662 "upb/json/parser.rl" 11997 { CHECK_RETURN_TOP(end_timestamp_fraction(parser, p)); } 11998 break; 11999 case 28: 12000#line 2664 "upb/json/parser.rl" 12001 { start_timestamp_zone(parser, p); } 12002 break; 12003 case 29: 12004#line 2665 "upb/json/parser.rl" 12005 { CHECK_RETURN_TOP(end_timestamp_zone(parser, p)); } 12006 break; 12007 case 30: 12008#line 2667 "upb/json/parser.rl" 12009 { p--; {cs = stack[--top]; goto _again;} } 12010 break; 12011 case 31: 12012#line 2672 "upb/json/parser.rl" 12013 { start_fieldmask_path_text(parser, p); } 12014 break; 12015 case 32: 12016#line 2673 "upb/json/parser.rl" 12017 { end_fieldmask_path_text(parser, p); } 12018 break; 12019 case 33: 12020#line 2678 "upb/json/parser.rl" 12021 { start_fieldmask_path(parser); } 12022 break; 12023 case 34: 12024#line 2679 "upb/json/parser.rl" 12025 { end_fieldmask_path(parser); } 12026 break; 12027 case 35: 12028#line 2685 "upb/json/parser.rl" 12029 { p--; {cs = stack[--top]; goto _again;} } 12030 break; 12031 case 36: 
12032#line 2690 "upb/json/parser.rl" 12033 { 12034 if (is_wellknown_msg(parser, UPB_WELLKNOWN_TIMESTAMP)) { 12035 {stack[top++] = cs; cs = 47;goto _again;} 12036 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_DURATION)) { 12037 {stack[top++] = cs; cs = 40;goto _again;} 12038 } else if (is_wellknown_msg(parser, UPB_WELLKNOWN_FIELDMASK)) { 12039 {stack[top++] = cs; cs = 75;goto _again;} 12040 } else { 12041 {stack[top++] = cs; cs = 32;goto _again;} 12042 } 12043 } 12044 break; 12045 case 37: 12046#line 2703 "upb/json/parser.rl" 12047 { p--; {stack[top++] = cs; cs = 78;goto _again;} } 12048 break; 12049 case 38: 12050#line 2708 "upb/json/parser.rl" 12051 { 12052 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 12053 start_any_member(parser, p); 12054 } else { 12055 start_member(parser); 12056 } 12057 } 12058 break; 12059 case 39: 12060#line 2715 "upb/json/parser.rl" 12061 { CHECK_RETURN_TOP(end_membername(parser)); } 12062 break; 12063 case 40: 12064#line 2718 "upb/json/parser.rl" 12065 { 12066 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 12067 end_any_member(parser, p); 12068 } else { 12069 end_member(parser); 12070 } 12071 } 12072 break; 12073 case 41: 12074#line 2729 "upb/json/parser.rl" 12075 { 12076 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 12077 start_any_object(parser, p); 12078 } else { 12079 start_object(parser); 12080 } 12081 } 12082 break; 12083 case 42: 12084#line 2738 "upb/json/parser.rl" 12085 { 12086 if (is_wellknown_msg(parser, UPB_WELLKNOWN_ANY)) { 12087 CHECK_RETURN_TOP(end_any_object(parser, p)); 12088 } else { 12089 end_object(parser); 12090 } 12091 } 12092 break; 12093 case 43: 12094#line 2750 "upb/json/parser.rl" 12095 { CHECK_RETURN_TOP(start_array(parser)); } 12096 break; 12097 case 44: 12098#line 2754 "upb/json/parser.rl" 12099 { end_array(parser); } 12100 break; 12101 case 45: 12102#line 2759 "upb/json/parser.rl" 12103 { CHECK_RETURN_TOP(start_number(parser, p)); } 12104 break; 12105 case 46: 12106#line 2760 
"upb/json/parser.rl" 12107 { CHECK_RETURN_TOP(end_number(parser, p)); } 12108 break; 12109 case 47: 12110#line 2762 "upb/json/parser.rl" 12111 { CHECK_RETURN_TOP(start_stringval(parser)); } 12112 break; 12113 case 48: 12114#line 2763 "upb/json/parser.rl" 12115 { CHECK_RETURN_TOP(end_stringval(parser)); } 12116 break; 12117 case 49: 12118#line 2765 "upb/json/parser.rl" 12119 { CHECK_RETURN_TOP(end_bool(parser, true)); } 12120 break; 12121 case 50: 12122#line 2767 "upb/json/parser.rl" 12123 { CHECK_RETURN_TOP(end_bool(parser, false)); } 12124 break; 12125 case 51: 12126#line 2769 "upb/json/parser.rl" 12127 { CHECK_RETURN_TOP(end_null(parser)); } 12128 break; 12129 case 52: 12130#line 2771 "upb/json/parser.rl" 12131 { CHECK_RETURN_TOP(start_subobject_full(parser)); } 12132 break; 12133 case 53: 12134#line 2772 "upb/json/parser.rl" 12135 { end_subobject_full(parser); } 12136 break; 12137 case 54: 12138#line 2777 "upb/json/parser.rl" 12139 { p--; {cs = stack[--top]; goto _again;} } 12140 break; 12141#line 3185 "upb/json/parser.c" 12142 } 12143 } 12144 12145_again: 12146 if ( cs == 0 ) 12147 goto _out; 12148 if ( ++p != pe ) 12149 goto _resume; 12150 _test_eof: {} 12151 if ( p == eof ) 12152 { 12153 const char *__acts = _json_actions + _json_eof_actions[cs]; 12154 unsigned int __nacts = (unsigned int) *__acts++; 12155 while ( __nacts-- > 0 ) { 12156 switch ( *__acts++ ) { 12157 case 0: 12158#line 2586 "upb/json/parser.rl" 12159 { p--; {cs = stack[--top]; if ( p == pe ) 12160 goto _test_eof; 12161goto _again;} } 12162 break; 12163 case 46: 12164#line 2760 "upb/json/parser.rl" 12165 { CHECK_RETURN_TOP(end_number(parser, p)); } 12166 break; 12167 case 49: 12168#line 2765 "upb/json/parser.rl" 12169 { CHECK_RETURN_TOP(end_bool(parser, true)); } 12170 break; 12171 case 50: 12172#line 2767 "upb/json/parser.rl" 12173 { CHECK_RETURN_TOP(end_bool(parser, false)); } 12174 break; 12175 case 51: 12176#line 2769 "upb/json/parser.rl" 12177 { CHECK_RETURN_TOP(end_null(parser)); } 12178 
break; 12179 case 53: 12180#line 2772 "upb/json/parser.rl" 12181 { end_subobject_full(parser); } 12182 break; 12183#line 3227 "upb/json/parser.c" 12184 } 12185 } 12186 } 12187 12188 _out: {} 12189 } 12190 12191#line 2805 "upb/json/parser.rl" 12192 12193 if (p != pe) { 12194 upb_status_seterrf(parser->status, "Parse error at '%.*s'\n", pe - p, p); 12195 } else { 12196 capture_suspend(parser, &p); 12197 } 12198 12199error: 12200 /* Save parsing state back to parser. */ 12201 parser->current_state = cs; 12202 parser->parser_top = top; 12203 12204 return p - buf; 12205} 12206 12207static bool end(void *closure, const void *hd) { 12208 upb_json_parser *parser = closure; 12209 12210 /* Prevent compile warning on unused static constants. */ 12211 UPB_UNUSED(json_start); 12212 UPB_UNUSED(json_en_duration_machine); 12213 UPB_UNUSED(json_en_fieldmask_machine); 12214 UPB_UNUSED(json_en_number_machine); 12215 UPB_UNUSED(json_en_string_machine); 12216 UPB_UNUSED(json_en_timestamp_machine); 12217 UPB_UNUSED(json_en_value_machine); 12218 UPB_UNUSED(json_en_main); 12219 12220 parse(parser, hd, &eof_ch, 0, NULL); 12221 12222 return parser->current_state >= 106; 12223} 12224 12225static void json_parser_reset(upb_json_parser *p) { 12226 int cs; 12227 int top; 12228 12229 p->top = p->stack; 12230 init_frame(p->top); 12231 12232 /* Emit Ragel initialization of the parser. 
*/ 12233 12234#line 3278 "upb/json/parser.c" 12235 { 12236 cs = json_start; 12237 top = 0; 12238 } 12239 12240#line 2847 "upb/json/parser.rl" 12241 p->current_state = cs; 12242 p->parser_top = top; 12243 accumulate_clear(p); 12244 p->multipart_state = MULTIPART_INACTIVE; 12245 p->capture = NULL; 12246 p->accumulated = NULL; 12247} 12248 12249static upb_json_parsermethod *parsermethod_new(upb_json_codecache *c, 12250 const upb_msgdef *md) { 12251 upb_msg_field_iter i; 12252 upb_alloc *alloc = upb_arena_alloc(c->arena); 12253 12254 upb_json_parsermethod *m = upb_malloc(alloc, sizeof(*m)); 12255 12256 m->cache = c; 12257 12258 upb_byteshandler_init(&m->input_handler_); 12259 upb_byteshandler_setstring(&m->input_handler_, parse, m); 12260 upb_byteshandler_setendstr(&m->input_handler_, end, m); 12261 12262 upb_strtable_init2(&m->name_table, UPB_CTYPE_CONSTPTR, alloc); 12263 12264 /* Build name_table */ 12265 12266 for(upb_msg_field_begin(&i, md); 12267 !upb_msg_field_done(&i); 12268 upb_msg_field_next(&i)) { 12269 const upb_fielddef *f = upb_msg_iter_field(&i); 12270 upb_value v = upb_value_constptr(f); 12271 const char *name; 12272 12273 /* Add an entry for the JSON name. */ 12274 name = upb_fielddef_jsonname(f); 12275 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); 12276 12277 if (strcmp(name, upb_fielddef_name(f)) != 0) { 12278 /* Since the JSON name is different from the regular field name, add an 12279 * entry for the raw name (compliant proto3 JSON parsers must accept 12280 * both). 
*/ 12281 const char *name = upb_fielddef_name(f); 12282 upb_strtable_insert3(&m->name_table, name, strlen(name), v, alloc); 12283 } 12284 } 12285 12286 return m; 12287} 12288 12289/* Public API *****************************************************************/ 12290 12291upb_json_parser *upb_json_parser_create(upb_arena *arena, 12292 const upb_json_parsermethod *method, 12293 const upb_symtab* symtab, 12294 upb_sink output, 12295 upb_status *status, 12296 bool ignore_json_unknown) { 12297#ifndef NDEBUG 12298 const size_t size_before = upb_arena_bytesallocated(arena); 12299#endif 12300 upb_json_parser *p = upb_arena_malloc(arena, sizeof(upb_json_parser)); 12301 if (!p) return false; 12302 12303 p->arena = arena; 12304 p->method = method; 12305 p->status = status; 12306 p->limit = p->stack + UPB_JSON_MAX_DEPTH; 12307 p->accumulate_buf = NULL; 12308 p->accumulate_buf_size = 0; 12309 upb_bytessink_reset(&p->input_, &method->input_handler_, p); 12310 12311 json_parser_reset(p); 12312 p->top->sink = output; 12313 p->top->m = upb_handlers_msgdef(output.handlers); 12314 if (is_wellknown_msg(p, UPB_WELLKNOWN_ANY)) { 12315 p->top->is_any = true; 12316 p->top->any_frame = json_parser_any_frame_new(p); 12317 } else { 12318 p->top->is_any = false; 12319 p->top->any_frame = NULL; 12320 } 12321 set_name_table(p, p->top); 12322 p->symtab = symtab; 12323 12324 p->ignore_json_unknown = ignore_json_unknown; 12325 12326 /* If this fails, uncomment and increase the value in parser.h. 
*/ 12327 /* fprintf(stderr, "%zd\n", upb_arena_bytesallocated(arena) - size_before); */ 12328 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(arena) - size_before <= 12329 UPB_JSON_PARSER_SIZE); 12330 return p; 12331} 12332 12333upb_bytessink upb_json_parser_input(upb_json_parser *p) { 12334 return p->input_; 12335} 12336 12337const upb_byteshandler *upb_json_parsermethod_inputhandler( 12338 const upb_json_parsermethod *m) { 12339 return &m->input_handler_; 12340} 12341 12342upb_json_codecache *upb_json_codecache_new(void) { 12343 upb_alloc *alloc; 12344 upb_json_codecache *c; 12345 12346 c = upb_gmalloc(sizeof(*c)); 12347 12348 c->arena = upb_arena_new(); 12349 alloc = upb_arena_alloc(c->arena); 12350 12351 upb_inttable_init2(&c->methods, UPB_CTYPE_CONSTPTR, alloc); 12352 12353 return c; 12354} 12355 12356void upb_json_codecache_free(upb_json_codecache *c) { 12357 upb_arena_free(c->arena); 12358 upb_gfree(c); 12359} 12360 12361const upb_json_parsermethod *upb_json_codecache_get(upb_json_codecache *c, 12362 const upb_msgdef *md) { 12363 upb_json_parsermethod *m; 12364 upb_value v; 12365 upb_msg_field_iter i; 12366 upb_alloc *alloc = upb_arena_alloc(c->arena); 12367 12368 if (upb_inttable_lookupptr(&c->methods, md, &v)) { 12369 return upb_value_getconstptr(v); 12370 } 12371 12372 m = parsermethod_new(c, md); 12373 v = upb_value_constptr(m); 12374 12375 if (!m) return NULL; 12376 if (!upb_inttable_insertptr2(&c->methods, md, v, alloc)) return NULL; 12377 12378 /* Populate parser methods for all submessages, so the name tables will 12379 * be available during parsing. 
*/ 12380 for(upb_msg_field_begin(&i, md); 12381 !upb_msg_field_done(&i); 12382 upb_msg_field_next(&i)) { 12383 upb_fielddef *f = upb_msg_iter_field(&i); 12384 12385 if (upb_fielddef_issubmsg(f)) { 12386 const upb_msgdef *subdef = upb_fielddef_msgsubdef(f); 12387 const upb_json_parsermethod *sub_method = 12388 upb_json_codecache_get(c, subdef); 12389 12390 if (!sub_method) return NULL; 12391 } 12392 } 12393 12394 return m; 12395} 12396/* 12397** This currently uses snprintf() to format primitives, and could be optimized 12398** further. 12399*/ 12400 12401 12402#include <ctype.h> 12403#include <inttypes.h> 12404#include <stdint.h> 12405#include <string.h> 12406#include <time.h> 12407 12408 12409struct upb_json_printer { 12410 upb_sink input_; 12411 /* BytesSink closure. */ 12412 void *subc_; 12413 upb_bytessink output_; 12414 12415 /* We track the depth so that we know when to emit startstr/endstr on the 12416 * output. */ 12417 int depth_; 12418 12419 /* Have we emitted the first element? This state is necessary to emit commas 12420 * without leaving a trailing comma in arrays/maps. We keep this state per 12421 * frame depth. 12422 * 12423 * Why max_depth * 2? UPB_MAX_HANDLER_DEPTH counts depth as nested messages. 12424 * We count frames (contexts in which we separate elements by commas) as both 12425 * repeated fields and messages (maps), and the worst case is a 12426 * message->repeated field->submessage->repeated field->... nesting. */ 12427 bool first_elem_[UPB_MAX_HANDLER_DEPTH * 2]; 12428 12429 /* To print timestamp, printer needs to cache its seconds and nanos values 12430 * and convert them when ending timestamp message. See comments of 12431 * printer_sethandlers_timestamp for more detail. */ 12432 int64_t seconds; 12433 int32_t nanos; 12434}; 12435 12436/* StringPiece; a pointer plus a length. 
*/ 12437typedef struct { 12438 char *ptr; 12439 size_t len; 12440} strpc; 12441 12442void freestrpc(void *ptr) { 12443 strpc *pc = ptr; 12444 upb_gfree(pc->ptr); 12445 upb_gfree(pc); 12446} 12447 12448typedef struct { 12449 bool preserve_fieldnames; 12450} upb_json_printercache; 12451 12452/* Convert fielddef name to JSON name and return as a string piece. */ 12453strpc *newstrpc(upb_handlers *h, const upb_fielddef *f, 12454 bool preserve_fieldnames) { 12455 /* TODO(haberman): handle malloc failure. */ 12456 strpc *ret = upb_gmalloc(sizeof(*ret)); 12457 if (preserve_fieldnames) { 12458 ret->ptr = upb_gstrdup(upb_fielddef_name(f)); 12459 ret->len = strlen(ret->ptr); 12460 } else { 12461 ret->ptr = upb_gstrdup(upb_fielddef_jsonname(f)); 12462 ret->len = strlen(ret->ptr); 12463 } 12464 12465 upb_handlers_addcleanup(h, ret, freestrpc); 12466 return ret; 12467} 12468 12469/* Convert a null-terminated const char* to a string piece. */ 12470strpc *newstrpc_str(upb_handlers *h, const char * str) { 12471 strpc * ret = upb_gmalloc(sizeof(*ret)); 12472 ret->ptr = upb_gstrdup(str); 12473 ret->len = strlen(str); 12474 upb_handlers_addcleanup(h, ret, freestrpc); 12475 return ret; 12476} 12477 12478/* ------------ JSON string printing: values, maps, arrays ------------------ */ 12479 12480static void print_data( 12481 upb_json_printer *p, const char *buf, size_t len) { 12482 /* TODO: Will need to change if we support pushback from the sink. */ 12483 size_t n = upb_bytessink_putbuf(p->output_, p->subc_, buf, len, NULL); 12484 UPB_ASSERT(n == len); 12485} 12486 12487static void print_comma(upb_json_printer *p) { 12488 if (!p->first_elem_[p->depth_]) { 12489 print_data(p, ",", 1); 12490 } 12491 p->first_elem_[p->depth_] = false; 12492} 12493 12494/* Helpers that print properly formatted elements to the JSON output stream. */ 12495 12496/* Used for escaping control chars in strings. 
*/ 12497static const char kControlCharLimit = 0x20; 12498 12499UPB_INLINE bool is_json_escaped(char c) { 12500 /* See RFC 4627. */ 12501 unsigned char uc = (unsigned char)c; 12502 return uc < kControlCharLimit || uc == '"' || uc == '\\'; 12503} 12504 12505UPB_INLINE const char* json_nice_escape(char c) { 12506 switch (c) { 12507 case '"': return "\\\""; 12508 case '\\': return "\\\\"; 12509 case '\b': return "\\b"; 12510 case '\f': return "\\f"; 12511 case '\n': return "\\n"; 12512 case '\r': return "\\r"; 12513 case '\t': return "\\t"; 12514 default: return NULL; 12515 } 12516} 12517 12518/* Write a properly escaped string chunk. The surrounding quotes are *not* 12519 * printed; this is so that the caller has the option of emitting the string 12520 * content in chunks. */ 12521static void putstring(upb_json_printer *p, const char *buf, size_t len) { 12522 const char* unescaped_run = NULL; 12523 unsigned int i; 12524 for (i = 0; i < len; i++) { 12525 char c = buf[i]; 12526 /* Handle escaping. */ 12527 if (is_json_escaped(c)) { 12528 /* Use a "nice" escape, like \n, if one exists for this character. */ 12529 const char* escape = json_nice_escape(c); 12530 /* If we don't have a specific 'nice' escape code, use a \uXXXX-style 12531 * escape. */ 12532 char escape_buf[8]; 12533 if (!escape) { 12534 unsigned char byte = (unsigned char)c; 12535 _upb_snprintf(escape_buf, sizeof(escape_buf), "\\u%04x", (int)byte); 12536 escape = escape_buf; 12537 } 12538 12539 /* N.B. that we assume that the input encoding is equal to the output 12540 * encoding (both UTF-8 for now), so for chars >= 0x20 and != \, ", we 12541 * can simply pass the bytes through. */ 12542 12543 /* If there's a current run of unescaped chars, print that run first. */ 12544 if (unescaped_run) { 12545 print_data(p, unescaped_run, &buf[i] - unescaped_run); 12546 unescaped_run = NULL; 12547 } 12548 /* Then print the escape code. 
*/ 12549 print_data(p, escape, strlen(escape)); 12550 } else { 12551 /* Add to the current unescaped run of characters. */ 12552 if (unescaped_run == NULL) { 12553 unescaped_run = &buf[i]; 12554 } 12555 } 12556 } 12557 12558 /* If the string ended in a run of unescaped characters, print that last run. */ 12559 if (unescaped_run) { 12560 print_data(p, unescaped_run, &buf[len] - unescaped_run); 12561 } 12562} 12563 12564#define CHKLENGTH(x) if (!(x)) return -1; 12565 12566/* Helpers that format floating point values according to our custom formats. 12567 * Right now we use %.8g and %.17g for float/double, respectively, to match 12568 * proto2::util::JsonFormat's defaults. May want to change this later. */ 12569 12570const char neginf[] = "\"-Infinity\""; 12571const char inf[] = "\"Infinity\""; 12572 12573static size_t fmt_double(double val, char* buf, size_t length) { 12574 if (val == UPB_INFINITY) { 12575 CHKLENGTH(length >= strlen(inf)); 12576 strcpy(buf, inf); 12577 return strlen(inf); 12578 } else if (val == -UPB_INFINITY) { 12579 CHKLENGTH(length >= strlen(neginf)); 12580 strcpy(buf, neginf); 12581 return strlen(neginf); 12582 } else { 12583 size_t n = _upb_snprintf(buf, length, "%.17g", val); 12584 CHKLENGTH(n > 0 && n < length); 12585 return n; 12586 } 12587} 12588 12589static size_t fmt_float(float val, char* buf, size_t length) { 12590 size_t n = _upb_snprintf(buf, length, "%.8g", val); 12591 CHKLENGTH(n > 0 && n < length); 12592 return n; 12593} 12594 12595static size_t fmt_bool(bool val, char* buf, size_t length) { 12596 size_t n = _upb_snprintf(buf, length, "%s", (val ? 
"true" : "false")); 12597 CHKLENGTH(n > 0 && n < length); 12598 return n; 12599} 12600 12601static size_t fmt_int64_as_number(int64_t val, char* buf, size_t length) { 12602 size_t n = _upb_snprintf(buf, length, "%" PRId64, val); 12603 CHKLENGTH(n > 0 && n < length); 12604 return n; 12605} 12606 12607static size_t fmt_uint64_as_number(uint64_t val, char* buf, size_t length) { 12608 size_t n = _upb_snprintf(buf, length, "%" PRIu64, val); 12609 CHKLENGTH(n > 0 && n < length); 12610 return n; 12611} 12612 12613static size_t fmt_int64_as_string(int64_t val, char* buf, size_t length) { 12614 size_t n = _upb_snprintf(buf, length, "\"%" PRId64 "\"", val); 12615 CHKLENGTH(n > 0 && n < length); 12616 return n; 12617} 12618 12619static size_t fmt_uint64_as_string(uint64_t val, char* buf, size_t length) { 12620 size_t n = _upb_snprintf(buf, length, "\"%" PRIu64 "\"", val); 12621 CHKLENGTH(n > 0 && n < length); 12622 return n; 12623} 12624 12625/* Print a map key given a field name. Called by scalar field handlers and by 12626 * startseq for repeated fields. 
*/ 12627static bool putkey(void *closure, const void *handler_data) { 12628 upb_json_printer *p = closure; 12629 const strpc *key = handler_data; 12630 print_comma(p); 12631 print_data(p, "\"", 1); 12632 putstring(p, key->ptr, key->len); 12633 print_data(p, "\":", 2); 12634 return true; 12635} 12636 12637#define CHKFMT(val) if ((val) == (size_t)-1) return false; 12638#define CHK(val) if (!(val)) return false; 12639 12640#define TYPE_HANDLERS(type, fmt_func) \ 12641 static bool put##type(void *closure, const void *handler_data, type val) { \ 12642 upb_json_printer *p = closure; \ 12643 char data[64]; \ 12644 size_t length = fmt_func(val, data, sizeof(data)); \ 12645 UPB_UNUSED(handler_data); \ 12646 CHKFMT(length); \ 12647 print_data(p, data, length); \ 12648 return true; \ 12649 } \ 12650 static bool scalar_##type(void *closure, const void *handler_data, \ 12651 type val) { \ 12652 CHK(putkey(closure, handler_data)); \ 12653 CHK(put##type(closure, handler_data, val)); \ 12654 return true; \ 12655 } \ 12656 static bool repeated_##type(void *closure, const void *handler_data, \ 12657 type val) { \ 12658 upb_json_printer *p = closure; \ 12659 print_comma(p); \ 12660 CHK(put##type(closure, handler_data, val)); \ 12661 return true; \ 12662 } 12663 12664#define TYPE_HANDLERS_MAPKEY(type, fmt_func) \ 12665 static bool putmapkey_##type(void *closure, const void *handler_data, \ 12666 type val) { \ 12667 upb_json_printer *p = closure; \ 12668 char data[64]; \ 12669 size_t length = fmt_func(val, data, sizeof(data)); \ 12670 UPB_UNUSED(handler_data); \ 12671 print_data(p, "\"", 1); \ 12672 print_data(p, data, length); \ 12673 print_data(p, "\":", 2); \ 12674 return true; \ 12675 } 12676 12677TYPE_HANDLERS(double, fmt_double) 12678TYPE_HANDLERS(float, fmt_float) 12679TYPE_HANDLERS(bool, fmt_bool) 12680TYPE_HANDLERS(int32_t, fmt_int64_as_number) 12681TYPE_HANDLERS(uint32_t, fmt_int64_as_number) 12682TYPE_HANDLERS(int64_t, fmt_int64_as_string) 12683TYPE_HANDLERS(uint64_t, 
fmt_uint64_as_string) 12684 12685/* double and float are not allowed to be map keys. */ 12686TYPE_HANDLERS_MAPKEY(bool, fmt_bool) 12687TYPE_HANDLERS_MAPKEY(int32_t, fmt_int64_as_number) 12688TYPE_HANDLERS_MAPKEY(uint32_t, fmt_int64_as_number) 12689TYPE_HANDLERS_MAPKEY(int64_t, fmt_int64_as_number) 12690TYPE_HANDLERS_MAPKEY(uint64_t, fmt_uint64_as_number) 12691 12692#undef TYPE_HANDLERS 12693#undef TYPE_HANDLERS_MAPKEY 12694 12695typedef struct { 12696 void *keyname; 12697 const upb_enumdef *enumdef; 12698} EnumHandlerData; 12699 12700static bool scalar_enum(void *closure, const void *handler_data, 12701 int32_t val) { 12702 const EnumHandlerData *hd = handler_data; 12703 upb_json_printer *p = closure; 12704 const char *symbolic_name; 12705 12706 CHK(putkey(closure, hd->keyname)); 12707 12708 symbolic_name = upb_enumdef_iton(hd->enumdef, val); 12709 if (symbolic_name) { 12710 print_data(p, "\"", 1); 12711 putstring(p, symbolic_name, strlen(symbolic_name)); 12712 print_data(p, "\"", 1); 12713 } else { 12714 putint32_t(closure, NULL, val); 12715 } 12716 12717 return true; 12718} 12719 12720static void print_enum_symbolic_name(upb_json_printer *p, 12721 const upb_enumdef *def, 12722 int32_t val) { 12723 const char *symbolic_name = upb_enumdef_iton(def, val); 12724 if (symbolic_name) { 12725 print_data(p, "\"", 1); 12726 putstring(p, symbolic_name, strlen(symbolic_name)); 12727 print_data(p, "\"", 1); 12728 } else { 12729 putint32_t(p, NULL, val); 12730 } 12731} 12732 12733static bool repeated_enum(void *closure, const void *handler_data, 12734 int32_t val) { 12735 const EnumHandlerData *hd = handler_data; 12736 upb_json_printer *p = closure; 12737 print_comma(p); 12738 12739 print_enum_symbolic_name(p, hd->enumdef, val); 12740 12741 return true; 12742} 12743 12744static bool mapvalue_enum(void *closure, const void *handler_data, 12745 int32_t val) { 12746 const EnumHandlerData *hd = handler_data; 12747 upb_json_printer *p = closure; 12748 12749 
print_enum_symbolic_name(p, hd->enumdef, val); 12750 12751 return true; 12752} 12753 12754static void *scalar_startsubmsg(void *closure, const void *handler_data) { 12755 return putkey(closure, handler_data) ? closure : UPB_BREAK; 12756} 12757 12758static void *repeated_startsubmsg(void *closure, const void *handler_data) { 12759 upb_json_printer *p = closure; 12760 UPB_UNUSED(handler_data); 12761 print_comma(p); 12762 return closure; 12763} 12764 12765static void start_frame(upb_json_printer *p) { 12766 p->depth_++; 12767 p->first_elem_[p->depth_] = true; 12768 print_data(p, "{", 1); 12769} 12770 12771static void end_frame(upb_json_printer *p) { 12772 print_data(p, "}", 1); 12773 p->depth_--; 12774} 12775 12776static bool printer_startmsg(void *closure, const void *handler_data) { 12777 upb_json_printer *p = closure; 12778 UPB_UNUSED(handler_data); 12779 if (p->depth_ == 0) { 12780 upb_bytessink_start(p->output_, 0, &p->subc_); 12781 } 12782 start_frame(p); 12783 return true; 12784} 12785 12786static bool printer_endmsg(void *closure, const void *handler_data, upb_status *s) { 12787 upb_json_printer *p = closure; 12788 UPB_UNUSED(handler_data); 12789 UPB_UNUSED(s); 12790 end_frame(p); 12791 if (p->depth_ == 0) { 12792 upb_bytessink_end(p->output_); 12793 } 12794 return true; 12795} 12796 12797static void *startseq(void *closure, const void *handler_data) { 12798 upb_json_printer *p = closure; 12799 CHK(putkey(closure, handler_data)); 12800 p->depth_++; 12801 p->first_elem_[p->depth_] = true; 12802 print_data(p, "[", 1); 12803 return closure; 12804} 12805 12806static bool endseq(void *closure, const void *handler_data) { 12807 upb_json_printer *p = closure; 12808 UPB_UNUSED(handler_data); 12809 print_data(p, "]", 1); 12810 p->depth_--; 12811 return true; 12812} 12813 12814static void *startmap(void *closure, const void *handler_data) { 12815 upb_json_printer *p = closure; 12816 CHK(putkey(closure, handler_data)); 12817 p->depth_++; 12818 p->first_elem_[p->depth_] = 
true; 12819 print_data(p, "{", 1); 12820 return closure; 12821} 12822 12823static bool endmap(void *closure, const void *handler_data) { 12824 upb_json_printer *p = closure; 12825 UPB_UNUSED(handler_data); 12826 print_data(p, "}", 1); 12827 p->depth_--; 12828 return true; 12829} 12830 12831static size_t putstr(void *closure, const void *handler_data, const char *str, 12832 size_t len, const upb_bufhandle *handle) { 12833 upb_json_printer *p = closure; 12834 UPB_UNUSED(handler_data); 12835 UPB_UNUSED(handle); 12836 putstring(p, str, len); 12837 return len; 12838} 12839 12840/* This has to Base64 encode the bytes, because JSON has no "bytes" type. */ 12841static size_t putbytes(void *closure, const void *handler_data, const char *str, 12842 size_t len, const upb_bufhandle *handle) { 12843 upb_json_printer *p = closure; 12844 12845 /* This is the regular base64, not the "web-safe" version. */ 12846 static const char base64[] = 12847 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; 12848 12849 /* Base64-encode. 
*/ 12850 char data[16000]; 12851 const char *limit = data + sizeof(data); 12852 const unsigned char *from = (const unsigned char*)str; 12853 char *to = data; 12854 size_t remaining = len; 12855 size_t bytes; 12856 12857 UPB_UNUSED(handler_data); 12858 UPB_UNUSED(handle); 12859 12860 print_data(p, "\"", 1); 12861 12862 while (remaining > 2) { 12863 if (limit - to < 4) { 12864 bytes = to - data; 12865 putstring(p, data, bytes); 12866 to = data; 12867 } 12868 12869 to[0] = base64[from[0] >> 2]; 12870 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; 12871 to[2] = base64[((from[1] & 0xf) << 2) | (from[2] >> 6)]; 12872 to[3] = base64[from[2] & 0x3f]; 12873 12874 remaining -= 3; 12875 to += 4; 12876 from += 3; 12877 } 12878 12879 switch (remaining) { 12880 case 2: 12881 to[0] = base64[from[0] >> 2]; 12882 to[1] = base64[((from[0] & 0x3) << 4) | (from[1] >> 4)]; 12883 to[2] = base64[(from[1] & 0xf) << 2]; 12884 to[3] = '='; 12885 to += 4; 12886 from += 2; 12887 break; 12888 case 1: 12889 to[0] = base64[from[0] >> 2]; 12890 to[1] = base64[((from[0] & 0x3) << 4)]; 12891 to[2] = '='; 12892 to[3] = '='; 12893 to += 4; 12894 from += 1; 12895 break; 12896 } 12897 12898 bytes = to - data; 12899 putstring(p, data, bytes); 12900 print_data(p, "\"", 1); 12901 return len; 12902} 12903 12904static void *scalar_startstr(void *closure, const void *handler_data, 12905 size_t size_hint) { 12906 upb_json_printer *p = closure; 12907 UPB_UNUSED(handler_data); 12908 UPB_UNUSED(size_hint); 12909 CHK(putkey(closure, handler_data)); 12910 print_data(p, "\"", 1); 12911 return p; 12912} 12913 12914static size_t scalar_str(void *closure, const void *handler_data, 12915 const char *str, size_t len, 12916 const upb_bufhandle *handle) { 12917 CHK(putstr(closure, handler_data, str, len, handle)); 12918 return len; 12919} 12920 12921static bool scalar_endstr(void *closure, const void *handler_data) { 12922 upb_json_printer *p = closure; 12923 UPB_UNUSED(handler_data); 12924 print_data(p, "\"", 
1); 12925 return true; 12926} 12927 12928static void *repeated_startstr(void *closure, const void *handler_data, 12929 size_t size_hint) { 12930 upb_json_printer *p = closure; 12931 UPB_UNUSED(handler_data); 12932 UPB_UNUSED(size_hint); 12933 print_comma(p); 12934 print_data(p, "\"", 1); 12935 return p; 12936} 12937 12938static size_t repeated_str(void *closure, const void *handler_data, 12939 const char *str, size_t len, 12940 const upb_bufhandle *handle) { 12941 CHK(putstr(closure, handler_data, str, len, handle)); 12942 return len; 12943} 12944 12945static bool repeated_endstr(void *closure, const void *handler_data) { 12946 upb_json_printer *p = closure; 12947 UPB_UNUSED(handler_data); 12948 print_data(p, "\"", 1); 12949 return true; 12950} 12951 12952static void *mapkeyval_startstr(void *closure, const void *handler_data, 12953 size_t size_hint) { 12954 upb_json_printer *p = closure; 12955 UPB_UNUSED(handler_data); 12956 UPB_UNUSED(size_hint); 12957 print_data(p, "\"", 1); 12958 return p; 12959} 12960 12961static size_t mapkey_str(void *closure, const void *handler_data, 12962 const char *str, size_t len, 12963 const upb_bufhandle *handle) { 12964 CHK(putstr(closure, handler_data, str, len, handle)); 12965 return len; 12966} 12967 12968static bool mapkey_endstr(void *closure, const void *handler_data) { 12969 upb_json_printer *p = closure; 12970 UPB_UNUSED(handler_data); 12971 print_data(p, "\":", 2); 12972 return true; 12973} 12974 12975static bool mapvalue_endstr(void *closure, const void *handler_data) { 12976 upb_json_printer *p = closure; 12977 UPB_UNUSED(handler_data); 12978 print_data(p, "\"", 1); 12979 return true; 12980} 12981 12982static size_t scalar_bytes(void *closure, const void *handler_data, 12983 const char *str, size_t len, 12984 const upb_bufhandle *handle) { 12985 CHK(putkey(closure, handler_data)); 12986 CHK(putbytes(closure, handler_data, str, len, handle)); 12987 return len; 12988} 12989 12990static size_t repeated_bytes(void *closure, 
const void *handler_data, 12991 const char *str, size_t len, 12992 const upb_bufhandle *handle) { 12993 upb_json_printer *p = closure; 12994 print_comma(p); 12995 CHK(putbytes(closure, handler_data, str, len, handle)); 12996 return len; 12997} 12998 12999static size_t mapkey_bytes(void *closure, const void *handler_data, 13000 const char *str, size_t len, 13001 const upb_bufhandle *handle) { 13002 upb_json_printer *p = closure; 13003 CHK(putbytes(closure, handler_data, str, len, handle)); 13004 print_data(p, ":", 1); 13005 return len; 13006} 13007 13008static void set_enum_hd(upb_handlers *h, 13009 const upb_fielddef *f, 13010 bool preserve_fieldnames, 13011 upb_handlerattr *attr) { 13012 EnumHandlerData *hd = upb_gmalloc(sizeof(EnumHandlerData)); 13013 hd->enumdef = upb_fielddef_enumsubdef(f); 13014 hd->keyname = newstrpc(h, f, preserve_fieldnames); 13015 upb_handlers_addcleanup(h, hd, upb_gfree); 13016 attr->handler_data = hd; 13017} 13018 13019/* Set up handlers for a mapentry submessage (i.e., an individual key/value pair 13020 * in a map). 13021 * 13022 * TODO: Handle missing key, missing value, out-of-order key/value, or repeated 13023 * key or value cases properly. The right way to do this is to allocate a 13024 * temporary structure at the start of a mapentry submessage, store key and 13025 * value data in it as key and value handlers are called, and then print the 13026 * key/value pair once at the end of the submessage. If we don't do this, we 13027 * should at least detect the case and throw an error. However, so far all of 13028 * our sources that emit mapentry messages do so canonically (with one key 13029 * field, and then one value field), so this is not a pressing concern at the 13030 * moment. */ 13031void printer_sethandlers_mapentry(const void *closure, bool preserve_fieldnames, 13032 upb_handlers *h) { 13033 const upb_msgdef *md = upb_handlers_msgdef(h); 13034 13035 /* A mapentry message is printed simply as '"key": value'. 
Rather than 13036 * special-case key and value for every type below, we just handle both 13037 * fields explicitly here. */ 13038 const upb_fielddef* key_field = upb_msgdef_itof(md, UPB_MAPENTRY_KEY); 13039 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_MAPENTRY_VALUE); 13040 13041 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13042 13043 UPB_UNUSED(closure); 13044 13045 switch (upb_fielddef_type(key_field)) { 13046 case UPB_TYPE_INT32: 13047 upb_handlers_setint32(h, key_field, putmapkey_int32_t, &empty_attr); 13048 break; 13049 case UPB_TYPE_INT64: 13050 upb_handlers_setint64(h, key_field, putmapkey_int64_t, &empty_attr); 13051 break; 13052 case UPB_TYPE_UINT32: 13053 upb_handlers_setuint32(h, key_field, putmapkey_uint32_t, &empty_attr); 13054 break; 13055 case UPB_TYPE_UINT64: 13056 upb_handlers_setuint64(h, key_field, putmapkey_uint64_t, &empty_attr); 13057 break; 13058 case UPB_TYPE_BOOL: 13059 upb_handlers_setbool(h, key_field, putmapkey_bool, &empty_attr); 13060 break; 13061 case UPB_TYPE_STRING: 13062 upb_handlers_setstartstr(h, key_field, mapkeyval_startstr, &empty_attr); 13063 upb_handlers_setstring(h, key_field, mapkey_str, &empty_attr); 13064 upb_handlers_setendstr(h, key_field, mapkey_endstr, &empty_attr); 13065 break; 13066 case UPB_TYPE_BYTES: 13067 upb_handlers_setstring(h, key_field, mapkey_bytes, &empty_attr); 13068 break; 13069 default: 13070 UPB_ASSERT(false); 13071 break; 13072 } 13073 13074 switch (upb_fielddef_type(value_field)) { 13075 case UPB_TYPE_INT32: 13076 upb_handlers_setint32(h, value_field, putint32_t, &empty_attr); 13077 break; 13078 case UPB_TYPE_INT64: 13079 upb_handlers_setint64(h, value_field, putint64_t, &empty_attr); 13080 break; 13081 case UPB_TYPE_UINT32: 13082 upb_handlers_setuint32(h, value_field, putuint32_t, &empty_attr); 13083 break; 13084 case UPB_TYPE_UINT64: 13085 upb_handlers_setuint64(h, value_field, putuint64_t, &empty_attr); 13086 break; 13087 case UPB_TYPE_BOOL: 13088 upb_handlers_setbool(h, 
value_field, putbool, &empty_attr); 13089 break; 13090 case UPB_TYPE_FLOAT: 13091 upb_handlers_setfloat(h, value_field, putfloat, &empty_attr); 13092 break; 13093 case UPB_TYPE_DOUBLE: 13094 upb_handlers_setdouble(h, value_field, putdouble, &empty_attr); 13095 break; 13096 case UPB_TYPE_STRING: 13097 upb_handlers_setstartstr(h, value_field, mapkeyval_startstr, &empty_attr); 13098 upb_handlers_setstring(h, value_field, putstr, &empty_attr); 13099 upb_handlers_setendstr(h, value_field, mapvalue_endstr, &empty_attr); 13100 break; 13101 case UPB_TYPE_BYTES: 13102 upb_handlers_setstring(h, value_field, putbytes, &empty_attr); 13103 break; 13104 case UPB_TYPE_ENUM: { 13105 upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; 13106 set_enum_hd(h, value_field, preserve_fieldnames, &enum_attr); 13107 upb_handlers_setint32(h, value_field, mapvalue_enum, &enum_attr); 13108 break; 13109 } 13110 case UPB_TYPE_MESSAGE: 13111 /* No handler necessary -- the submsg handlers will print the message 13112 * as appropriate. 
*/ 13113 break; 13114 } 13115} 13116 13117static bool putseconds(void *closure, const void *handler_data, 13118 int64_t seconds) { 13119 upb_json_printer *p = closure; 13120 p->seconds = seconds; 13121 UPB_UNUSED(handler_data); 13122 return true; 13123} 13124 13125static bool putnanos(void *closure, const void *handler_data, 13126 int32_t nanos) { 13127 upb_json_printer *p = closure; 13128 p->nanos = nanos; 13129 UPB_UNUSED(handler_data); 13130 return true; 13131} 13132 13133static void *scalar_startstr_nokey(void *closure, const void *handler_data, 13134 size_t size_hint) { 13135 upb_json_printer *p = closure; 13136 UPB_UNUSED(handler_data); 13137 UPB_UNUSED(size_hint); 13138 print_data(p, "\"", 1); 13139 return p; 13140} 13141 13142static size_t putstr_nokey(void *closure, const void *handler_data, 13143 const char *str, size_t len, 13144 const upb_bufhandle *handle) { 13145 upb_json_printer *p = closure; 13146 UPB_UNUSED(handler_data); 13147 UPB_UNUSED(handle); 13148 print_data(p, "\"", 1); 13149 putstring(p, str, len); 13150 print_data(p, "\"", 1); 13151 return len + 2; 13152} 13153 13154static void *startseq_nokey(void *closure, const void *handler_data) { 13155 upb_json_printer *p = closure; 13156 UPB_UNUSED(handler_data); 13157 p->depth_++; 13158 p->first_elem_[p->depth_] = true; 13159 print_data(p, "[", 1); 13160 return closure; 13161} 13162 13163static void *startseq_fieldmask(void *closure, const void *handler_data) { 13164 upb_json_printer *p = closure; 13165 UPB_UNUSED(handler_data); 13166 p->depth_++; 13167 p->first_elem_[p->depth_] = true; 13168 return closure; 13169} 13170 13171static bool endseq_fieldmask(void *closure, const void *handler_data) { 13172 upb_json_printer *p = closure; 13173 UPB_UNUSED(handler_data); 13174 p->depth_--; 13175 return true; 13176} 13177 13178static void *repeated_startstr_fieldmask( 13179 void *closure, const void *handler_data, 13180 size_t size_hint) { 13181 upb_json_printer *p = closure; 13182 
UPB_UNUSED(handler_data); 13183 UPB_UNUSED(size_hint); 13184 print_comma(p); 13185 return p; 13186} 13187 13188static size_t repeated_str_fieldmask( 13189 void *closure, const void *handler_data, 13190 const char *str, size_t len, 13191 const upb_bufhandle *handle) { 13192 const char* limit = str + len; 13193 bool upper = false; 13194 size_t result_len = 0; 13195 for (; str < limit; str++) { 13196 if (*str == '_') { 13197 upper = true; 13198 continue; 13199 } 13200 if (upper && *str >= 'a' && *str <= 'z') { 13201 char upper_char = toupper(*str); 13202 CHK(putstr(closure, handler_data, &upper_char, 1, handle)); 13203 } else { 13204 CHK(putstr(closure, handler_data, str, 1, handle)); 13205 } 13206 upper = false; 13207 result_len++; 13208 } 13209 return result_len; 13210} 13211 13212static void *startmap_nokey(void *closure, const void *handler_data) { 13213 upb_json_printer *p = closure; 13214 UPB_UNUSED(handler_data); 13215 p->depth_++; 13216 p->first_elem_[p->depth_] = true; 13217 print_data(p, "{", 1); 13218 return closure; 13219} 13220 13221static bool putnull(void *closure, const void *handler_data, 13222 int32_t null) { 13223 upb_json_printer *p = closure; 13224 print_data(p, "null", 4); 13225 UPB_UNUSED(handler_data); 13226 UPB_UNUSED(null); 13227 return true; 13228} 13229 13230static bool printer_startdurationmsg(void *closure, const void *handler_data) { 13231 upb_json_printer *p = closure; 13232 UPB_UNUSED(handler_data); 13233 if (p->depth_ == 0) { 13234 upb_bytessink_start(p->output_, 0, &p->subc_); 13235 } 13236 return true; 13237} 13238 13239#define UPB_DURATION_MAX_JSON_LEN 23 13240#define UPB_DURATION_MAX_NANO_LEN 9 13241 13242static bool printer_enddurationmsg(void *closure, const void *handler_data, 13243 upb_status *s) { 13244 upb_json_printer *p = closure; 13245 char buffer[UPB_DURATION_MAX_JSON_LEN]; 13246 size_t base_len; 13247 size_t curr; 13248 size_t i; 13249 13250 memset(buffer, 0, UPB_DURATION_MAX_JSON_LEN); 13251 13252 if (p->seconds < 
-315576000000) { 13253 upb_status_seterrf(s, "error parsing duration: " 13254 "minimum acceptable value is " 13255 "-315576000000"); 13256 return false; 13257 } 13258 13259 if (p->seconds > 315576000000) { 13260 upb_status_seterrf(s, "error serializing duration: " 13261 "maximum acceptable value is " 13262 "315576000000"); 13263 return false; 13264 } 13265 13266 _upb_snprintf(buffer, sizeof(buffer), "%ld", (long)p->seconds); 13267 base_len = strlen(buffer); 13268 13269 if (p->nanos != 0) { 13270 char nanos_buffer[UPB_DURATION_MAX_NANO_LEN + 3]; 13271 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", 13272 p->nanos / 1000000000.0); 13273 /* Remove trailing 0. */ 13274 for (i = UPB_DURATION_MAX_NANO_LEN + 2; 13275 nanos_buffer[i] == '0'; i--) { 13276 nanos_buffer[i] = 0; 13277 } 13278 strcpy(buffer + base_len, nanos_buffer + 1); 13279 } 13280 13281 curr = strlen(buffer); 13282 strcpy(buffer + curr, "s"); 13283 13284 p->seconds = 0; 13285 p->nanos = 0; 13286 13287 print_data(p, "\"", 1); 13288 print_data(p, buffer, strlen(buffer)); 13289 print_data(p, "\"", 1); 13290 13291 if (p->depth_ == 0) { 13292 upb_bytessink_end(p->output_); 13293 } 13294 13295 UPB_UNUSED(handler_data); 13296 return true; 13297} 13298 13299static bool printer_starttimestampmsg(void *closure, const void *handler_data) { 13300 upb_json_printer *p = closure; 13301 UPB_UNUSED(handler_data); 13302 if (p->depth_ == 0) { 13303 upb_bytessink_start(p->output_, 0, &p->subc_); 13304 } 13305 return true; 13306} 13307 13308#define UPB_TIMESTAMP_MAX_JSON_LEN 31 13309#define UPB_TIMESTAMP_BEFORE_NANO_LEN 19 13310#define UPB_TIMESTAMP_MAX_NANO_LEN 9 13311 13312static bool printer_endtimestampmsg(void *closure, const void *handler_data, 13313 upb_status *s) { 13314 upb_json_printer *p = closure; 13315 char buffer[UPB_TIMESTAMP_MAX_JSON_LEN]; 13316 time_t time = p->seconds; 13317 size_t curr; 13318 size_t i; 13319 size_t year_length = 13320 strftime(buffer, UPB_TIMESTAMP_MAX_JSON_LEN, "%Y", 
gmtime(&time)); 13321 13322 if (p->seconds < -62135596800) { 13323 upb_status_seterrf(s, "error parsing timestamp: " 13324 "minimum acceptable value is " 13325 "0001-01-01T00:00:00Z"); 13326 return false; 13327 } 13328 13329 if (p->seconds > 253402300799) { 13330 upb_status_seterrf(s, "error parsing timestamp: " 13331 "maximum acceptable value is " 13332 "9999-12-31T23:59:59Z"); 13333 return false; 13334 } 13335 13336 /* strftime doesn't guarantee 4 digits for year. Prepend 0 by ourselves. */ 13337 for (i = 0; i < 4 - year_length; i++) { 13338 buffer[i] = '0'; 13339 } 13340 13341 strftime(buffer + (4 - year_length), UPB_TIMESTAMP_MAX_JSON_LEN, 13342 "%Y-%m-%dT%H:%M:%S", gmtime(&time)); 13343 if (p->nanos != 0) { 13344 char nanos_buffer[UPB_TIMESTAMP_MAX_NANO_LEN + 3]; 13345 _upb_snprintf(nanos_buffer, sizeof(nanos_buffer), "%.9f", 13346 p->nanos / 1000000000.0); 13347 /* Remove trailing 0. */ 13348 for (i = UPB_TIMESTAMP_MAX_NANO_LEN + 2; 13349 nanos_buffer[i] == '0'; i--) { 13350 nanos_buffer[i] = 0; 13351 } 13352 strcpy(buffer + UPB_TIMESTAMP_BEFORE_NANO_LEN, nanos_buffer + 1); 13353 } 13354 13355 curr = strlen(buffer); 13356 strcpy(buffer + curr, "Z"); 13357 13358 p->seconds = 0; 13359 p->nanos = 0; 13360 13361 print_data(p, "\"", 1); 13362 print_data(p, buffer, strlen(buffer)); 13363 print_data(p, "\"", 1); 13364 13365 if (p->depth_ == 0) { 13366 upb_bytessink_end(p->output_); 13367 } 13368 13369 UPB_UNUSED(handler_data); 13370 UPB_UNUSED(s); 13371 return true; 13372} 13373 13374static bool printer_startmsg_noframe(void *closure, const void *handler_data) { 13375 upb_json_printer *p = closure; 13376 UPB_UNUSED(handler_data); 13377 if (p->depth_ == 0) { 13378 upb_bytessink_start(p->output_, 0, &p->subc_); 13379 } 13380 return true; 13381} 13382 13383static bool printer_endmsg_noframe( 13384 void *closure, const void *handler_data, upb_status *s) { 13385 upb_json_printer *p = closure; 13386 UPB_UNUSED(handler_data); 13387 UPB_UNUSED(s); 13388 if (p->depth_ == 0) 
{ 13389 upb_bytessink_end(p->output_); 13390 } 13391 return true; 13392} 13393 13394static bool printer_startmsg_fieldmask( 13395 void *closure, const void *handler_data) { 13396 upb_json_printer *p = closure; 13397 UPB_UNUSED(handler_data); 13398 if (p->depth_ == 0) { 13399 upb_bytessink_start(p->output_, 0, &p->subc_); 13400 } 13401 print_data(p, "\"", 1); 13402 return true; 13403} 13404 13405static bool printer_endmsg_fieldmask( 13406 void *closure, const void *handler_data, upb_status *s) { 13407 upb_json_printer *p = closure; 13408 UPB_UNUSED(handler_data); 13409 UPB_UNUSED(s); 13410 print_data(p, "\"", 1); 13411 if (p->depth_ == 0) { 13412 upb_bytessink_end(p->output_); 13413 } 13414 return true; 13415} 13416 13417static void *scalar_startstr_onlykey( 13418 void *closure, const void *handler_data, size_t size_hint) { 13419 upb_json_printer *p = closure; 13420 UPB_UNUSED(size_hint); 13421 CHK(putkey(closure, handler_data)); 13422 return p; 13423} 13424 13425/* Set up handlers for an Any submessage. */ 13426void printer_sethandlers_any(const void *closure, upb_handlers *h) { 13427 const upb_msgdef *md = upb_handlers_msgdef(h); 13428 13429 const upb_fielddef* type_field = upb_msgdef_itof(md, UPB_ANY_TYPE); 13430 const upb_fielddef* value_field = upb_msgdef_itof(md, UPB_ANY_VALUE); 13431 13432 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13433 13434 /* type_url's json name is "@type" */ 13435 upb_handlerattr type_name_attr = UPB_HANDLERATTR_INIT; 13436 upb_handlerattr value_name_attr = UPB_HANDLERATTR_INIT; 13437 strpc *type_url_json_name = newstrpc_str(h, "@type"); 13438 strpc *value_json_name = newstrpc_str(h, "value"); 13439 13440 type_name_attr.handler_data = type_url_json_name; 13441 value_name_attr.handler_data = value_json_name; 13442 13443 /* Set up handlers. 
*/ 13444 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); 13445 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); 13446 13447 upb_handlers_setstartstr(h, type_field, scalar_startstr, &type_name_attr); 13448 upb_handlers_setstring(h, type_field, scalar_str, &empty_attr); 13449 upb_handlers_setendstr(h, type_field, scalar_endstr, &empty_attr); 13450 13451 /* This is not the full and correct JSON encoding for the Any value field. It 13452 * requires further processing by the wrapper code based on the type URL. 13453 */ 13454 upb_handlers_setstartstr(h, value_field, scalar_startstr_onlykey, 13455 &value_name_attr); 13456 13457 UPB_UNUSED(closure); 13458} 13459 13460/* Set up handlers for a fieldmask submessage. */ 13461void printer_sethandlers_fieldmask(const void *closure, upb_handlers *h) { 13462 const upb_msgdef *md = upb_handlers_msgdef(h); 13463 const upb_fielddef* f = upb_msgdef_itof(md, 1); 13464 13465 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13466 13467 upb_handlers_setstartseq(h, f, startseq_fieldmask, &empty_attr); 13468 upb_handlers_setendseq(h, f, endseq_fieldmask, &empty_attr); 13469 13470 upb_handlers_setstartmsg(h, printer_startmsg_fieldmask, &empty_attr); 13471 upb_handlers_setendmsg(h, printer_endmsg_fieldmask, &empty_attr); 13472 13473 upb_handlers_setstartstr(h, f, repeated_startstr_fieldmask, &empty_attr); 13474 upb_handlers_setstring(h, f, repeated_str_fieldmask, &empty_attr); 13475 13476 UPB_UNUSED(closure); 13477} 13478 13479/* Set up handlers for a duration submessage. 
*/ 13480void printer_sethandlers_duration(const void *closure, upb_handlers *h) { 13481 const upb_msgdef *md = upb_handlers_msgdef(h); 13482 13483 const upb_fielddef* seconds_field = 13484 upb_msgdef_itof(md, UPB_DURATION_SECONDS); 13485 const upb_fielddef* nanos_field = 13486 upb_msgdef_itof(md, UPB_DURATION_NANOS); 13487 13488 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13489 13490 upb_handlers_setstartmsg(h, printer_startdurationmsg, &empty_attr); 13491 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); 13492 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); 13493 upb_handlers_setendmsg(h, printer_enddurationmsg, &empty_attr); 13494 13495 UPB_UNUSED(closure); 13496} 13497 13498/* Set up handlers for a timestamp submessage. Instead of printing fields 13499 * separately, the json representation of timestamp follows RFC 3339 */ 13500void printer_sethandlers_timestamp(const void *closure, upb_handlers *h) { 13501 const upb_msgdef *md = upb_handlers_msgdef(h); 13502 13503 const upb_fielddef* seconds_field = 13504 upb_msgdef_itof(md, UPB_TIMESTAMP_SECONDS); 13505 const upb_fielddef* nanos_field = 13506 upb_msgdef_itof(md, UPB_TIMESTAMP_NANOS); 13507 13508 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13509 13510 upb_handlers_setstartmsg(h, printer_starttimestampmsg, &empty_attr); 13511 upb_handlers_setint64(h, seconds_field, putseconds, &empty_attr); 13512 upb_handlers_setint32(h, nanos_field, putnanos, &empty_attr); 13513 upb_handlers_setendmsg(h, printer_endtimestampmsg, &empty_attr); 13514 13515 UPB_UNUSED(closure); 13516} 13517 13518void printer_sethandlers_value(const void *closure, upb_handlers *h) { 13519 const upb_msgdef *md = upb_handlers_msgdef(h); 13520 upb_msg_field_iter i; 13521 13522 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13523 13524 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); 13525 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); 13526 13527 upb_msg_field_begin(&i, 
md); 13528 for(; !upb_msg_field_done(&i); upb_msg_field_next(&i)) { 13529 const upb_fielddef *f = upb_msg_iter_field(&i); 13530 13531 switch (upb_fielddef_type(f)) { 13532 case UPB_TYPE_ENUM: 13533 upb_handlers_setint32(h, f, putnull, &empty_attr); 13534 break; 13535 case UPB_TYPE_DOUBLE: 13536 upb_handlers_setdouble(h, f, putdouble, &empty_attr); 13537 break; 13538 case UPB_TYPE_STRING: 13539 upb_handlers_setstartstr(h, f, scalar_startstr_nokey, &empty_attr); 13540 upb_handlers_setstring(h, f, scalar_str, &empty_attr); 13541 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); 13542 break; 13543 case UPB_TYPE_BOOL: 13544 upb_handlers_setbool(h, f, putbool, &empty_attr); 13545 break; 13546 case UPB_TYPE_MESSAGE: 13547 break; 13548 default: 13549 UPB_ASSERT(false); 13550 break; 13551 } 13552 } 13553 13554 UPB_UNUSED(closure); 13555} 13556 13557#define WRAPPER_SETHANDLERS(wrapper, type, putmethod) \ 13558void printer_sethandlers_##wrapper(const void *closure, upb_handlers *h) { \ 13559 const upb_msgdef *md = upb_handlers_msgdef(h); \ 13560 const upb_fielddef* f = upb_msgdef_itof(md, 1); \ 13561 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; \ 13562 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); \ 13563 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); \ 13564 upb_handlers_set##type(h, f, putmethod, &empty_attr); \ 13565 UPB_UNUSED(closure); \ 13566} 13567 13568WRAPPER_SETHANDLERS(doublevalue, double, putdouble) 13569WRAPPER_SETHANDLERS(floatvalue, float, putfloat) 13570WRAPPER_SETHANDLERS(int64value, int64, putint64_t) 13571WRAPPER_SETHANDLERS(uint64value, uint64, putuint64_t) 13572WRAPPER_SETHANDLERS(int32value, int32, putint32_t) 13573WRAPPER_SETHANDLERS(uint32value, uint32, putuint32_t) 13574WRAPPER_SETHANDLERS(boolvalue, bool, putbool) 13575WRAPPER_SETHANDLERS(stringvalue, string, putstr_nokey) 13576WRAPPER_SETHANDLERS(bytesvalue, string, putbytes) 13577 13578#undef WRAPPER_SETHANDLERS 13579 13580void 
printer_sethandlers_listvalue(const void *closure, upb_handlers *h) { 13581 const upb_msgdef *md = upb_handlers_msgdef(h); 13582 const upb_fielddef* f = upb_msgdef_itof(md, 1); 13583 13584 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13585 13586 upb_handlers_setstartseq(h, f, startseq_nokey, &empty_attr); 13587 upb_handlers_setendseq(h, f, endseq, &empty_attr); 13588 13589 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); 13590 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); 13591 13592 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); 13593 13594 UPB_UNUSED(closure); 13595} 13596 13597void printer_sethandlers_structvalue(const void *closure, upb_handlers *h) { 13598 const upb_msgdef *md = upb_handlers_msgdef(h); 13599 const upb_fielddef* f = upb_msgdef_itof(md, 1); 13600 13601 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13602 13603 upb_handlers_setstartseq(h, f, startmap_nokey, &empty_attr); 13604 upb_handlers_setendseq(h, f, endmap, &empty_attr); 13605 13606 upb_handlers_setstartmsg(h, printer_startmsg_noframe, &empty_attr); 13607 upb_handlers_setendmsg(h, printer_endmsg_noframe, &empty_attr); 13608 13609 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &empty_attr); 13610 13611 UPB_UNUSED(closure); 13612} 13613 13614void printer_sethandlers(const void *closure, upb_handlers *h) { 13615 const upb_msgdef *md = upb_handlers_msgdef(h); 13616 bool is_mapentry = upb_msgdef_mapentry(md); 13617 upb_handlerattr empty_attr = UPB_HANDLERATTR_INIT; 13618 upb_msg_field_iter i; 13619 const upb_json_printercache *cache = closure; 13620 const bool preserve_fieldnames = cache->preserve_fieldnames; 13621 13622 if (is_mapentry) { 13623 /* mapentry messages are sufficiently different that we handle them 13624 * separately. 
*/ 13625 printer_sethandlers_mapentry(closure, preserve_fieldnames, h); 13626 return; 13627 } 13628 13629 switch (upb_msgdef_wellknowntype(md)) { 13630 case UPB_WELLKNOWN_UNSPECIFIED: 13631 break; 13632 case UPB_WELLKNOWN_ANY: 13633 printer_sethandlers_any(closure, h); 13634 return; 13635 case UPB_WELLKNOWN_FIELDMASK: 13636 printer_sethandlers_fieldmask(closure, h); 13637 return; 13638 case UPB_WELLKNOWN_DURATION: 13639 printer_sethandlers_duration(closure, h); 13640 return; 13641 case UPB_WELLKNOWN_TIMESTAMP: 13642 printer_sethandlers_timestamp(closure, h); 13643 return; 13644 case UPB_WELLKNOWN_VALUE: 13645 printer_sethandlers_value(closure, h); 13646 return; 13647 case UPB_WELLKNOWN_LISTVALUE: 13648 printer_sethandlers_listvalue(closure, h); 13649 return; 13650 case UPB_WELLKNOWN_STRUCT: 13651 printer_sethandlers_structvalue(closure, h); 13652 return; 13653#define WRAPPER(wellknowntype, name) \ 13654 case wellknowntype: \ 13655 printer_sethandlers_##name(closure, h); \ 13656 return; \ 13657 13658 WRAPPER(UPB_WELLKNOWN_DOUBLEVALUE, doublevalue); 13659 WRAPPER(UPB_WELLKNOWN_FLOATVALUE, floatvalue); 13660 WRAPPER(UPB_WELLKNOWN_INT64VALUE, int64value); 13661 WRAPPER(UPB_WELLKNOWN_UINT64VALUE, uint64value); 13662 WRAPPER(UPB_WELLKNOWN_INT32VALUE, int32value); 13663 WRAPPER(UPB_WELLKNOWN_UINT32VALUE, uint32value); 13664 WRAPPER(UPB_WELLKNOWN_BOOLVALUE, boolvalue); 13665 WRAPPER(UPB_WELLKNOWN_STRINGVALUE, stringvalue); 13666 WRAPPER(UPB_WELLKNOWN_BYTESVALUE, bytesvalue); 13667 13668#undef WRAPPER 13669 } 13670 13671 upb_handlers_setstartmsg(h, printer_startmsg, &empty_attr); 13672 upb_handlers_setendmsg(h, printer_endmsg, &empty_attr); 13673 13674#define TYPE(type, name, ctype) \ 13675 case type: \ 13676 if (upb_fielddef_isseq(f)) { \ 13677 upb_handlers_set##name(h, f, repeated_##ctype, &empty_attr); \ 13678 } else { \ 13679 upb_handlers_set##name(h, f, scalar_##ctype, &name_attr); \ 13680 } \ 13681 break; 13682 13683 upb_msg_field_begin(&i, md); 13684 for(; 
!upb_msg_field_done(&i); upb_msg_field_next(&i)) { 13685 const upb_fielddef *f = upb_msg_iter_field(&i); 13686 13687 upb_handlerattr name_attr = UPB_HANDLERATTR_INIT; 13688 name_attr.handler_data = newstrpc(h, f, preserve_fieldnames); 13689 13690 if (upb_fielddef_ismap(f)) { 13691 upb_handlers_setstartseq(h, f, startmap, &name_attr); 13692 upb_handlers_setendseq(h, f, endmap, &name_attr); 13693 } else if (upb_fielddef_isseq(f)) { 13694 upb_handlers_setstartseq(h, f, startseq, &name_attr); 13695 upb_handlers_setendseq(h, f, endseq, &empty_attr); 13696 } 13697 13698 switch (upb_fielddef_type(f)) { 13699 TYPE(UPB_TYPE_FLOAT, float, float); 13700 TYPE(UPB_TYPE_DOUBLE, double, double); 13701 TYPE(UPB_TYPE_BOOL, bool, bool); 13702 TYPE(UPB_TYPE_INT32, int32, int32_t); 13703 TYPE(UPB_TYPE_UINT32, uint32, uint32_t); 13704 TYPE(UPB_TYPE_INT64, int64, int64_t); 13705 TYPE(UPB_TYPE_UINT64, uint64, uint64_t); 13706 case UPB_TYPE_ENUM: { 13707 /* For now, we always emit symbolic names for enums. We may want an 13708 * option later to control this behavior, but we will wait for a real 13709 * need first. 
*/ 13710 upb_handlerattr enum_attr = UPB_HANDLERATTR_INIT; 13711 set_enum_hd(h, f, preserve_fieldnames, &enum_attr); 13712 13713 if (upb_fielddef_isseq(f)) { 13714 upb_handlers_setint32(h, f, repeated_enum, &enum_attr); 13715 } else { 13716 upb_handlers_setint32(h, f, scalar_enum, &enum_attr); 13717 } 13718 13719 break; 13720 } 13721 case UPB_TYPE_STRING: 13722 if (upb_fielddef_isseq(f)) { 13723 upb_handlers_setstartstr(h, f, repeated_startstr, &empty_attr); 13724 upb_handlers_setstring(h, f, repeated_str, &empty_attr); 13725 upb_handlers_setendstr(h, f, repeated_endstr, &empty_attr); 13726 } else { 13727 upb_handlers_setstartstr(h, f, scalar_startstr, &name_attr); 13728 upb_handlers_setstring(h, f, scalar_str, &empty_attr); 13729 upb_handlers_setendstr(h, f, scalar_endstr, &empty_attr); 13730 } 13731 break; 13732 case UPB_TYPE_BYTES: 13733 /* XXX: this doesn't support strings that span buffers yet. The base64 13734 * encoder will need to be made resumable for this to work properly. */ 13735 if (upb_fielddef_isseq(f)) { 13736 upb_handlers_setstring(h, f, repeated_bytes, &empty_attr); 13737 } else { 13738 upb_handlers_setstring(h, f, scalar_bytes, &name_attr); 13739 } 13740 break; 13741 case UPB_TYPE_MESSAGE: 13742 if (upb_fielddef_isseq(f)) { 13743 upb_handlers_setstartsubmsg(h, f, repeated_startsubmsg, &name_attr); 13744 } else { 13745 upb_handlers_setstartsubmsg(h, f, scalar_startsubmsg, &name_attr); 13746 } 13747 break; 13748 } 13749 } 13750 13751#undef TYPE 13752} 13753 13754static void json_printer_reset(upb_json_printer *p) { 13755 p->depth_ = 0; 13756} 13757 13758 13759/* Public API *****************************************************************/ 13760 13761upb_json_printer *upb_json_printer_create(upb_arena *a, const upb_handlers *h, 13762 upb_bytessink output) { 13763#ifndef NDEBUG 13764 size_t size_before = upb_arena_bytesallocated(a); 13765#endif 13766 13767 upb_json_printer *p = upb_arena_malloc(a, sizeof(upb_json_printer)); 13768 if (!p) return NULL; 
13769 13770 p->output_ = output; 13771 json_printer_reset(p); 13772 upb_sink_reset(&p->input_, h, p); 13773 p->seconds = 0; 13774 p->nanos = 0; 13775 13776 /* If this fails, increase the value in printer.h. */ 13777 UPB_ASSERT_DEBUGVAR(upb_arena_bytesallocated(a) - size_before <= 13778 UPB_JSON_PRINTER_SIZE); 13779 return p; 13780} 13781 13782upb_sink upb_json_printer_input(upb_json_printer *p) { 13783 return p->input_; 13784} 13785 13786upb_handlercache *upb_json_printer_newcache(bool preserve_proto_fieldnames) { 13787 upb_json_printercache *cache = upb_gmalloc(sizeof(*cache)); 13788 upb_handlercache *ret = upb_handlercache_new(printer_sethandlers, cache); 13789 13790 cache->preserve_fieldnames = preserve_proto_fieldnames; 13791 upb_handlercache_addcleanup(ret, cache, upb_gfree); 13792 13793 return ret; 13794} 13795/* See port_def.inc. This should #undef all macros #defined there. */ 13796 13797#undef UPB_MAPTYPE_STRING 13798#undef UPB_SIZE 13799#undef UPB_PTR_AT 13800#undef UPB_READ_ONEOF 13801#undef UPB_WRITE_ONEOF 13802#undef UPB_INLINE 13803#undef UPB_FORCEINLINE 13804#undef UPB_NOINLINE 13805#undef UPB_NORETURN 13806#undef UPB_MAX 13807#undef UPB_MIN 13808#undef UPB_UNUSED 13809#undef UPB_ASSUME 13810#undef UPB_ASSERT 13811#undef UPB_ASSERT_DEBUGVAR 13812#undef UPB_UNREACHABLE 13813#undef UPB_INFINITY 13814#undef UPB_MSVC_VSNPRINTF 13815#undef _upb_snprintf 13816#undef _upb_vsnprintf 13817#undef _upb_va_copy 13818