/*
 * sfparse
 *
 * Copyright (c) 2023 sfparse contributors
 * Copyright (c) 2019 nghttp3 contributors
 * Copyright (c) 2015 nghttp2 contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be
 * included in all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
#include "sfparse.h"

#include <string.h>
#include <assert.h>
#include <stdlib.h>

/*
 * Layout of sf_parser.state as used in this file:
 *
 *   bits 3-4 (0x18): which top-level structure is being parsed
 *                    (SF_STATE_DICT, SF_STATE_LIST or SF_STATE_ITEM)
 *   bit  2   (0x04): set while the parser is inside an inner list
 *   bits 0-1 (0x03): the "operation" sub-state, selected with
 *                    SF_STATE_OP_MASK
 *
 * A state of 0 (SF_STATE_INITIAL) means nothing has been parsed yet.
 */
#define SF_STATE_DICT 0x08u
#define SF_STATE_LIST 0x10u
#define SF_STATE_ITEM 0x18u

#define SF_STATE_INNER_LIST 0x04u

/* Operation sub-states (the low two bits of the state). */
#define SF_STATE_BEFORE 0x00u
#define SF_STATE_BEFORE_PARAMS 0x01u
#define SF_STATE_PARAMS 0x02u
#define SF_STATE_AFTER 0x03u

#define SF_STATE_OP_MASK 0x03u

/* Helpers that combine a structure kind (DICT, LIST, ITEM) with an
   operation sub-state and, for the last one, the inner list flag. */
#define SF_SET_STATE_AFTER(NAME) (SF_STATE_##NAME | SF_STATE_AFTER)
#define SF_SET_STATE_BEFORE_PARAMS(NAME) \
  (SF_STATE_##NAME | SF_STATE_BEFORE_PARAMS)
#define SF_SET_STATE_INNER_LIST_BEFORE(NAME) \
  (SF_STATE_##NAME | SF_STATE_INNER_LIST | SF_STATE_BEFORE)

#define SF_STATE_DICT_AFTER SF_SET_STATE_AFTER(DICT)
#define SF_STATE_DICT_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(DICT)
#define SF_STATE_DICT_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(DICT)

#define SF_STATE_LIST_AFTER SF_SET_STATE_AFTER(LIST)
#define SF_STATE_LIST_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(LIST)
#define SF_STATE_LIST_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(LIST)

#define SF_STATE_ITEM_AFTER SF_SET_STATE_AFTER(ITEM)
#define SF_STATE_ITEM_BEFORE_PARAMS SF_SET_STATE_BEFORE_PARAMS(ITEM)
#define SF_STATE_ITEM_INNER_LIST_BEFORE SF_SET_STATE_INNER_LIST_BEFORE(ITEM)

#define SF_STATE_INITIAL 0x00u

/*
 * The *_CASES macros below expand to a run of `case` labels for use
 * inside a switch statement.  The final label deliberately has no
 * trailing colon; the colon is written at the point of use, e.g.
 * `DIGIT_CASES:`.
 */

/* '0' through '9'. */
#define DIGIT_CASES \
  case '0': \
  case '1': \
  case '2': \
  case '3': \
  case '4': \
  case '5': \
  case '6': \
  case '7': \
  case '8': \
  case '9'

/* 'a' through 'z'. */
#define LCALPHA_CASES \
  case 'a': \
  case 'b': \
  case 'c': \
  case 'd': \
  case 'e': \
  case 'f': \
  case 'g': \
  case 'h': \
  case 'i': \
  case 'j': \
  case 'k': \
  case 'l': \
  case 'm': \
  case 'n': \
  case 'o': \
  case 'p': \
  case 'q': \
  case 'r': \
  case 's': \
  case 't': \
  case 'u': \
  case 'v': \
  case 'w': \
  case 'x': \
  case 'y': \
  case 'z'

/* 'A' through 'Z'. */
#define UCALPHA_CASES \
  case 'A': \
  case 'B': \
  case 'C': \
  case 'D': \
  case 'E': \
  case 'F': \
  case 'G': \
  case 'H': \
  case 'I': \
  case 'J': \
  case 'K': \
  case 'L': \
  case 'M': \
  case 'N': \
  case 'O': \
  case 'P': \
  case 'Q': \
  case 'R': \
  case 'S': \
  case 'T': \
  case 'U': \
  case 'V': \
  case 'W': \
  case 'X': \
  case 'Y': \
  case 'Z'

/* Upper case followed by lower case letters. */
#define ALPHA_CASES \
  UCALPHA_CASES: \
  LCALPHA_CASES

/*
 * The next three macros enumerate the bytes 0x20-0x21, 0x23-0x5B and
 * 0x5D-0x7E.  Taken together they cover 0x20-0x7E except '"' (0x22)
 * and '\\' (0x5C), which parser_string handles separately.
 */
#define X20_21_CASES \
  case ' ': \
  case '!'

#define X23_5B_CASES \
  case '#': \
  case '$': \
  case '%': \
  case '&': \
  case '\'': \
  case '(': \
  case ')': \
  case '*': \
  case '+': \
  case ',': \
  case '-': \
  case '.': \
  case '/': \
  DIGIT_CASES: \
  case ':': \
  case ';': \
  case '<': \
  case '=': \
  case '>': \
  case '?': \
  case '@': \
  UCALPHA_CASES: \
  case '['

#define X5D_7E_CASES \
  case ']': \
  case '^': \
  case '_': \
  case '`': \
  LCALPHA_CASES: \
  case '{': \
  case '|': \
  case '}': \
  case '~'

/* Returns 1 if |c| is a space or a horizontal tab, otherwise 0. */
static int is_ws(uint8_t c) {
  switch (c) {
  case ' ':
  case '\t':
    return 1;
  default:
    return 0;
  }
}

/* Returns nonzero if the read position has reached the end of the
   input. */
static int parser_eof(sf_parser *sfp) { return sfp->pos == sfp->end; }

/* Advances the read position past any run of spaces and tabs. */
static void parser_discard_ows(sf_parser *sfp) {
  for (; !parser_eof(sfp) && is_ws(*sfp->pos); ++sfp->pos)
    ;
}

/* Advances the read position past any run of spaces (tabs are not
   skipped). */
static void parser_discard_sp(sf_parser *sfp) {
  for (; !parser_eof(sfp) && *sfp->pos == ' '; ++sfp->pos)
    ;
}

/* Replaces the operation sub-state (low two bits of sfp->state) with
   |op|, leaving the other state bits untouched. */
static void parser_set_op_state(sf_parser *sfp, uint32_t op) {
  sfp->state &= ~SF_STATE_OP_MASK;
  sfp->state |= op;
}

/* Clears the inner list flag in sfp->state. */
static void parser_unset_inner_list_state(sf_parser *sfp) {
  sfp->state &= ~SF_STATE_INNER_LIST;
}

/*
 * Parses a key starting at the current position.  The first byte must
 * be '*' or a lower case letter; the following bytes may be '_', '-',
 * '.', '*', a digit or a lower case letter.  Parsing stops at the
 * first byte outside that set or at the end of input.
 *
 * The first byte is read without an end-of-input check, so the caller
 * must make sure that the input is not exhausted.
 *
 * If |dest| is not NULL, it is set to the span of the key inside the
 * input buffer (no copy is made).
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR if the first byte is
 * not a valid key start.
 */
static int parser_key(sf_parser *sfp, sf_vec *dest) {
  const uint8_t *base;

  switch (*sfp->pos) {
  case '*':
  LCALPHA_CASES:
    break;
  default:
    return SF_ERR_PARSE_ERROR;
  }

  base = sfp->pos++;

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    case '_':
    case '-':
    case '.':
    case '*':
    DIGIT_CASES:
    LCALPHA_CASES:
      continue;
    }

    /* Reached only for a byte that is not part of a key. */
    break;
  }

  if (dest) {
    dest->base = (uint8_t *)base;
    dest->len = (size_t)(sfp->pos - dest->base);
  }

  return 0;
}

/*
 * Parses an integer or a decimal starting at the current position.
 * The caller must make sure that the input is not exhausted.
 *
 * An optional leading '-' is accepted.  At most 15 digits in total
 * are accepted.  If the digits are not followed by '.', the result is
 * SF_TYPE_INTEGER.  Otherwise the result is SF_TYPE_DECIMAL, which
 * additionally requires at most 12 digits before '.', and between 1
 * and 3 digits after it.  A decimal is stored as a fraction: numer
 * holds all digits (with sign applied) and denom is 10, 100 or 1000
 * depending on the number of fractional digits.
 *
 * If |dest| is NULL, the number is validated and consumed but not
 * stored.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_number(sf_parser *sfp, sf_value *dest) {
  int sign = 1;
  int64_t value = 0;
  /* Number of digits consumed so far. */
  size_t len = 0;
  /* Number of digits that precede the decimal point. */
  size_t fpos = 0;

  if (*sfp->pos == '-') {
    ++sfp->pos;
    if (parser_eof(sfp)) {
      return SF_ERR_PARSE_ERROR;
    }

    sign = -1;
  }

  assert(!parser_eof(sfp));

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    DIGIT_CASES:
      if (++len > 15) {
        return SF_ERR_PARSE_ERROR;
      }

      value *= 10;
      value += *sfp->pos - '0';

      continue;
    }

    break;
  }

  /* No digit at all, e.g. a lone '-'. */
  if (len == 0) {
    return SF_ERR_PARSE_ERROR;
  }

  if (parser_eof(sfp) || *sfp->pos != '.') {
    if (dest) {
      dest->type = SF_TYPE_INTEGER;
      dest->flags = SF_VALUE_FLAG_NONE;
      dest->integer = value * sign;
    }

    return 0;
  }

  /* decimal */

  if (len > 12) {
    return SF_ERR_PARSE_ERROR;
  }

  fpos = len;

  /* Skip '.'. */
  ++sfp->pos;

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    DIGIT_CASES:
      if (++len > 15) {
        return SF_ERR_PARSE_ERROR;
      }

      value *= 10;
      value += *sfp->pos - '0';

      continue;
    }

    break;
  }

  /* Require 1 to 3 fractional digits. */
  if (fpos == len || len - fpos > 3) {
    return SF_ERR_PARSE_ERROR;
  }

  if (dest) {
    dest->type = SF_TYPE_DECIMAL;
    dest->flags = SF_VALUE_FLAG_NONE;
    dest->decimal.numer = value * sign;

    /* len - fpos is in [1, 3] because of the check above. */
    switch (len - fpos) {
    case 1:
      dest->decimal.denom = 10;

      break;
    case 2:
      dest->decimal.denom = 100;

      break;
    case 3:
      dest->decimal.denom = 1000;

      break;
    }
  }

  return 0;
}

/*
 * Parses a date: '@' followed by an integer as accepted by
 * parser_number.  A decimal after '@' is rejected.
 *
 * If |dest| is not NULL, it receives the parsed integer value with
 * its type set to SF_TYPE_DATE.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_date(sf_parser *sfp, sf_value *dest) {
  int rv;
  sf_value val;

  /* The first byte has already been validated by the caller. */
  assert('@' == *sfp->pos);

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  rv = parser_number(sfp, &val);
  if (rv != 0) {
    return rv;
  }

  if (val.type != SF_TYPE_INTEGER) {
    return SF_ERR_PARSE_ERROR;
  }

  if (dest) {
    *dest = val;
    dest->type = SF_TYPE_DATE;
  }

  return 0;
}

/*
 * Parses a double-quoted string.  Inside the quotes, the bytes
 * 0x20-0x7E other than '"' and '\\' are accepted as is, and '\\' must
 * be followed by '"' or '\\'.  Any other byte, or reaching the end of
 * input before the closing quote, is an error.
 *
 * If |dest| is not NULL, dest->vec is set to the span between the
 * quotes inside the input buffer.  Escape sequences are left in
 * place; when at least one was seen, dest->flags is set to
 * SF_VALUE_FLAG_ESCAPED_STRING.  For an empty string, dest->vec.base
 * is NULL.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_string(sf_parser *sfp, sf_value *dest) {
  const uint8_t *base;
  uint32_t flags = SF_VALUE_FLAG_NONE;

  /* The first byte has already been validated by the caller. */
  assert('"' == *sfp->pos);

  base = ++sfp->pos;

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    X20_21_CASES:
    X23_5B_CASES:
    X5D_7E_CASES:
      break;
    case '\\':
      ++sfp->pos;
      if (parser_eof(sfp)) {
        return SF_ERR_PARSE_ERROR;
      }

      switch (*sfp->pos) {
      case '"':
      case '\\':
        flags = SF_VALUE_FLAG_ESCAPED_STRING;

        break;
      default:
        return SF_ERR_PARSE_ERROR;
      }

      break;
    case '"':
      if (dest) {
        dest->type = SF_TYPE_STRING;
        dest->flags = flags;
        dest->vec.len = (size_t)(sfp->pos - base);
        dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
      }

      /* Skip the closing quote. */
      ++sfp->pos;

      return 0;
    default:
      return SF_ERR_PARSE_ERROR;
    }
  }

  /* The closing quote was never found. */
  return SF_ERR_PARSE_ERROR;
}

/*
 * Parses a token.  The first byte is consumed without inspection; the
 * token then extends over every following byte that is a letter, a
 * digit, or one of "!#$%&'*+-.^_`|~:/".
 *
 * If |dest| is not NULL, dest->vec is set to the span of the token
 * inside the input buffer.
 *
 * Always returns 0.
 */
static int parser_token(sf_parser *sfp, sf_value *dest) {
  const uint8_t *base;

  /* The first byte has already been validated by the caller. */
  base = sfp->pos++;

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    case '!':
    case '#':
    case '$':
    case '%':
    case '&':
    case '\'':
    case '*':
    case '+':
    case '-':
    case '.':
    case '^':
    case '_':
    case '`':
    case '|':
    case '~':
    case ':':
    case '/':
    DIGIT_CASES:
    ALPHA_CASES:
      continue;
    }

    break;
  }

  if (dest) {
    dest->type = SF_TYPE_TOKEN;
    dest->flags = SF_VALUE_FLAG_NONE;
    dest->vec.base = (uint8_t *)base;
    dest->vec.len = (size_t)(sfp->pos - base);
  }

  return 0;
}

/*
 * Parses a byte sequence: base64 text enclosed in a pair of ':'.
 *
 * The text between the colons may consist of letters, digits, '+' and
 * '/', optionally followed by '=' padding.  Without padding, the
 * length must be a multiple of 4.  Padding is accepted only in these
 * forms:
 *
 *   - "==" after 2 characters of the final group, where the character
 *     before the padding is one of A, Q, g, w;
 *   - "=" after 3 characters of the final group, where the character
 *     before the padding is one of A, E, I, M, Q, U, Y, c, g, k, o,
 *     s, w, 0, 4, 8.
 *
 * In both cases the padding must be immediately followed by the
 * closing ':'.
 *
 * If |dest| is not NULL, dest->vec is set to the span between the
 * colons inside the input buffer, still base64 encoded and including
 * any padding.  For an empty sequence, dest->vec.base is NULL.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_byteseq(sf_parser *sfp, sf_value *dest) {
  const uint8_t *base;

  /* The first byte has already been validated by the caller. */
  assert(':' == *sfp->pos);

  base = ++sfp->pos;

  for (; !parser_eof(sfp); ++sfp->pos) {
    switch (*sfp->pos) {
    case '+':
    case '/':
    DIGIT_CASES:
    ALPHA_CASES:
      continue;
    case '=':
      /* The number of characters already read in the final group of 4
         decides which padding is acceptable. */
      switch ((sfp->pos - base) & 0x3) {
      case 0:
      case 1:
        return SF_ERR_PARSE_ERROR;
      case 2:
        switch (*(sfp->pos - 1)) {
        case 'A':
        case 'Q':
        case 'g':
        case 'w':
          break;
        default:
          return SF_ERR_PARSE_ERROR;
        }

        ++sfp->pos;

        /* A second '=' is required. */
        if (parser_eof(sfp) || *sfp->pos != '=') {
          return SF_ERR_PARSE_ERROR;
        }

        break;
      case 3:
        switch (*(sfp->pos - 1)) {
        case 'A':
        case 'E':
        case 'I':
        case 'M':
        case 'Q':
        case 'U':
        case 'Y':
        case 'c':
        case 'g':
        case 'k':
        case 'o':
        case 's':
        case 'w':
        case '0':
        case '4':
        case '8':
          break;
        default:
          return SF_ERR_PARSE_ERROR;
        }

        break;
      }

      ++sfp->pos;

      /* Nothing but the closing ':' may follow the padding. */
      if (parser_eof(sfp) || *sfp->pos != ':') {
        return SF_ERR_PARSE_ERROR;
      }

      goto fin;
    case ':':
      /* Unpadded input must be a whole number of 4 character groups. */
      if ((sfp->pos - base) & 0x3) {
        return SF_ERR_PARSE_ERROR;
      }

      goto fin;
    default:
      return SF_ERR_PARSE_ERROR;
    }
  }

  /* The closing ':' was never found. */
  return SF_ERR_PARSE_ERROR;

fin:
  /* sfp->pos points to the closing ':' here. */
  if (dest) {
    dest->type = SF_TYPE_BYTESEQ;
    dest->flags = SF_VALUE_FLAG_NONE;
    dest->vec.len = (size_t)(sfp->pos - base);
    dest->vec.base = dest->vec.len == 0 ? NULL : (uint8_t *)base;
  }

  ++sfp->pos;

  return 0;
}

/*
 * Parses a boolean: "?0" (false) or "?1" (true).
 *
 * If |dest| is not NULL, dest->boolean is set to 0 or 1.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_boolean(sf_parser *sfp, sf_value *dest) {
  int b;

  /* The first byte has already been validated by the caller. */
  assert('?' == *sfp->pos);

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  switch (*sfp->pos) {
  case '0':
    b = 0;

    break;
  case '1':
    b = 1;

    break;
  default:
    return SF_ERR_PARSE_ERROR;
  }

  ++sfp->pos;

  if (dest) {
    dest->type = SF_TYPE_BOOLEAN;
    dest->flags = SF_VALUE_FLAG_NONE;
    dest->boolean = b;
  }

  return 0;
}

/*
 * Parses a bare item by dispatching on the byte at the current
 * position: '"' string, '-' or digit number, '@' date, ':' byte
 * sequence, '?' boolean, '*' or letter token.  Any other byte is an
 * error.
 *
 * The byte is read without an end-of-input check, so the caller must
 * make sure that the input is not exhausted.
 *
 * Returns the result of the selected parser, or SF_ERR_PARSE_ERROR.
 */
static int parser_bare_item(sf_parser *sfp, sf_value *dest) {
  switch (*sfp->pos) {
  case '"':
    return parser_string(sfp, dest);
  case '-':
  DIGIT_CASES:
    return parser_number(sfp, dest);
  case '@':
    return parser_date(sfp, dest);
  case ':':
    return parser_byteseq(sfp, dest);
  case '?':
    return parser_boolean(sfp, dest);
  case '*':
  ALPHA_CASES:
    return parser_token(sfp, dest);
  default:
    return SF_ERR_PARSE_ERROR;
  }
}

static int parser_skip_inner_list(sf_parser *sfp);

/*
 * Reads the next parameter (";key" or ";key=value") at the current
 * position.
 *
 * If the parser is still positioned before or inside an inner list
 * (operation sub-state SF_STATE_BEFORE), the rest of that inner list
 * is skipped first.
 *
 * When the next byte is not ';' (or the input is exhausted), there
 * are no more parameters: the operation sub-state becomes
 * SF_STATE_AFTER and SF_ERR_EOF is returned.
 *
 * Otherwise ';' and any following spaces are consumed and the key is
 * parsed into |dest_key|.  If the key is followed by '=', the bare
 * item after it is parsed into |dest_value|; if not, |dest_value| is
 * set to boolean true.  Both |dest_key| and |dest_value| may be NULL.
 *
 * Returns 0 if a parameter was read, SF_ERR_EOF if there are no more
 * parameters, or SF_ERR_PARSE_ERROR.
 */
int sf_parser_param(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) {
  int rv;

  switch (sfp->state & SF_STATE_OP_MASK) {
  case SF_STATE_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_BEFORE_PARAMS:
    parser_set_op_state(sfp, SF_STATE_PARAMS);

    break;
  case SF_STATE_PARAMS:
    break;
  default:
    assert(0);
    abort();
  }

  if (parser_eof(sfp) || *sfp->pos != ';') {
    parser_set_op_state(sfp, SF_STATE_AFTER);

    return SF_ERR_EOF;
  }

  ++sfp->pos;

  parser_discard_sp(sfp);
  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  rv = parser_key(sfp, dest_key);
  if (rv != 0) {
    return rv;
  }

  /* A key without "=value" is boolean true. */
  if (parser_eof(sfp) || *sfp->pos != '=') {
    if (dest_value) {
      dest_value->type = SF_TYPE_BOOLEAN;
      dest_value->flags = SF_VALUE_FLAG_NONE;
      dest_value->boolean = 1;
    }

    return 0;
  }

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  return parser_bare_item(sfp, dest_value);
}

/*
 * Consumes all remaining parameters by calling sf_parser_param until
 * it reports SF_ERR_EOF.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_skip_params(sf_parser *sfp) {
  int rv;

  for (;;) {
    rv = sf_parser_param(sfp, NULL, NULL);
    switch (rv) {
    case 0:
      break;
    case SF_ERR_EOF:
      return 0;
    case SF_ERR_PARSE_ERROR:
      return rv;
    default:
      assert(0);
      abort();
    }
  }
}

/*
 * Reads the next item of the inner list whose opening '(' has already
 * been consumed.
 *
 * Depending on the operation sub-state, this first skips leading
 * spaces (SF_STATE_BEFORE), or skips the unread parameters of the
 * previous item and then requires either a space or ')' to follow
 * (SF_STATE_BEFORE_PARAMS and SF_STATE_AFTER).
 *
 * If the next byte is ')', it is consumed, the inner list flag is
 * cleared, the operation sub-state becomes SF_STATE_BEFORE_PARAMS
 * and SF_ERR_EOF is returned.  Otherwise a bare item is parsed into
 * |dest|, which may be NULL.
 *
 * Returns 0 if an item was read, SF_ERR_EOF at the end of the inner
 * list, or SF_ERR_PARSE_ERROR.
 */
int sf_parser_inner_list(sf_parser *sfp, sf_value *dest) {
  int rv;

  switch (sfp->state & SF_STATE_OP_MASK) {
  case SF_STATE_BEFORE:
    parser_discard_sp(sfp);
    if (parser_eof(sfp)) {
      return SF_ERR_PARSE_ERROR;
    }

    break;
  case SF_STATE_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* Technically, we are entering SF_STATE_AFTER, but we will set
       another state without reading the state. */
    /* parser_set_op_state(sfp, SF_STATE_AFTER); */

    /* fall through */
  case SF_STATE_AFTER:
    if (parser_eof(sfp)) {
      return SF_ERR_PARSE_ERROR;
    }

    switch (*sfp->pos) {
    case ' ':
      parser_discard_sp(sfp);
      if (parser_eof(sfp)) {
        return SF_ERR_PARSE_ERROR;
      }

      break;
    case ')':
      break;
    default:
      return SF_ERR_PARSE_ERROR;
    }

    break;
  default:
    assert(0);
    abort();
  }

  /* Every path that reaches here has checked that the input is not
     exhausted. */
  if (*sfp->pos == ')') {
    ++sfp->pos;

    parser_unset_inner_list_state(sfp);
    parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS);

    return SF_ERR_EOF;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  parser_set_op_state(sfp, SF_STATE_BEFORE_PARAMS);

  return 0;
}

/*
 * Consumes the rest of the current inner list by calling
 * sf_parser_inner_list until it reports SF_ERR_EOF.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_skip_inner_list(sf_parser *sfp) {
  int rv;

  for (;;) {
    rv = sf_parser_inner_list(sfp, NULL);
    switch (rv) {
    case 0:
      break;
    case SF_ERR_EOF:
      return 0;
    case SF_ERR_PARSE_ERROR:
      return rv;
    default:
      assert(0);
      abort();
    }
  }
}

/*
 * Moves from the end of one dictionary or list member to the start of
 * the next one.  Spaces and tabs are skipped on both sides of the ','
 * separator.
 *
 * Returns 0 if positioned at the next member, SF_ERR_EOF if the input
 * ended after the previous member, or SF_ERR_PARSE_ERROR if something
 * other than ',' follows or if nothing follows the ','.
 */
static int parser_next_key_or_item(sf_parser *sfp) {
  parser_discard_ows(sfp);

  if (parser_eof(sfp)) {
    return SF_ERR_EOF;
  }

  if (*sfp->pos != ',') {
    return SF_ERR_PARSE_ERROR;
  }

  ++sfp->pos;

  parser_discard_ows(sfp);
  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  return 0;
}

/*
 * Parses the value part of a dictionary member after its key has been
 * read, and sets the parser state for the following call.
 *
 * Without '=', the value is boolean true.  With "=(", the value is an
 * inner list: only the '(' is consumed, |dest| gets type
 * SF_TYPE_INNER_LIST and the state becomes
 * SF_STATE_DICT_INNER_LIST_BEFORE.  Otherwise a bare item is parsed.
 * In the two non-inner-list cases the state becomes
 * SF_STATE_DICT_BEFORE_PARAMS.  |dest| may be NULL.
 *
 * Returns 0 on success, or SF_ERR_PARSE_ERROR.
 */
static int parser_dict_value(sf_parser *sfp, sf_value *dest) {
  int rv;

  if (parser_eof(sfp) || *(sfp->pos) != '=') {
    /* Boolean true */
    if (dest) {
      dest->type = SF_TYPE_BOOLEAN;
      dest->flags = SF_VALUE_FLAG_NONE;
      dest->boolean = 1;
    }

    sfp->state = SF_STATE_DICT_BEFORE_PARAMS;

    return 0;
  }

  ++sfp->pos;

  if (parser_eof(sfp)) {
    return SF_ERR_PARSE_ERROR;
  }

  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SF_TYPE_INNER_LIST;
      dest->flags = SF_VALUE_FLAG_NONE;
    }

    ++sfp->pos;

    sfp->state = SF_STATE_DICT_INNER_LIST_BEFORE;

    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SF_STATE_DICT_BEFORE_PARAMS;

  return 0;
}

/*
 * Reads the next member of a dictionary.
 *
 * On the first call (SF_STATE_INITIAL), leading spaces are skipped
 * and SF_ERR_EOF is returned if the input is empty.  On later calls,
 * whatever the caller left unread of the previous member (the rest of
 * an inner list and/or its parameters) is skipped, and the ','
 * separator is consumed.
 *
 * The key is stored in |dest_key| and the value in |dest_value|; both
 * may be NULL.  When the value is an inner list, |dest_value| only
 * has its type set to SF_TYPE_INNER_LIST.
 *
 * Returns 0 if a member was read, SF_ERR_EOF if there are no more
 * members, or SF_ERR_PARSE_ERROR.
 */
int sf_parser_dict(sf_parser *sfp, sf_vec *dest_key, sf_value *dest_value) {
  int rv;

  switch (sfp->state) {
  case SF_STATE_DICT_INNER_LIST_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_DICT_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_DICT_AFTER:
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }

    break;
  case SF_STATE_INITIAL:
    parser_discard_sp(sfp);

    if (parser_eof(sfp)) {
      return SF_ERR_EOF;
    }

    break;
  default:
    assert(0);
    abort();
  }

  rv = parser_key(sfp, dest_key);
  if (rv != 0) {
    return rv;
  }

  return parser_dict_value(sfp, dest_value);
}

/*
 * Reads the next member of a list.
 *
 * On the first call (SF_STATE_INITIAL), leading spaces are skipped
 * and SF_ERR_EOF is returned if the input is empty.  On later calls,
 * whatever the caller left unread of the previous member (the rest of
 * an inner list and/or its parameters) is skipped, and the ','
 * separator is consumed.
 *
 * If the member starts with '(', only the '(' is consumed, |dest|
 * gets type SF_TYPE_INNER_LIST and the state becomes
 * SF_STATE_LIST_INNER_LIST_BEFORE.  Otherwise a bare item is parsed
 * into |dest| and the state becomes SF_STATE_LIST_BEFORE_PARAMS.
 * |dest| may be NULL.
 *
 * Returns 0 if a member was read, SF_ERR_EOF if there are no more
 * members, or SF_ERR_PARSE_ERROR.
 */
int sf_parser_list(sf_parser *sfp, sf_value *dest) {
  int rv;

  switch (sfp->state) {
  case SF_STATE_LIST_INNER_LIST_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_LIST_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_LIST_AFTER:
    rv = parser_next_key_or_item(sfp);
    if (rv != 0) {
      return rv;
    }

    break;
  case SF_STATE_INITIAL:
    parser_discard_sp(sfp);

    if (parser_eof(sfp)) {
      return SF_ERR_EOF;
    }

    break;
  default:
    assert(0);
    abort();
  }

  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SF_TYPE_INNER_LIST;
      dest->flags = SF_VALUE_FLAG_NONE;
    }

    ++sfp->pos;

    sfp->state = SF_STATE_LIST_INNER_LIST_BEFORE;

    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SF_STATE_LIST_BEFORE_PARAMS;

  return 0;
}

/*
 * Reads a single item.
 *
 * On the first call (SF_STATE_INITIAL), leading spaces are skipped;
 * empty input is an error.  If the item starts with '(', only the '('
 * is consumed, |dest| gets type SF_TYPE_INNER_LIST and the state
 * becomes SF_STATE_ITEM_INNER_LIST_BEFORE.  Otherwise a bare item is
 * parsed into |dest| and the state becomes
 * SF_STATE_ITEM_BEFORE_PARAMS.  |dest| may be NULL.
 *
 * On a later call, whatever the caller left unread of the item (the
 * rest of an inner list and/or its parameters) is skipped, trailing
 * spaces are discarded, and SF_ERR_EOF is returned if nothing else
 * remains.  Any remaining input is an error.
 *
 * Returns 0 if the item was read, SF_ERR_EOF on a later call when the
 * input has been fully consumed, or SF_ERR_PARSE_ERROR.
 */
int sf_parser_item(sf_parser *sfp, sf_value *dest) {
  int rv;

  switch (sfp->state) {
  case SF_STATE_INITIAL:
    parser_discard_sp(sfp);

    if (parser_eof(sfp)) {
      return SF_ERR_PARSE_ERROR;
    }

    break;
  case SF_STATE_ITEM_INNER_LIST_BEFORE:
    rv = parser_skip_inner_list(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_ITEM_BEFORE_PARAMS:
    rv = parser_skip_params(sfp);
    if (rv != 0) {
      return rv;
    }

    /* fall through */
  case SF_STATE_ITEM_AFTER:
    parser_discard_sp(sfp);

    if (!parser_eof(sfp)) {
      return SF_ERR_PARSE_ERROR;
    }

    return SF_ERR_EOF;
  default:
    assert(0);
    abort();
  }

  /* Only the SF_STATE_INITIAL case reaches this point. */
  if (*sfp->pos == '(') {
    if (dest) {
      dest->type = SF_TYPE_INNER_LIST;
      dest->flags = SF_VALUE_FLAG_NONE;
    }

    ++sfp->pos;

    sfp->state = SF_STATE_ITEM_INNER_LIST_BEFORE;

    return 0;
  }

  rv = parser_bare_item(sfp, dest);
  if (rv != 0) {
    return rv;
  }

  sfp->state = SF_STATE_ITEM_BEFORE_PARAMS;

  return 0;
}

/*
 * Initializes |sfp| to parse the |datalen| bytes starting at |data|.
 * The parser keeps pointers into |data|; nothing is copied here.
 * When |datalen| is 0, |data| is not used and both the read position
 * and the end pointer are set to NULL.
 */
void sf_parser_init(sf_parser *sfp, const uint8_t *data, size_t datalen) {
  if (datalen == 0) {
    sfp->pos = sfp->end = NULL;
  } else {
    sfp->pos = data;
    sfp->end = data + datalen;
  }

  sfp->state = SF_STATE_INITIAL;
}

/*
 * Removes backslash escapes from |src| and stores the result in
 * |dest|.
 *
 * If |src| is empty or contains no backslash, |dest| is simply set to
 * |*src|; it then refers to the source buffer and dest->base as
 * passed in is not written to.  Otherwise the unescaped bytes are
 * written to the buffer that dest->base points to on entry, and
 * dest->len is set to the number of bytes written.  The output is
 * never longer than src->len.
 *
 * The byte following each backslash is copied without inspection.
 * NOTE(review): this assumes that every backslash in |src| is
 * followed by at least one more byte, which presumably holds because
 * |src| is expected to come from a string that the parser has already
 * validated -- confirm against the header documentation.
 */
void sf_unescape(sf_vec *dest, const sf_vec *src) {
  const uint8_t *p, *q;
  uint8_t *o;
  size_t len, slen;

  if (src->len == 0) {
    *dest = *src;

    return;
  }

  o = dest->base;
  p = src->base;
  len = src->len;

  for (;;) {
    q = memchr(p, '\\', len);
    if (q == NULL) {
      /* len is still src->len only if no backslash was ever found. */
      if (len == src->len) {
        *dest = *src;

        return;
      }

      /* Copy the tail that follows the last escape sequence. */
      memcpy(o, p, len);
      o += len;

      break;
    }

    /* Copy the bytes that precede the backslash. */
    slen = (size_t)(q - p);
    memcpy(o, p, slen);
    o += slen;

    /* Drop the backslash and keep the byte that follows it. */
    p = q + 1;
    *o++ = *p++;
    len -= slen + 2;
  }

  dest->len = (size_t)(o - dest->base);
}

/*
 * Decodes the base64 text in |src| and stores the result in |dest|.
 *
 * src->len must be a multiple of 4 (checked with assert).  If |src|
 * is empty, |dest| is set to |*src|.  Otherwise the decoded bytes are
 * written to the buffer that dest->base points to on entry, 3 bytes
 * for each full group of 4 characters, and dest->len is set to the
 * number of bytes written.
 *
 * Invalid input is only detected through assert.
 * NOTE(review): |src| is presumably expected to come from a byte
 * sequence that the parser has already validated -- confirm against
 * the header documentation.
 */
void sf_base64decode(sf_vec *dest, const sf_vec *src) {
  /* Maps a byte to its 6 bit base64 value, or -1 if the byte is not
     in the base64 alphabet ('=' maps to -1 as well). */
  static const int index_tbl[] = {
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, 62, -1, -1, -1, 63, 52, 53, 54, 55, 56, 57,
    58, 59, 60, 61, -1, -1, -1, -1, -1, -1, -1, 0,  1,  2,  3,  4,  5,  6,
    7,  8,  9,  10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24,
    25, -1, -1, -1, -1, -1, -1, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36,
    37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
    -1, -1, -1, -1};
  uint8_t *o;
  const uint8_t *p, *end;
  uint32_t n;
  size_t i;
  int idx;

  assert((src->len & 0x3) == 0);

  if (src->len == 0) {
    *dest = *src;

    return;
  }

  o = dest->base;
  p = src->base;
  end = src->base + src->len;

  for (; p != end;) {
    /* Accumulates the 24 bits of one group of 4 characters. */
    n = 0;

    for (i = 1; i <= 4; ++i, ++p) {
      idx = index_tbl[*p];

      if (idx == -1) {
        /* Padding may only appear as the 3rd or 4th character. */
        assert(i > 2);

        if (i == 3) {
          /* "xx==": a single output byte. */
          assert(*p == '=' && *(p + 1) == '=' && p + 2 == end);

          *o++ = (uint8_t)(n >> 16);

          goto fin;
        }

        /* "xxx=": two output bytes. */
        assert(*p == '=' && p + 1 == end);

        *o++ = (uint8_t)(n >> 16);
        *o++ = (n >> 8) & 0xffu;

        goto fin;
      }

      /* The i-th character is shifted left by 18, 12, 6 and 0 bits
         for i = 1, 2, 3 and 4. */
      n += (uint32_t)(idx << (24 - i * 6));
    }

    *o++ = (uint8_t)(n >> 16);
    *o++ = (n >> 8) & 0xffu;
    *o++ = n & 0xffu;
  }

fin:
  dest->len = (size_t)(o - dest->base);
}