1/*************************************************************************** 2 * _ _ ____ _ 3 * Project ___| | | | _ \| | 4 * / __| | | | |_) | | 5 * | (__| |_| | _ <| |___ 6 * \___|\___/|_| \_\_____| 7 * 8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 9 * 10 * This software is licensed as described in the file COPYING, which 11 * you should have received as part of this distribution. The terms 12 * are also available at https://curl.se/docs/copyright.html. 13 * 14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell 15 * copies of the Software, and permit persons to whom the Software is 16 * furnished to do so, under the terms of the COPYING file. 17 * 18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 19 * KIND, either express or implied. 20 * 21 * SPDX-License-Identifier: curl 22 * 23 ***************************************************************************/ 24#include "tool_setup.h" 25 26#define ENABLE_CURLX_PRINTF 27/* use our own printf() functions */ 28#include "curlx.h" 29#include "tool_cfgable.h" 30#include "tool_doswin.h" 31#include "tool_urlglob.h" 32#include "tool_vms.h" 33#include "dynbuf.h" 34 35#include "memdebug.h" /* keep this as LAST include */ 36 37#define GLOBERROR(string, column, code) \ 38 glob->error = string, glob->pos = column, code 39 40static CURLcode glob_fixed(struct URLGlob *glob, char *fixed, size_t len) 41{ 42 struct URLPattern *pat = &glob->pattern[glob->size]; 43 pat->type = UPTSet; 44 pat->content.Set.size = 1; 45 pat->content.Set.ptr_s = 0; 46 pat->globindex = -1; 47 48 pat->content.Set.elements = malloc(sizeof(char *)); 49 50 if(!pat->content.Set.elements) 51 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 52 53 pat->content.Set.elements[0] = malloc(len + 1); 54 if(!pat->content.Set.elements[0]) 55 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 56 57 memcpy(pat->content.Set.elements[0], fixed, len); 58 pat->content.Set.elements[0][len] = 0; 59 60 return CURLE_OK; 61} 62 63/* multiply 64 * 65 * Multiplies and checks for overflow. 66 */ 67static int multiply(curl_off_t *amount, curl_off_t with) 68{ 69 curl_off_t sum; 70 DEBUGASSERT(*amount >= 0); 71 DEBUGASSERT(with >= 0); 72 if((with <= 0) || (*amount <= 0)) { 73 sum = 0; 74 } 75 else { 76#if defined(__GNUC__) && \ 77 ((__GNUC__ > 5) || ((__GNUC__ == 5) && (__GNUC_MINOR__ >= 1))) 78 if(__builtin_mul_overflow(*amount, with, &sum)) 79 return 1; 80#else 81 sum = *amount * with; 82 if(sum/with != *amount) 83 return 1; /* didn't fit, bail out */ 84#endif 85 } 86 *amount = sum; 87 return 0; 88} 89 90static CURLcode glob_set(struct URLGlob *glob, char **patternp, 91 size_t *posp, curl_off_t *amount, 92 int globindex) 93{ 94 /* processes a set expression with the point behind the opening '{' 95 ','-separated elements are collected until the next closing '}' 96 */ 97 struct URLPattern *pat; 98 bool done = FALSE; 99 char *buf = glob->glob_buffer; 100 char *pattern = *patternp; 101 char *opattern = pattern; 102 size_t opos = *posp-1; 103 104 pat = &glob->pattern[glob->size]; 105 /* patterns 0,1,2,... correspond to size=1,3,5,... */ 106 pat->type = UPTSet; 107 pat->content.Set.size = 0; 108 pat->content.Set.ptr_s = 0; 109 pat->content.Set.elements = NULL; 110 pat->globindex = globindex; 111 112 while(!done) { 113 switch(*pattern) { 114 case '\0': /* URL ended while set was still open */ 115 return GLOBERROR("unmatched brace", opos, CURLE_URL_MALFORMAT); 116 117 case '{': 118 case '[': /* no nested expressions at this time */ 119 return GLOBERROR("nested brace", *posp, CURLE_URL_MALFORMAT); 120 121 case '}': /* set element completed */ 122 if(opattern == pattern) 123 return GLOBERROR("empty string within braces", *posp, 124 CURLE_URL_MALFORMAT); 125 126 /* add 1 to size since it'll be incremented below */ 127 if(multiply(amount, pat->content.Set.size + 1)) 128 return GLOBERROR("range overflow", 0, CURLE_URL_MALFORMAT); 129 130 FALLTHROUGH(); 131 case ',': 132 133 *buf = '\0'; 134 if(pat->content.Set.elements) { 135 char **new_arr = realloc(pat->content.Set.elements, 136 (size_t)(pat->content.Set.size + 1) * 137 sizeof(char *)); 138 if(!new_arr) 139 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 140 141 pat->content.Set.elements = new_arr; 142 } 143 else 144 pat->content.Set.elements = malloc(sizeof(char *)); 145 146 if(!pat->content.Set.elements) 147 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 148 149 pat->content.Set.elements[pat->content.Set.size] = 150 strdup(glob->glob_buffer); 151 if(!pat->content.Set.elements[pat->content.Set.size]) 152 return GLOBERROR("out of memory", 0, CURLE_OUT_OF_MEMORY); 153 ++pat->content.Set.size; 154 155 if(*pattern == '}') { 156 pattern++; /* pass the closing brace */ 157 done = TRUE; 158 continue; 159 } 160 161 buf = glob->glob_buffer; 162 ++pattern; 163 ++(*posp); 164 break; 165 166 case ']': /* illegal closing bracket */ 167 return GLOBERROR("unexpected close bracket", *posp, CURLE_URL_MALFORMAT); 168 169 case '\\': /* escaped character, skip '\' */ 170 if(pattern[1]) { 171 ++pattern; 172 ++(*posp); 173 } 174 FALLTHROUGH(); 175 default: 176 *buf++ = *pattern++; /* copy character to set element */ 177 ++(*posp); 178 } 179 } 180 181 *patternp = pattern; /* return with the new position */ 182 return CURLE_OK; 183} 184 185static CURLcode glob_range(struct URLGlob *glob, char **patternp, 186 size_t *posp, curl_off_t *amount, 187 int globindex) 188{ 189 /* processes a range expression with the point behind the opening '[' 190 - char range: e.g. "a-z]", "B-Q]" 191 - num range: e.g. "0-9]", "17-2000]" 192 - num range with leading zeros: e.g. "001-999]" 193 expression is checked for well-formedness and collected until the next ']' 194 */ 195 struct URLPattern *pat; 196 int rc; 197 char *pattern = *patternp; 198 char *c; 199 200 pat = &glob->pattern[glob->size]; 201 pat->globindex = globindex; 202 203 if(ISALPHA(*pattern)) { 204 /* character range detected */ 205 char min_c; 206 char max_c; 207 char end_c; 208 unsigned long step = 1; 209 210 pat->type = UPTCharRange; 211 212 rc = sscanf(pattern, "%c-%c%c", &min_c, &max_c, &end_c); 213 214 if(rc == 3) { 215 if(end_c == ':') { 216 char *endp; 217 errno = 0; 218 step = strtoul(&pattern[4], &endp, 10); 219 if(errno || &pattern[4] == endp || *endp != ']') 220 step = 0; 221 else 222 pattern = endp + 1; 223 } 224 else if(end_c != ']') 225 /* then this is wrong */ 226 rc = 0; 227 else 228 /* end_c == ']' */ 229 pattern += 4; 230 } 231 232 *posp += (pattern - *patternp); 233 234 if(rc != 3 || !step || step > (unsigned)INT_MAX || 235 (min_c == max_c && step != 1) || 236 (min_c != max_c && (min_c > max_c || step > (unsigned)(max_c - min_c) || 237 (max_c - min_c) > ('z' - 'a')))) 238 /* the pattern is not well-formed */ 239 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 240 241 /* if there was a ":[num]" thing, use that as step or else use 1 */ 242 pat->content.CharRange.step = (int)step; 243 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c = min_c; 244 pat->content.CharRange.max_c = max_c; 245 246 if(multiply(amount, ((pat->content.CharRange.max_c - 247 pat->content.CharRange.min_c) / 248 pat->content.CharRange.step + 1))) 249 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 250 } 251 else if(ISDIGIT(*pattern)) { 252 /* numeric range detected */ 253 unsigned long min_n; 254 unsigned long max_n = 0; 255 unsigned long step_n = 0; 256 char *endp; 257 258 pat->type = UPTNumRange; 259 pat->content.NumRange.padlength = 0; 260 261 if(*pattern == '0') { 262 /* leading zero specified, count them! */ 263 c = pattern; 264 while(ISDIGIT(*c)) { 265 c++; 266 ++pat->content.NumRange.padlength; /* padding length is set for all 267 instances of this pattern */ 268 } 269 } 270 271 errno = 0; 272 min_n = strtoul(pattern, &endp, 10); 273 if(errno || (endp == pattern)) 274 endp = NULL; 275 else { 276 if(*endp != '-') 277 endp = NULL; 278 else { 279 pattern = endp + 1; 280 while(*pattern && ISBLANK(*pattern)) 281 pattern++; 282 if(!ISDIGIT(*pattern)) { 283 endp = NULL; 284 goto fail; 285 } 286 errno = 0; 287 max_n = strtoul(pattern, &endp, 10); 288 if(errno) 289 /* overflow */ 290 endp = NULL; 291 else if(*endp == ':') { 292 pattern = endp + 1; 293 errno = 0; 294 step_n = strtoul(pattern, &endp, 10); 295 if(errno) 296 /* over/underflow situation */ 297 endp = NULL; 298 } 299 else 300 step_n = 1; 301 if(endp && (*endp == ']')) { 302 pattern = endp + 1; 303 } 304 else 305 endp = NULL; 306 } 307 } 308 309fail: 310 *posp += (pattern - *patternp); 311 312 if(!endp || !step_n || 313 (min_n == max_n && step_n != 1) || 314 (min_n != max_n && (min_n > max_n || step_n > (max_n - min_n)))) 315 /* the pattern is not well-formed */ 316 return GLOBERROR("bad range", *posp, CURLE_URL_MALFORMAT); 317 318 /* typecasting to ints are fine here since we make sure above that we 319 are within 31 bits */ 320 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n = min_n; 321 pat->content.NumRange.max_n = max_n; 322 pat->content.NumRange.step = step_n; 323 324 if(multiply(amount, ((pat->content.NumRange.max_n - 325 pat->content.NumRange.min_n) / 326 pat->content.NumRange.step + 1))) 327 return GLOBERROR("range overflow", *posp, CURLE_URL_MALFORMAT); 328 } 329 else 330 return GLOBERROR("bad range specification", *posp, CURLE_URL_MALFORMAT); 331 332 *patternp = pattern; 333 return CURLE_OK; 334} 335 336#define MAX_IP6LEN 128 337 338static bool peek_ipv6(const char *str, size_t *skip) 339{ 340 /* 341 * Scan for a potential IPv6 literal. 342 * - Valid globs contain a hyphen and <= 1 colon. 343 * - IPv6 literals contain no hyphens and >= 2 colons. 344 */ 345 char hostname[MAX_IP6LEN]; 346 CURLU *u; 347 char *endbr = strchr(str, ']'); 348 size_t hlen; 349 CURLUcode rc; 350 if(!endbr) 351 return FALSE; 352 353 hlen = endbr - str + 1; 354 if(hlen >= MAX_IP6LEN) 355 return FALSE; 356 357 u = curl_url(); 358 if(!u) 359 return FALSE; 360 361 memcpy(hostname, str, hlen); 362 hostname[hlen] = 0; 363 364 /* ask to "guess scheme" as then it works without an https:// prefix */ 365 rc = curl_url_set(u, CURLUPART_URL, hostname, CURLU_GUESS_SCHEME); 366 367 curl_url_cleanup(u); 368 if(!rc) 369 *skip = hlen; 370 return rc ? FALSE : TRUE; 371} 372 373static CURLcode glob_parse(struct URLGlob *glob, char *pattern, 374 size_t pos, curl_off_t *amount) 375{ 376 /* processes a literal string component of a URL 377 special characters '{' and '[' branch to set/range processing functions 378 */ 379 CURLcode res = CURLE_OK; 380 int globindex = 0; /* count "actual" globs */ 381 382 *amount = 1; 383 384 while(*pattern && !res) { 385 char *buf = glob->glob_buffer; 386 size_t sublen = 0; 387 while(*pattern && *pattern != '{') { 388 if(*pattern == '[') { 389 /* skip over IPv6 literals and [] */ 390 size_t skip = 0; 391 if(!peek_ipv6(pattern, &skip) && (pattern[1] == ']')) 392 skip = 2; 393 if(skip) { 394 memcpy(buf, pattern, skip); 395 buf += skip; 396 pattern += skip; 397 sublen += skip; 398 continue; 399 } 400 break; 401 } 402 if(*pattern == '}' || *pattern == ']') 403 return GLOBERROR("unmatched close brace/bracket", pos, 404 CURLE_URL_MALFORMAT); 405 406 /* only allow \ to escape known "special letters" */ 407 if(*pattern == '\\' && 408 (*(pattern + 1) == '{' || *(pattern + 1) == '[' || 409 *(pattern + 1) == '}' || *(pattern + 1) == ']') ) { 410 411 /* escape character, skip '\' */ 412 ++pattern; 413 ++pos; 414 } 415 *buf++ = *pattern++; /* copy character to literal */ 416 ++pos; 417 sublen++; 418 } 419 if(sublen) { 420 /* we got a literal string, add it as a single-item list */ 421 *buf = '\0'; 422 res = glob_fixed(glob, glob->glob_buffer, sublen); 423 } 424 else { 425 switch(*pattern) { 426 case '\0': /* done */ 427 break; 428 429 case '{': 430 /* process set pattern */ 431 pattern++; 432 pos++; 433 res = glob_set(glob, &pattern, &pos, amount, globindex++); 434 break; 435 436 case '[': 437 /* process range pattern */ 438 pattern++; 439 pos++; 440 res = glob_range(glob, &pattern, &pos, amount, globindex++); 441 break; 442 } 443 } 444 445 if(++glob->size >= GLOB_PATTERN_NUM) 446 return GLOBERROR("too many globs", pos, CURLE_URL_MALFORMAT); 447 } 448 return res; 449} 450 451CURLcode glob_url(struct URLGlob **glob, char *url, curl_off_t *urlnum, 452 FILE *error) 453{ 454 /* 455 * We can deal with any-size, just make a buffer with the same length 456 * as the specified URL! 457 */ 458 struct URLGlob *glob_expand; 459 curl_off_t amount = 0; 460 char *glob_buffer; 461 CURLcode res; 462 463 *glob = NULL; 464 465 glob_buffer = malloc(strlen(url) + 1); 466 if(!glob_buffer) 467 return CURLE_OUT_OF_MEMORY; 468 glob_buffer[0] = 0; 469 470 glob_expand = calloc(1, sizeof(struct URLGlob)); 471 if(!glob_expand) { 472 Curl_safefree(glob_buffer); 473 return CURLE_OUT_OF_MEMORY; 474 } 475 glob_expand->urllen = strlen(url); 476 glob_expand->glob_buffer = glob_buffer; 477 478 res = glob_parse(glob_expand, url, 1, &amount); 479 if(!res) 480 *urlnum = amount; 481 else { 482 if(error && glob_expand->error) { 483 char text[512]; 484 const char *t; 485 if(glob_expand->pos) { 486 msnprintf(text, sizeof(text), "%s in URL position %zu:\n%s\n%*s^", 487 glob_expand->error, 488 glob_expand->pos, url, (int)glob_expand->pos - 1, " "); 489 t = text; 490 } 491 else 492 t = glob_expand->error; 493 494 /* send error description to the error-stream */ 495 fprintf(error, "curl: (%d) %s\n", res, t); 496 } 497 /* it failed, we cleanup */ 498 glob_cleanup(glob_expand); 499 *urlnum = 1; 500 return res; 501 } 502 503 *glob = glob_expand; 504 return CURLE_OK; 505} 506 507void glob_cleanup(struct URLGlob *glob) 508{ 509 size_t i; 510 curl_off_t elem; 511 512 if(!glob) 513 return; 514 515 for(i = 0; i < glob->size; i++) { 516 if((glob->pattern[i].type == UPTSet) && 517 (glob->pattern[i].content.Set.elements)) { 518 for(elem = glob->pattern[i].content.Set.size - 1; 519 elem >= 0; 520 --elem) { 521 Curl_safefree(glob->pattern[i].content.Set.elements[elem]); 522 } 523 Curl_safefree(glob->pattern[i].content.Set.elements); 524 } 525 } 526 Curl_safefree(glob->glob_buffer); 527 Curl_safefree(glob); 528} 529 530CURLcode glob_next_url(char **globbed, struct URLGlob *glob) 531{ 532 struct URLPattern *pat; 533 size_t i; 534 size_t len; 535 size_t buflen = glob->urllen + 1; 536 char *buf = glob->glob_buffer; 537 538 *globbed = NULL; 539 540 if(!glob->beenhere) 541 glob->beenhere = 1; 542 else { 543 bool carry = TRUE; 544 545 /* implement a counter over the index ranges of all patterns, starting 546 with the rightmost pattern */ 547 for(i = 0; carry && (i < glob->size); i++) { 548 carry = FALSE; 549 pat = &glob->pattern[glob->size - 1 - i]; 550 switch(pat->type) { 551 case UPTSet: 552 if((pat->content.Set.elements) && 553 (++pat->content.Set.ptr_s == pat->content.Set.size)) { 554 pat->content.Set.ptr_s = 0; 555 carry = TRUE; 556 } 557 break; 558 case UPTCharRange: 559 pat->content.CharRange.ptr_c = 560 (char)(pat->content.CharRange.step + 561 (int)((unsigned char)pat->content.CharRange.ptr_c)); 562 if(pat->content.CharRange.ptr_c > pat->content.CharRange.max_c) { 563 pat->content.CharRange.ptr_c = pat->content.CharRange.min_c; 564 carry = TRUE; 565 } 566 break; 567 case UPTNumRange: 568 pat->content.NumRange.ptr_n += pat->content.NumRange.step; 569 if(pat->content.NumRange.ptr_n > pat->content.NumRange.max_n) { 570 pat->content.NumRange.ptr_n = pat->content.NumRange.min_n; 571 carry = TRUE; 572 } 573 break; 574 default: 575 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 576 return CURLE_FAILED_INIT; 577 } 578 } 579 if(carry) { /* first pattern ptr has run into overflow, done! */ 580 return CURLE_OK; 581 } 582 } 583 584 for(i = 0; i < glob->size; ++i) { 585 pat = &glob->pattern[i]; 586 switch(pat->type) { 587 case UPTSet: 588 if(pat->content.Set.elements) { 589 msnprintf(buf, buflen, "%s", 590 pat->content.Set.elements[pat->content.Set.ptr_s]); 591 len = strlen(buf); 592 buf += len; 593 buflen -= len; 594 } 595 break; 596 case UPTCharRange: 597 if(buflen) { 598 *buf++ = pat->content.CharRange.ptr_c; 599 *buf = '\0'; 600 buflen--; 601 } 602 break; 603 case UPTNumRange: 604 msnprintf(buf, buflen, "%0*" CURL_FORMAT_CURL_OFF_T, 605 pat->content.NumRange.padlength, 606 pat->content.NumRange.ptr_n); 607 len = strlen(buf); 608 buf += len; 609 buflen -= len; 610 break; 611 default: 612 printf("internal error: invalid pattern type (%d)\n", (int)pat->type); 613 return CURLE_FAILED_INIT; 614 } 615 } 616 617 *globbed = strdup(glob->glob_buffer); 618 if(!*globbed) 619 return CURLE_OUT_OF_MEMORY; 620 621 return CURLE_OK; 622} 623 624#define MAX_OUTPUT_GLOB_LENGTH (10*1024) 625 626CURLcode glob_match_url(char **result, char *filename, struct URLGlob *glob) 627{ 628 char numbuf[18]; 629 char *appendthis = (char *)""; 630 size_t appendlen = 0; 631 struct curlx_dynbuf dyn; 632 633 *result = NULL; 634 635 /* We cannot use the glob_buffer for storage since the filename may be 636 * longer than the URL we use. 637 */ 638 curlx_dyn_init(&dyn, MAX_OUTPUT_GLOB_LENGTH); 639 640 while(*filename) { 641 if(*filename == '#' && ISDIGIT(filename[1])) { 642 char *ptr = filename; 643 unsigned long num = strtoul(&filename[1], &filename, 10); 644 struct URLPattern *pat = NULL; 645 646 if(num && (num < glob->size)) { 647 unsigned long i; 648 num--; /* make it zero based */ 649 /* find the correct glob entry */ 650 for(i = 0; i<glob->size; i++) { 651 if(glob->pattern[i].globindex == (int)num) { 652 pat = &glob->pattern[i]; 653 break; 654 } 655 } 656 } 657 658 if(pat) { 659 switch(pat->type) { 660 case UPTSet: 661 if(pat->content.Set.elements) { 662 appendthis = pat->content.Set.elements[pat->content.Set.ptr_s]; 663 appendlen = 664 strlen(pat->content.Set.elements[pat->content.Set.ptr_s]); 665 } 666 break; 667 case UPTCharRange: 668 numbuf[0] = pat->content.CharRange.ptr_c; 669 numbuf[1] = 0; 670 appendthis = numbuf; 671 appendlen = 1; 672 break; 673 case UPTNumRange: 674 msnprintf(numbuf, sizeof(numbuf), "%0*" CURL_FORMAT_CURL_OFF_T, 675 pat->content.NumRange.padlength, 676 pat->content.NumRange.ptr_n); 677 appendthis = numbuf; 678 appendlen = strlen(numbuf); 679 break; 680 default: 681 fprintf(tool_stderr, "internal error: invalid pattern type (%d)\n", 682 (int)pat->type); 683 curlx_dyn_free(&dyn); 684 return CURLE_FAILED_INIT; 685 } 686 } 687 else { 688 /* #[num] out of range, use the #[num] in the output */ 689 filename = ptr; 690 appendthis = filename++; 691 appendlen = 1; 692 } 693 } 694 else { 695 appendthis = filename++; 696 appendlen = 1; 697 } 698 if(curlx_dyn_addn(&dyn, appendthis, appendlen)) 699 return CURLE_OUT_OF_MEMORY; 700 } 701 702 if(curlx_dyn_addn(&dyn, "", 0)) 703 return CURLE_OUT_OF_MEMORY; 704 705#if defined(_WIN32) || defined(MSDOS) 706 { 707 char *sanitized; 708 SANITIZEcode sc = sanitize_file_name(&sanitized, curlx_dyn_ptr(&dyn), 709 (SANITIZE_ALLOW_PATH | 710 SANITIZE_ALLOW_RESERVED)); 711 curlx_dyn_free(&dyn); 712 if(sc) 713 return CURLE_URL_MALFORMAT; 714 *result = sanitized; 715 return CURLE_OK; 716 } 717#else 718 *result = curlx_dyn_ptr(&dyn); 719 return CURLE_OK; 720#endif /* _WIN32 || MSDOS */ 721} 722