xref: /third_party/curl/lib/urlapi.c (revision 13498266)
1/***************************************************************************
2 *                                  _   _ ____  _
3 *  Project                     ___| | | |  _ \| |
4 *                             / __| | | | |_) | |
5 *                            | (__| |_| |  _ <| |___
6 *                             \___|\___/|_| \_\_____|
7 *
8 * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al.
9 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
12 * are also available at https://curl.se/docs/copyright.html.
13 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 * SPDX-License-Identifier: curl
22 *
23 ***************************************************************************/
24
25#include "curl_setup.h"
26
27#include "urldata.h"
28#include "urlapi-int.h"
29#include "strcase.h"
30#include "url.h"
31#include "escape.h"
32#include "curl_ctype.h"
33#include "inet_pton.h"
34#include "inet_ntop.h"
35#include "strdup.h"
36#include "idn.h"
37#include "curl_memrchr.h"
38
39/* The last 3 #include files should be in this order */
40#include "curl_printf.h"
41#include "curl_memory.h"
42#include "memdebug.h"
43
44  /* MSDOS/Windows style drive prefix, eg c: in c:foo */
45#define STARTS_WITH_DRIVE_PREFIX(str) \
46  ((('a' <= str[0] && str[0] <= 'z') || \
47    ('A' <= str[0] && str[0] <= 'Z')) && \
48   (str[1] == ':'))
49
  /* MSDOS/Windows style drive prefix as it may appear in a URL: an ASCII
   * letter followed by ':' or '|', which in turn must be followed by a
   * slash, a backslash or the end of the string (NUL).
   *
   * 'str' is evaluated several times, so pass an expression without side
   * effects. */
#define STARTS_WITH_URL_DRIVE_PREFIX(str) \
  ((('a' <= (str)[0] && (str)[0] <= 'z') || \
    ('A' <= (str)[0] && (str)[0] <= 'Z')) && \
   ((str)[1] == ':' || (str)[1] == '|') && \
   ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0))
57
/* The scheme is never URL encoded. This is the maximum number of bytes a
   scheme name may have to be recognized by Curl_is_absolute_url(); buffers
   receiving a scheme need room for one byte more than this. */
#define MAX_SCHEME_LEN 40

/*
 * If ENABLE_IPV6 is not defined, we still want to parse IPv6 addresses, so
 * make sure we have _some_ value for AF_INET6 without polluting our fake
 * value everywhere.
 */
#if !defined(ENABLE_IPV6) && !defined(AF_INET6)
#define AF_INET6 (AF_INET + 1)
#endif
69
/* Internal representation of CURLU. Point to URL-encoded strings.
 *
 * Every string member is either NULL or heap memory owned by the handle;
 * free_urlhandle() releases all of them. */
struct Curl_URL {
  char *scheme;
  char *user;
  char *password;
  char *options; /* IMAP only? */
  char *host;
  char *zoneid; /* for numerical IPv6 addresses */
  char *port;   /* decimal port string, regenerated by Curl_parse_port() */
  char *path;
  char *query;
  char *fragment;
  long portnum; /* the numerical version */
};
84
/* NOTE(review): presumably the scheme assumed for URLs given without one
   when the caller asks for a default scheme; the users of this macro are
   outside this part of the file - confirm there. */
#define DEFAULT_SCHEME "https"
86
87static void free_urlhandle(struct Curl_URL *u)
88{
89  free(u->scheme);
90  free(u->user);
91  free(u->password);
92  free(u->options);
93  free(u->host);
94  free(u->zoneid);
95  free(u->port);
96  free(u->path);
97  free(u->query);
98  free(u->fragment);
99}
100
/*
 * find_host_sep() returns a pointer to the first '/' or '?' found after the
 * start of the host name, whichever comes first. This also covers URLs
 * without a path, like http://www.example.com?id=2380
 *
 * The host name is taken to start right after the first "//" in 'url', or
 * at the start of 'url' if there is no "//". If neither separator exists,
 * the returned pointer is the terminating null byte of 'url'.
 */
static const char *find_host_sep(const char *url)
{
  const char *end = url + strlen(url);
  const char *host = strstr(url, "//");
  const char *slash;
  const char *qmark;

  /* skip the double slash if there was one */
  host = host ? host + 2 : url;

  slash = strchr(host, '/');
  if(!slash)
    slash = end;

  qmark = strchr(host, '?');
  if(!qmark)
    qmark = end;

  return (qmark <= slash) ? qmark : slash;
}
128
/* Convert a failed CURLcode to a CURLUcode: CURLE_TOO_LARGE maps to
   CURLUE_TOO_LARGE and every other value to CURLUE_OUT_OF_MEMORY. Only
   meant to be used on error codes, as CURLE_OK would also become
   CURLUE_OUT_OF_MEMORY. */
#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE :   \
                  CURLUE_OUT_OF_MEMORY)
/*
 * Decide whether a character in a URL must be escaped: true for every byte
 * that is neither a control character, whitespace nor a graphical
 * character according to the ISCNTRL/ISSPACE/ISGRAPH macros.
 * NOTE(review): presumably that leaves the bytes outside of ASCII - confirm
 * against the ctype macro definitions.
 */
#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c)))

/* lowercase hex digits, indexed by nibble value when producing %xx */
static const char hexdigits[] = "0123456789abcdef";
138/* urlencode_str() writes data into an output dynbuf and URL-encodes the
139 * spaces in the source URL accordingly.
140 *
141 * URL encoding should be skipped for host names, otherwise IDN resolution
142 * will fail.
143 */
144static CURLUcode urlencode_str(struct dynbuf *o, const char *url,
145                               size_t len, bool relative,
146                               bool query)
147{
148  /* we must add this with whitespace-replacing */
149  bool left = !query;
150  const unsigned char *iptr;
151  const unsigned char *host_sep = (const unsigned char *) url;
152  CURLcode result;
153
154  if(!relative)
155    host_sep = (const unsigned char *) find_host_sep(url);
156
157  for(iptr = (unsigned char *)url;    /* read from here */
158      len; iptr++, len--) {
159
160    if(iptr < host_sep) {
161      result = Curl_dyn_addn(o, iptr, 1);
162      if(result)
163        return cc2cu(result);
164      continue;
165    }
166
167    if(*iptr == ' ') {
168      if(left)
169        result = Curl_dyn_addn(o, "%20", 3);
170      else
171        result = Curl_dyn_addn(o, "+", 1);
172      if(result)
173        return cc2cu(result);
174      continue;
175    }
176
177    if(*iptr == '?')
178      left = FALSE;
179
180    if(urlchar_needs_escaping(*iptr)) {
181      char out[3]={'%'};
182      out[1] = hexdigits[*iptr>>4];
183      out[2] = hexdigits[*iptr & 0xf];
184      result = Curl_dyn_addn(o, out, 3);
185    }
186    else
187      result = Curl_dyn_addn(o, iptr, 1);
188    if(result)
189      return cc2cu(result);
190  }
191
192  return CURLUE_OK;
193}
194
195/*
196 * Returns the length of the scheme if the given URL is absolute (as opposed
197 * to relative). Stores the scheme in the buffer if TRUE and 'buf' is
198 * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set.
199 *
200 * If 'guess_scheme' is TRUE, it means the URL might be provided without
201 * scheme.
202 */
203size_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen,
204                            bool guess_scheme)
205{
206  int i = 0;
207  DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN));
208  (void)buflen; /* only used in debug-builds */
209  if(buf)
210    buf[0] = 0; /* always leave a defined value in buf */
211#ifdef _WIN32
212  if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url))
213    return 0;
214#endif
215  if(ISALPHA(url[0]))
216    for(i = 1; i < MAX_SCHEME_LEN; ++i) {
217      char s = url[i];
218      if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) {
219        /* RFC 3986 3.1 explains:
220           scheme      = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." )
221        */
222      }
223      else {
224        break;
225      }
226    }
227  if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) {
228    /* If this does not guess scheme, the scheme always ends with the colon so
229       that this also detects data: URLs etc. In guessing mode, data: could
230       be the host name "data" with a specified port number. */
231
232    /* the length of the scheme is the name part only */
233    size_t len = i;
234    if(buf) {
235      buf[i] = 0;
236      while(i--) {
237        buf[i] = Curl_raw_tolower(url[i]);
238      }
239    }
240    return len;
241  }
242  return 0;
243}
244
/*
 * Concatenate a relative URL to a base URL making it absolute.
 * URL-encodes any spaces.
 *
 * On success CURLE_OK is returned and '*newurl' points to the new URL,
 * which must be freed by the caller. On failure a CURLcode error is
 * returned and '*newurl' is NULL.
 *
 * Note that this function destroys the 'base' string: it gets truncated in
 * place to the part that is kept.
 */
static CURLcode concat_url(char *base, const char *relurl, char **newurl)
{
  /*
   * Strategy: cut 'base' off at the point where the relative URL takes
   * over, then append the (URL encoded) relative part to what is left.
   * This is plain string surgery to the right of the host part, not a full
   * RFC 3986 reference resolution.
   */
  struct dynbuf newest;
  char *protsep;
  char *pathsep;
  bool host_changed = FALSE;
  const char *useurl = relurl;
  CURLcode result = CURLE_OK;
  CURLUcode uc;
  *newurl = NULL;

  /* protsep points to the start of the host name */
  protsep = strstr(base, "//");
  if(!protsep)
    protsep = base;
  else
    protsep += 2; /* pass the slashes */

  if('/' != relurl[0]) {
    int level = 0; /* number of leading "../" in the relative URL */

    /* First we need to find out if there's a ?-letter in the URL,
       and cut it and the right-side of that off */
    pathsep = strchr(protsep, '?');
    if(pathsep)
      *pathsep = 0;

    /* we have a relative path to append to the last slash if there's one
       available, or if the new URL is just a query string (starts with a
       '?')  we append the new one at the end of the entire currently worked
       out URL */
    if(useurl[0] != '?') {
      pathsep = strrchr(protsep, '/');
      if(pathsep)
        *pathsep = 0;
    }

    /* Check if there's any slash after the host name, and if so, remember
       that position instead. From here on, protsep is either the start of
       the path (past its leading slash) or NULL if there is no path left. */
    pathsep = strchr(protsep, '/');
    if(pathsep)
      protsep = pathsep + 1;
    else
      protsep = NULL;

    /* now deal with one "./" or any amount of "../" in the newurl
       and act accordingly */

    if((useurl[0] == '.') && (useurl[1] == '/'))
      useurl += 2; /* just skip the "./" */

    while((useurl[0] == '.') &&
          (useurl[1] == '.') &&
          (useurl[2] == '/')) {
      level++;
      useurl += 3; /* pass the "../" */
    }

    if(protsep) {
      while(level--) {
        /* cut off one more level from the right of the original URL */
        pathsep = strrchr(protsep, '/');
        if(pathsep)
          *pathsep = 0;
        else {
          /* no more directories to strip, leave an empty path */
          *protsep = 0;
          break;
        }
      }
    }
  }
  else {
    /* We got a new absolute path for this server */

    if(relurl[1] == '/') {
      /* the new URL starts with //, just keep the protocol part from the
         original one */
      *protsep = 0;
      useurl = &relurl[2]; /* we keep the slashes from the original, so we
                              skip the new ones */
      host_changed = TRUE;
    }
    else {
      /* cut off the original URL from the first slash, or deal with URLs
         without slash */
      pathsep = strchr(protsep, '/');
      if(pathsep) {
        /* When people use badly formatted URLs, such as
           "http://www.example.com?dir=/home/daniel" we must not use the first
           slash, if there's a ?-letter before it! */
        char *sep = strchr(protsep, '?');
        if(sep && (sep < pathsep))
          pathsep = sep;
        *pathsep = 0;
      }
      else {
        /* There was no slash. Now, since we might be operating on a badly
           formatted URL, such as "http://www.example.com?id=2380" which
           doesn't use a slash separator as it is supposed to, we need to check
           for a ?-letter as well! */
        pathsep = strchr(protsep, '?');
        if(pathsep)
          *pathsep = 0;
      }
    }
  }

  Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH);

  /* copy over the root url part */
  result = Curl_dyn_add(&newest, base);
  if(result)
    return result;

  /* check if we need to append a slash: not when the new part brings its
     own leading '/' or '?', and not when the path was cut down to nothing */
  if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0]))
    ;
  else {
    result = Curl_dyn_addn(&newest, "/", 1);
    if(result)
      return result;
  }

  /* then append the new piece on the right side. If the host was replaced,
     the new piece starts with a host name that must not be URL encoded. */
  uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed,
                     FALSE);
  if(uc)
    return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY;

  *newurl = Curl_dyn_ptr(&newest);
  return CURLE_OK;
}
390
391/* scan for byte values <= 31, 127 and sometimes space */
392static CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags)
393{
394  static const char badbytes[]={
395    /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
396    0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
397    0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
398    0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
399    0x7f, 0x00 /* null-terminate */
400  };
401  size_t n = strlen(url);
402  size_t nfine;
403
404  if(n > CURL_MAX_INPUT_LENGTH)
405    /* excessive input length */
406    return CURLUE_MALFORMED_INPUT;
407
408  nfine = strcspn(url, badbytes);
409  if((nfine != n) ||
410     (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' ')))
411    return CURLUE_MALFORMED_INPUT;
412
413  *urllen = n;
414  return CURLUE_OK;
415}
416
417/*
418 * parse_hostname_login()
419 *
420 * Parse the login details (user name, password and options) from the URL and
421 * strip them out of the host name
422 *
423 */
424static CURLUcode parse_hostname_login(struct Curl_URL *u,
425                                      const char *login,
426                                      size_t len,
427                                      unsigned int flags,
428                                      size_t *offset) /* to the host name */
429{
430  CURLUcode result = CURLUE_OK;
431  CURLcode ccode;
432  char *userp = NULL;
433  char *passwdp = NULL;
434  char *optionsp = NULL;
435  const struct Curl_handler *h = NULL;
436
437  /* At this point, we assume all the other special cases have been taken
438   * care of, so the host is at most
439   *
440   *   [user[:password][;options]]@]hostname
441   *
442   * We need somewhere to put the embedded details, so do that first.
443   */
444  char *ptr;
445
446  DEBUGASSERT(login);
447
448  *offset = 0;
449  ptr = memchr(login, '@', len);
450  if(!ptr)
451    goto out;
452
453  /* We will now try to extract the
454   * possible login information in a string like:
455   * ftp://user:password@ftp.my.site:8021/README */
456  ptr++;
457
458  /* if this is a known scheme, get some details */
459  if(u->scheme)
460    h = Curl_get_scheme_handler(u->scheme);
461
462  /* We could use the login information in the URL so extract it. Only parse
463     options if the handler says we should. Note that 'h' might be NULL! */
464  ccode = Curl_parse_login_details(login, ptr - login - 1,
465                                   &userp, &passwdp,
466                                   (h && (h->flags & PROTOPT_URLOPTIONS)) ?
467                                   &optionsp:NULL);
468  if(ccode) {
469    result = CURLUE_BAD_LOGIN;
470    goto out;
471  }
472
473  if(userp) {
474    if(flags & CURLU_DISALLOW_USER) {
475      /* Option DISALLOW_USER is set and url contains username. */
476      result = CURLUE_USER_NOT_ALLOWED;
477      goto out;
478    }
479    free(u->user);
480    u->user = userp;
481  }
482
483  if(passwdp) {
484    free(u->password);
485    u->password = passwdp;
486  }
487
488  if(optionsp) {
489    free(u->options);
490    u->options = optionsp;
491  }
492
493  /* the host name starts at this offset */
494  *offset = ptr - login;
495  return CURLUE_OK;
496
497out:
498
499  free(userp);
500  free(passwdp);
501  free(optionsp);
502  u->user = NULL;
503  u->password = NULL;
504  u->options = NULL;
505
506  return result;
507}
508
509UNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host,
510                                   bool has_scheme)
511{
512  char *portptr;
513  char *hostname = Curl_dyn_ptr(host);
514  /*
515   * Find the end of an IPv6 address on the ']' ending bracket.
516   */
517  if(hostname[0] == '[') {
518    portptr = strchr(hostname, ']');
519    if(!portptr)
520      return CURLUE_BAD_IPV6;
521    portptr++;
522    /* this is a RFC2732-style specified IP-address */
523    if(*portptr) {
524      if(*portptr != ':')
525        return CURLUE_BAD_PORT_NUMBER;
526    }
527    else
528      portptr = NULL;
529  }
530  else
531    portptr = strchr(hostname, ':');
532
533  if(portptr) {
534    char *rest;
535    long port;
536    size_t keep = portptr - hostname;
537
538    /* Browser behavior adaptation. If there's a colon with no digits after,
539       just cut off the name there which makes us ignore the colon and just
540       use the default port. Firefox, Chrome and Safari all do that.
541
542       Don't do it if the URL has no scheme, to make something that looks like
543       a scheme not work!
544    */
545    Curl_dyn_setlen(host, keep);
546    portptr++;
547    if(!*portptr)
548      return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER;
549
550    if(!ISDIGIT(*portptr))
551      return CURLUE_BAD_PORT_NUMBER;
552
553    port = strtol(portptr, &rest, 10);  /* Port number must be decimal */
554
555    if(port > 0xffff)
556      return CURLUE_BAD_PORT_NUMBER;
557
558    if(rest[0])
559      return CURLUE_BAD_PORT_NUMBER;
560
561    u->portnum = port;
562    /* generate a new port number string to get rid of leading zeroes etc */
563    free(u->port);
564    u->port = aprintf("%ld", port);
565    if(!u->port)
566      return CURLUE_OUT_OF_MEMORY;
567  }
568
569  return CURLUE_OK;
570}
571
572/* this assumes 'hostname' now starts with [ */
573static CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname,
574                            size_t hlen) /* length of hostname */
575{
576  size_t len;
577  DEBUGASSERT(*hostname == '[');
578  if(hlen < 4) /* '[::]' is the shortest possible valid string */
579    return CURLUE_BAD_IPV6;
580  hostname++;
581  hlen -= 2;
582
583  /* only valid IPv6 letters are ok */
584  len = strspn(hostname, "0123456789abcdefABCDEF:.");
585
586  if(hlen != len) {
587    hlen = len;
588    if(hostname[len] == '%') {
589      /* this could now be '%[zone id]' */
590      char zoneid[16];
591      int i = 0;
592      char *h = &hostname[len + 1];
593      /* pass '25' if present and is a url encoded percent sign */
594      if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']'))
595        h += 2;
596      while(*h && (*h != ']') && (i < 15))
597        zoneid[i++] = *h++;
598      if(!i || (']' != *h))
599        return CURLUE_BAD_IPV6;
600      zoneid[i] = 0;
601      u->zoneid = strdup(zoneid);
602      if(!u->zoneid)
603        return CURLUE_OUT_OF_MEMORY;
604      hostname[len] = ']'; /* insert end bracket */
605      hostname[len + 1] = 0; /* terminate the hostname */
606    }
607    else
608      return CURLUE_BAD_IPV6;
609    /* hostname is fine */
610  }
611
612  /* Check the IPv6 address. */
613  {
614    char dest[16]; /* fits a binary IPv6 address */
615    char norm[MAX_IPADR_LEN];
616    hostname[hlen] = 0; /* end the address there */
617    if(1 != Curl_inet_pton(AF_INET6, hostname, dest))
618      return CURLUE_BAD_IPV6;
619
620    /* check if it can be done shorter */
621    if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) &&
622       (strlen(norm) < hlen)) {
623      strcpy(hostname, norm);
624      hlen = strlen(norm);
625      hostname[hlen + 1] = 0;
626    }
627    hostname[hlen] = ']'; /* restore ending bracket */
628  }
629  return CURLUE_OK;
630}
631
632static CURLUcode hostname_check(struct Curl_URL *u, char *hostname,
633                                size_t hlen) /* length of hostname */
634{
635  size_t len;
636  DEBUGASSERT(hostname);
637
638  if(!hlen)
639    return CURLUE_NO_HOST;
640  else if(hostname[0] == '[')
641    return ipv6_parse(u, hostname, hlen);
642  else {
643    /* letters from the second string are not ok */
644    len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%");
645    if(hlen != len)
646      /* hostname with bad content */
647      return CURLUE_BAD_HOSTNAME;
648  }
649  return CURLUE_OK;
650}
651
/*
 * Handle partial IPv4 numerical addresses and different bases, like
 * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc.
 *
 * If the given input string is not a syntactically correct IPv4 address,
 * or if for example any part of it is too big, the function returns
 * HOST_NAME and leaves the input as it was.
 *
 * Otherwise the input is replaced by the "normalized" version of it: four
 * decimal numbers separated by dots.
 *
 * Returns the host type, which is one of the HOST_* values below.
 */

/* negative values are errors */
#define HOST_ERROR   -1 /* out of memory */
#define HOST_BAD     -2 /* bad IPv4 address */

/* the host types */
#define HOST_NAME    1
#define HOST_IPV4    2
#define HOST_IPV6    3
671
672static int ipv4_normalize(struct dynbuf *host)
673{
674  bool done = FALSE;
675  int n = 0;
676  const char *c = Curl_dyn_ptr(host);
677  unsigned long parts[4] = {0, 0, 0, 0};
678  CURLcode result = CURLE_OK;
679
680  if(*c == '[')
681    return HOST_IPV6;
682
683  while(!done) {
684    char *endp;
685    unsigned long l;
686    if(!ISDIGIT(*c))
687      /* most importantly this doesn't allow a leading plus or minus */
688      return HOST_NAME;
689    l = strtoul(c, &endp, 0);
690
691    parts[n] = l;
692    c = endp;
693
694    switch(*c) {
695    case '.':
696      if(n == 3)
697        return HOST_NAME;
698      n++;
699      c++;
700      break;
701
702    case '\0':
703      done = TRUE;
704      break;
705
706    default:
707      return HOST_NAME;
708    }
709
710    /* overflow */
711    if((l == ULONG_MAX) && (errno == ERANGE))
712      return HOST_NAME;
713
714#if SIZEOF_LONG > 4
715    /* a value larger than 32 bits */
716    if(l > UINT_MAX)
717      return HOST_NAME;
718#endif
719  }
720
721  switch(n) {
722  case 0: /* a -- 32 bits */
723    Curl_dyn_reset(host);
724
725    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
726                           (unsigned int)(parts[0] >> 24),
727                           (unsigned int)((parts[0] >> 16) & 0xff),
728                           (unsigned int)((parts[0] >> 8) & 0xff),
729                           (unsigned int)(parts[0] & 0xff));
730    break;
731  case 1: /* a.b -- 8.24 bits */
732    if((parts[0] > 0xff) || (parts[1] > 0xffffff))
733      return HOST_NAME;
734    Curl_dyn_reset(host);
735    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
736                           (unsigned int)(parts[0]),
737                           (unsigned int)((parts[1] >> 16) & 0xff),
738                           (unsigned int)((parts[1] >> 8) & 0xff),
739                           (unsigned int)(parts[1] & 0xff));
740    break;
741  case 2: /* a.b.c -- 8.8.16 bits */
742    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff))
743      return HOST_NAME;
744    Curl_dyn_reset(host);
745    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
746                           (unsigned int)(parts[0]),
747                           (unsigned int)(parts[1]),
748                           (unsigned int)((parts[2] >> 8) & 0xff),
749                           (unsigned int)(parts[2] & 0xff));
750    break;
751  case 3: /* a.b.c.d -- 8.8.8.8 bits */
752    if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) ||
753       (parts[3] > 0xff))
754      return HOST_NAME;
755    Curl_dyn_reset(host);
756    result = Curl_dyn_addf(host, "%u.%u.%u.%u",
757                           (unsigned int)(parts[0]),
758                           (unsigned int)(parts[1]),
759                           (unsigned int)(parts[2]),
760                           (unsigned int)(parts[3]));
761    break;
762  }
763  if(result)
764    return HOST_ERROR;
765  return HOST_IPV4;
766}
767
768/* if necessary, replace the host content with a URL decoded version */
769static CURLUcode urldecode_host(struct dynbuf *host)
770{
771  char *per = NULL;
772  const char *hostname = Curl_dyn_ptr(host);
773  per = strchr(hostname, '%');
774  if(!per)
775    /* nothing to decode */
776    return CURLUE_OK;
777  else {
778    /* encoded */
779    size_t dlen;
780    char *decoded;
781    CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen,
782                                     REJECT_CTRL);
783    if(result)
784      return CURLUE_BAD_HOSTNAME;
785    Curl_dyn_reset(host);
786    result = Curl_dyn_addn(host, decoded, dlen);
787    free(decoded);
788    if(result)
789      return cc2cu(result);
790  }
791
792  return CURLUE_OK;
793}
794
795static CURLUcode parse_authority(struct Curl_URL *u,
796                                 const char *auth, size_t authlen,
797                                 unsigned int flags,
798                                 struct dynbuf *host,
799                                 bool has_scheme)
800{
801  size_t offset;
802  CURLUcode uc;
803  CURLcode result;
804
805  /*
806   * Parse the login details and strip them out of the host name.
807   */
808  uc = parse_hostname_login(u, auth, authlen, flags, &offset);
809  if(uc)
810    goto out;
811
812  result = Curl_dyn_addn(host, auth + offset, authlen - offset);
813  if(result) {
814    uc = cc2cu(result);
815    goto out;
816  }
817
818  uc = Curl_parse_port(u, host, has_scheme);
819  if(uc)
820    goto out;
821
822  if(!Curl_dyn_len(host))
823    return CURLUE_NO_HOST;
824
825  switch(ipv4_normalize(host)) {
826  case HOST_IPV4:
827    break;
828  case HOST_IPV6:
829    uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
830    break;
831  case HOST_NAME:
832    uc = urldecode_host(host);
833    if(!uc)
834      uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host));
835    break;
836  case HOST_ERROR:
837    uc = CURLUE_OUT_OF_MEMORY;
838    break;
839  case HOST_BAD:
840  default:
841    uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */
842    break;
843  }
844
845out:
846  return uc;
847}
848
849CURLUcode Curl_url_set_authority(CURLU *u, const char *authority,
850                                 unsigned int flags)
851{
852  CURLUcode result;
853  struct dynbuf host;
854
855  DEBUGASSERT(authority);
856  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
857
858  result = parse_authority(u, authority, strlen(authority), flags,
859                           &host, !!u->scheme);
860  if(result)
861    Curl_dyn_free(&host);
862  else {
863    free(u->host);
864    u->host = Curl_dyn_ptr(&host);
865  }
866  return result;
867}
868
/*
 * "Remove Dot Segments"
 * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4
 */

/*
 * dedotdotify()
 * @unittest: 1395
 *
 * This function gets a null-terminated path of 'clen' bytes with dot and
 * dotdot sequences passed in and strips them off according to the rules in
 * RFC 3986 section 5.2.4.
 *
 * The function handles a query part ('?' + stuff) appended but it expects
 * that fragments ('#' + stuff) have already been cut off.
 *
 * RETURNS
 *
 * Zero for success, non-zero if out of memory. On success '*outp' is set
 * to an allocated dedotdotified string that the caller must free, or to
 * NULL if the input is shorter than two bytes or has no dot in it, as there
 * is then nothing to do.
 */
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp);
UNITTEST int dedotdotify(const char *input, size_t clen, char **outp)
{
  const char *stop = input + clen; /* end of the input */
  char *buf;                       /* the output buffer */
  char *wr;                        /* where to write next in 'buf' */

  *outp = NULL;
  /* the path always starts with a slash, and a slash has no dot */
  if((clen < 2) || !memchr(input, '.', clen))
    return 0;

  /* the output never gets longer than the input */
  buf = malloc(clen + 1);
  if(!buf)
    return 1; /* out of memory */

  buf[0] = 0; /* null-terminates, for inputs like "./" */
  wr = buf;

  do {
    bool copy_segment = FALSE;

    switch(*input) {
    case '.':
      /*  A.  If the input buffer begins with a prefix of "../" or "./", then
          remove that prefix from the input buffer; otherwise, */
      if(!strncmp("./", input, 2))
        input += 2;
      else if(!strncmp("../", input, 3))
        input += 3;

      /*  D.  if the input buffer consists only of "." or "..", then remove
          that from the input buffer; otherwise, */
      else if(!strcmp(".", input) || !strcmp("..", input) ||
              !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) {
        buf[0] = 0;
        goto done;
      }
      else
        copy_segment = TRUE;
      break;

    case '/':
      /*  B.  if the input buffer begins with a prefix of "/./" or "/.", where
          "."  is a complete path segment, then replace that prefix with "/" in
          the input buffer; otherwise, */
      if(!strncmp("/./", input, 3))
        input += 2;
      else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) {
        *wr++ = '/';
        *wr = 0;
        goto done;
      }

      /*  C.  if the input buffer begins with a prefix of "/../" or "/..",
          where ".." is a complete path segment, then replace that prefix with
          "/" in the input buffer and remove the last segment and its
          preceding "/" (if any) from the output buffer; otherwise, */
      else if(!strncmp("/../", input, 4)) {
        input += 3;
        /* back up to the slash that starts the last segment, if any */
        while((wr > buf) && (*--wr != '/'))
          ;
        *wr = 0; /* null-terminate where it stops */
      }
      else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) {
        /* same removal of the last segment, but this ends the path */
        while((wr > buf) && (*--wr != '/'))
          ;
        *wr++ = '/';
        *wr = 0; /* null-terminate where it stops */
        goto done;
      }
      else
        copy_segment = TRUE;
      break;

    default:
      copy_segment = TRUE;
      break;
    }

    if(copy_segment) {
      /*  E.  move the first path segment in the input buffer to the end of
          the output buffer, including the initial "/" character (if any) and
          any subsequent characters up to, but not including, the next "/"
          character or the end of the input buffer. */
      do
        *wr++ = *input++;
      while(*input && (*input != '/') && (*input != '?'));
      *wr = 0;
    }

    /* continue until end of path */
  } while(input < stop);

done:
  *outp = buf;
  return 0; /* success */
}
998}
999
1000static CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags)
1001{
1002  const char *path;
1003  size_t pathlen;
1004  char *query = NULL;
1005  char *fragment = NULL;
1006  char schemebuf[MAX_SCHEME_LEN + 1];
1007  size_t schemelen = 0;
1008  size_t urllen;
1009  CURLUcode result = CURLUE_OK;
1010  size_t fraglen = 0;
1011  struct dynbuf host;
1012
1013  DEBUGASSERT(url);
1014
1015  Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH);
1016
1017  result = junkscan(url, &urllen, flags);
1018  if(result)
1019    goto fail;
1020
1021  schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf),
1022                                   flags & (CURLU_GUESS_SCHEME|
1023                                            CURLU_DEFAULT_SCHEME));
1024
1025  /* handle the file: scheme */
1026  if(schemelen && !strcmp(schemebuf, "file")) {
1027    bool uncpath = FALSE;
1028    if(urllen <= 6) {
1029      /* file:/ is not enough to actually be a complete file: URL */
1030      result = CURLUE_BAD_FILE_URL;
1031      goto fail;
1032    }
1033
1034    /* path has been allocated large enough to hold this */
1035    path = (char *)&url[5];
1036    pathlen = urllen - 5;
1037
1038    u->scheme = strdup("file");
1039    if(!u->scheme) {
1040      result = CURLUE_OUT_OF_MEMORY;
1041      goto fail;
1042    }
1043
1044    /* Extra handling URLs with an authority component (i.e. that start with
1045     * "file://")
1046     *
1047     * We allow omitted hostname (e.g. file:/<path>) -- valid according to
1048     * RFC 8089, but not the (current) WHAT-WG URL spec.
1049     */
1050    if(path[0] == '/' && path[1] == '/') {
1051      /* swallow the two slashes */
1052      const char *ptr = &path[2];
1053
1054      /*
1055       * According to RFC 8089, a file: URL can be reliably dereferenced if:
1056       *
1057       *  o it has no/blank hostname, or
1058       *
1059       *  o the hostname matches "localhost" (case-insensitively), or
1060       *
1061       *  o the hostname is a FQDN that resolves to this machine, or
1062       *
1063       *  o it is an UNC String transformed to an URI (Windows only, RFC 8089
1064       *    Appendix E.3).
1065       *
1066       * For brevity, we only consider URLs with empty, "localhost", or
1067       * "127.0.0.1" hostnames as local, otherwise as an UNC String.
1068       *
1069       * Additionally, there is an exception for URLs with a Windows drive
1070       * letter in the authority (which was accidentally omitted from RFC 8089
1071       * Appendix E, but believe me, it was meant to be there. --MK)
1072       */
1073      if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) {
1074        /* the URL includes a host name, it must match "localhost" or
1075           "127.0.0.1" to be valid */
1076        if(checkprefix("localhost/", ptr) ||
1077           checkprefix("127.0.0.1/", ptr)) {
1078          ptr += 9; /* now points to the slash after the host */
1079        }
1080        else {
1081#if defined(_WIN32)
1082          size_t len;
1083
1084          /* the host name, NetBIOS computer name, can not contain disallowed
1085             chars, and the delimiting slash character must be appended to the
1086             host name */
1087          path = strpbrk(ptr, "/\\:*?\"<>|");
1088          if(!path || *path != '/') {
1089            result = CURLUE_BAD_FILE_URL;
1090            goto fail;
1091          }
1092
1093          len = path - ptr;
1094          if(len) {
1095            CURLcode code = Curl_dyn_addn(&host, ptr, len);
1096            if(code) {
1097              result = cc2cu(code);
1098              goto fail;
1099            }
1100            uncpath = TRUE;
1101          }
1102
1103          ptr -= 2; /* now points to the // before the host in UNC */
1104#else
1105          /* Invalid file://hostname/, expected localhost or 127.0.0.1 or
1106             none */
1107          result = CURLUE_BAD_FILE_URL;
1108          goto fail;
1109#endif
1110        }
1111      }
1112
1113      path = ptr;
1114      pathlen = urllen - (ptr - url);
1115    }
1116
1117    if(!uncpath)
1118      /* no host for file: URLs by default */
1119      Curl_dyn_reset(&host);
1120
1121#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__)
1122    /* Don't allow Windows drive letters when not in Windows.
1123     * This catches both "file:/c:" and "file:c:" */
1124    if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) ||
1125       STARTS_WITH_URL_DRIVE_PREFIX(path)) {
1126      /* File drive letters are only accepted in MSDOS/Windows */
1127      result = CURLUE_BAD_FILE_URL;
1128      goto fail;
1129    }
1130#else
1131    /* If the path starts with a slash and a drive letter, ditch the slash */
1132    if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) {
1133      /* This cannot be done with strcpy, as the memory chunks overlap! */
1134      path++;
1135      pathlen--;
1136    }
1137#endif
1138
1139  }
1140  else {
1141    /* clear path */
1142    const char *schemep = NULL;
1143    const char *hostp;
1144    size_t hostlen;
1145
1146    if(schemelen) {
1147      int i = 0;
1148      const char *p = &url[schemelen + 1];
1149      while((*p == '/') && (i < 4)) {
1150        p++;
1151        i++;
1152      }
1153
1154      schemep = schemebuf;
1155      if(!Curl_get_scheme_handler(schemep) &&
1156         !(flags & CURLU_NON_SUPPORT_SCHEME)) {
1157        result = CURLUE_UNSUPPORTED_SCHEME;
1158        goto fail;
1159      }
1160
1161      if((i < 1) || (i > 3)) {
1162        /* less than one or more than three slashes */
1163        result = CURLUE_BAD_SLASHES;
1164        goto fail;
1165      }
1166      hostp = p; /* host name starts here */
1167    }
1168    else {
1169      /* no scheme! */
1170
1171      if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) {
1172        result = CURLUE_BAD_SCHEME;
1173        goto fail;
1174      }
1175      if(flags & CURLU_DEFAULT_SCHEME)
1176        schemep = DEFAULT_SCHEME;
1177
1178      /*
1179       * The URL was badly formatted, let's try without scheme specified.
1180       */
1181      hostp = url;
1182    }
1183
1184    if(schemep) {
1185      u->scheme = strdup(schemep);
1186      if(!u->scheme) {
1187        result = CURLUE_OUT_OF_MEMORY;
1188        goto fail;
1189      }
1190    }
1191
1192    /* find the end of the host name + port number */
1193    hostlen = strcspn(hostp, "/?#");
1194    path = &hostp[hostlen];
1195
1196    /* this pathlen also contains the query and the fragment */
1197    pathlen = urllen - (path - url);
1198    if(hostlen) {
1199
1200      result = parse_authority(u, hostp, hostlen, flags, &host, schemelen);
1201      if(result)
1202        goto fail;
1203
1204      if((flags & CURLU_GUESS_SCHEME) && !schemep) {
1205        const char *hostname = Curl_dyn_ptr(&host);
1206        /* legacy curl-style guess based on host name */
1207        if(checkprefix("ftp.", hostname))
1208          schemep = "ftp";
1209        else if(checkprefix("dict.", hostname))
1210          schemep = "dict";
1211        else if(checkprefix("ldap.", hostname))
1212          schemep = "ldap";
1213        else if(checkprefix("imap.", hostname))
1214          schemep = "imap";
1215        else if(checkprefix("smtp.", hostname))
1216          schemep = "smtp";
1217        else if(checkprefix("pop3.", hostname))
1218          schemep = "pop3";
1219        else
1220          schemep = "http";
1221
1222        u->scheme = strdup(schemep);
1223        if(!u->scheme) {
1224          result = CURLUE_OUT_OF_MEMORY;
1225          goto fail;
1226        }
1227      }
1228    }
1229    else if(flags & CURLU_NO_AUTHORITY) {
1230      /* allowed to be empty. */
1231      if(Curl_dyn_add(&host, "")) {
1232        result = CURLUE_OUT_OF_MEMORY;
1233        goto fail;
1234      }
1235    }
1236    else {
1237      result = CURLUE_NO_HOST;
1238      goto fail;
1239    }
1240  }
1241
1242  fragment = strchr(path, '#');
1243  if(fragment) {
1244    fraglen = pathlen - (fragment - path);
1245    if(fraglen > 1) {
1246      /* skip the leading '#' in the copy but include the terminating null */
1247      if(flags & CURLU_URLENCODE) {
1248        struct dynbuf enc;
1249        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1250        result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE);
1251        if(result)
1252          goto fail;
1253        u->fragment = Curl_dyn_ptr(&enc);
1254      }
1255      else {
1256        u->fragment = Curl_memdup0(fragment + 1, fraglen - 1);
1257        if(!u->fragment) {
1258          result = CURLUE_OUT_OF_MEMORY;
1259          goto fail;
1260        }
1261      }
1262    }
1263    /* after this, pathlen still contains the query */
1264    pathlen -= fraglen;
1265  }
1266
1267  query = memchr(path, '?', pathlen);
1268  if(query) {
1269    size_t qlen = fragment ? (size_t)(fragment - query) :
1270      pathlen - (query - path);
1271    pathlen -= qlen;
1272    if(qlen > 1) {
1273      if(flags & CURLU_URLENCODE) {
1274        struct dynbuf enc;
1275        Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1276        /* skip the leading question mark */
1277        result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE);
1278        if(result)
1279          goto fail;
1280        u->query = Curl_dyn_ptr(&enc);
1281      }
1282      else {
1283        u->query = Curl_memdup0(query + 1, qlen - 1);
1284        if(!u->query) {
1285          result = CURLUE_OUT_OF_MEMORY;
1286          goto fail;
1287        }
1288      }
1289    }
1290    else {
1291      /* single byte query */
1292      u->query = strdup("");
1293      if(!u->query) {
1294        result = CURLUE_OUT_OF_MEMORY;
1295        goto fail;
1296      }
1297    }
1298  }
1299
1300  if(pathlen && (flags & CURLU_URLENCODE)) {
1301    struct dynbuf enc;
1302    Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1303    result = urlencode_str(&enc, path, pathlen, TRUE, FALSE);
1304    if(result)
1305      goto fail;
1306    pathlen = Curl_dyn_len(&enc);
1307    path = u->path = Curl_dyn_ptr(&enc);
1308  }
1309
1310  if(pathlen <= 1) {
1311    /* there is no path left or just the slash, unset */
1312    path = NULL;
1313  }
1314  else {
1315    if(!u->path) {
1316      u->path = Curl_memdup0(path, pathlen);
1317      if(!u->path) {
1318        result = CURLUE_OUT_OF_MEMORY;
1319        goto fail;
1320      }
1321      path = u->path;
1322    }
1323    else if(flags & CURLU_URLENCODE)
1324      /* it might have encoded more than just the path so cut it */
1325      u->path[pathlen] = 0;
1326
1327    if(!(flags & CURLU_PATH_AS_IS)) {
1328      /* remove ../ and ./ sequences according to RFC3986 */
1329      char *dedot;
1330      int err = dedotdotify((char *)path, pathlen, &dedot);
1331      if(err) {
1332        result = CURLUE_OUT_OF_MEMORY;
1333        goto fail;
1334      }
1335      if(dedot) {
1336        free(u->path);
1337        u->path = dedot;
1338      }
1339    }
1340  }
1341
1342  u->host = Curl_dyn_ptr(&host);
1343
1344  return result;
1345fail:
1346  Curl_dyn_free(&host);
1347  free_urlhandle(u);
1348  return result;
1349}
1350
1351/*
1352 * Parse the URL and, if successful, replace everything in the Curl_URL struct.
1353 */
1354static CURLUcode parseurl_and_replace(const char *url, CURLU *u,
1355                                      unsigned int flags)
1356{
1357  CURLUcode result;
1358  CURLU tmpurl;
1359  memset(&tmpurl, 0, sizeof(tmpurl));
1360  result = parseurl(url, &tmpurl, flags);
1361  if(!result) {
1362    free_urlhandle(u);
1363    *u = tmpurl;
1364  }
1365  return result;
1366}
1367
1368/*
1369 */
1370CURLU *curl_url(void)
1371{
1372  return calloc(1, sizeof(struct Curl_URL));
1373}
1374
1375void curl_url_cleanup(CURLU *u)
1376{
1377  if(u) {
1378    free_urlhandle(u);
1379    free(u);
1380  }
1381}
1382
1383#define DUP(dest, src, name)                    \
1384  do {                                          \
1385    if(src->name) {                             \
1386      dest->name = strdup(src->name);           \
1387      if(!dest->name)                           \
1388        goto fail;                              \
1389    }                                           \
1390  } while(0)
1391
1392CURLU *curl_url_dup(const CURLU *in)
1393{
1394  struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL));
1395  if(u) {
1396    DUP(u, in, scheme);
1397    DUP(u, in, user);
1398    DUP(u, in, password);
1399    DUP(u, in, options);
1400    DUP(u, in, host);
1401    DUP(u, in, port);
1402    DUP(u, in, path);
1403    DUP(u, in, query);
1404    DUP(u, in, fragment);
1405    DUP(u, in, zoneid);
1406    u->portnum = in->portnum;
1407  }
1408  return u;
1409fail:
1410  curl_url_cleanup(u);
1411  return NULL;
1412}
1413
1414CURLUcode curl_url_get(const CURLU *u, CURLUPart what,
1415                       char **part, unsigned int flags)
1416{
1417  const char *ptr;
1418  CURLUcode ifmissing = CURLUE_UNKNOWN_PART;
1419  char portbuf[7];
1420  bool urldecode = (flags & CURLU_URLDECODE)?1:0;
1421  bool urlencode = (flags & CURLU_URLENCODE)?1:0;
1422  bool punycode = FALSE;
1423  bool depunyfy = FALSE;
1424  bool plusdecode = FALSE;
1425  (void)flags;
1426  if(!u)
1427    return CURLUE_BAD_HANDLE;
1428  if(!part)
1429    return CURLUE_BAD_PARTPOINTER;
1430  *part = NULL;
1431
1432  switch(what) {
1433  case CURLUPART_SCHEME:
1434    ptr = u->scheme;
1435    ifmissing = CURLUE_NO_SCHEME;
1436    urldecode = FALSE; /* never for schemes */
1437    break;
1438  case CURLUPART_USER:
1439    ptr = u->user;
1440    ifmissing = CURLUE_NO_USER;
1441    break;
1442  case CURLUPART_PASSWORD:
1443    ptr = u->password;
1444    ifmissing = CURLUE_NO_PASSWORD;
1445    break;
1446  case CURLUPART_OPTIONS:
1447    ptr = u->options;
1448    ifmissing = CURLUE_NO_OPTIONS;
1449    break;
1450  case CURLUPART_HOST:
1451    ptr = u->host;
1452    ifmissing = CURLUE_NO_HOST;
1453    punycode = (flags & CURLU_PUNYCODE)?1:0;
1454    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1455    break;
1456  case CURLUPART_ZONEID:
1457    ptr = u->zoneid;
1458    ifmissing = CURLUE_NO_ZONEID;
1459    break;
1460  case CURLUPART_PORT:
1461    ptr = u->port;
1462    ifmissing = CURLUE_NO_PORT;
1463    urldecode = FALSE; /* never for port */
1464    if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) {
1465      /* there's no stored port number, but asked to deliver
1466         a default one for the scheme */
1467      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1468      if(h) {
1469        msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1470        ptr = portbuf;
1471      }
1472    }
1473    else if(ptr && u->scheme) {
1474      /* there is a stored port number, but ask to inhibit if
1475         it matches the default one for the scheme */
1476      const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme);
1477      if(h && (h->defport == u->portnum) &&
1478         (flags & CURLU_NO_DEFAULT_PORT))
1479        ptr = NULL;
1480    }
1481    break;
1482  case CURLUPART_PATH:
1483    ptr = u->path;
1484    if(!ptr)
1485      ptr = "/";
1486    break;
1487  case CURLUPART_QUERY:
1488    ptr = u->query;
1489    ifmissing = CURLUE_NO_QUERY;
1490    plusdecode = urldecode;
1491    break;
1492  case CURLUPART_FRAGMENT:
1493    ptr = u->fragment;
1494    ifmissing = CURLUE_NO_FRAGMENT;
1495    break;
1496  case CURLUPART_URL: {
1497    char *url;
1498    char *scheme;
1499    char *options = u->options;
1500    char *port = u->port;
1501    char *allochost = NULL;
1502    punycode = (flags & CURLU_PUNYCODE)?1:0;
1503    depunyfy = (flags & CURLU_PUNY2IDN)?1:0;
1504    if(u->scheme && strcasecompare("file", u->scheme)) {
1505      url = aprintf("file://%s%s%s",
1506                    u->path,
1507                    u->fragment? "#": "",
1508                    u->fragment? u->fragment : "");
1509    }
1510    else if(!u->host)
1511      return CURLUE_NO_HOST;
1512    else {
1513      const struct Curl_handler *h = NULL;
1514      if(u->scheme)
1515        scheme = u->scheme;
1516      else if(flags & CURLU_DEFAULT_SCHEME)
1517        scheme = (char *) DEFAULT_SCHEME;
1518      else
1519        return CURLUE_NO_SCHEME;
1520
1521      h = Curl_get_scheme_handler(scheme);
1522      if(!port && (flags & CURLU_DEFAULT_PORT)) {
1523        /* there's no stored port number, but asked to deliver
1524           a default one for the scheme */
1525        if(h) {
1526          msnprintf(portbuf, sizeof(portbuf), "%u", h->defport);
1527          port = portbuf;
1528        }
1529      }
1530      else if(port) {
1531        /* there is a stored port number, but asked to inhibit if it matches
1532           the default one for the scheme */
1533        if(h && (h->defport == u->portnum) &&
1534           (flags & CURLU_NO_DEFAULT_PORT))
1535          port = NULL;
1536      }
1537
1538      if(h && !(h->flags & PROTOPT_URLOPTIONS))
1539        options = NULL;
1540
1541      if(u->host[0] == '[') {
1542        if(u->zoneid) {
1543          /* make it '[ host %25 zoneid ]' */
1544          struct dynbuf enc;
1545          size_t hostlen = strlen(u->host);
1546          Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1547          if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host,
1548                           u->zoneid))
1549            return CURLUE_OUT_OF_MEMORY;
1550          allochost = Curl_dyn_ptr(&enc);
1551        }
1552      }
1553      else if(urlencode) {
1554        allochost = curl_easy_escape(NULL, u->host, 0);
1555        if(!allochost)
1556          return CURLUE_OUT_OF_MEMORY;
1557      }
1558      else if(punycode) {
1559        if(!Curl_is_ASCII_name(u->host)) {
1560#ifndef USE_IDN
1561          return CURLUE_LACKS_IDN;
1562#else
1563          CURLcode result = Curl_idn_decode(u->host, &allochost);
1564          if(result)
1565            return (result == CURLE_OUT_OF_MEMORY) ?
1566              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1567#endif
1568        }
1569      }
1570      else if(depunyfy) {
1571        if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) {
1572#ifndef USE_IDN
1573          return CURLUE_LACKS_IDN;
1574#else
1575          CURLcode result = Curl_idn_encode(u->host, &allochost);
1576          if(result)
1577            /* this is the most likely error */
1578            return (result == CURLE_OUT_OF_MEMORY) ?
1579              CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1580#endif
1581        }
1582      }
1583
1584      url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
1585                    scheme,
1586                    u->user ? u->user : "",
1587                    u->password ? ":": "",
1588                    u->password ? u->password : "",
1589                    options ? ";" : "",
1590                    options ? options : "",
1591                    (u->user || u->password || options) ? "@": "",
1592                    allochost ? allochost : u->host,
1593                    port ? ":": "",
1594                    port ? port : "",
1595                    u->path ? u->path : "/",
1596                    (u->query && u->query[0]) ? "?": "",
1597                    (u->query && u->query[0]) ? u->query : "",
1598                    u->fragment? "#": "",
1599                    u->fragment? u->fragment : "");
1600      free(allochost);
1601    }
1602    if(!url)
1603      return CURLUE_OUT_OF_MEMORY;
1604    *part = url;
1605    return CURLUE_OK;
1606  }
1607  default:
1608    ptr = NULL;
1609    break;
1610  }
1611  if(ptr) {
1612    size_t partlen = strlen(ptr);
1613    size_t i = 0;
1614    *part = Curl_memdup0(ptr, partlen);
1615    if(!*part)
1616      return CURLUE_OUT_OF_MEMORY;
1617    if(plusdecode) {
1618      /* convert + to space */
1619      char *plus = *part;
1620      for(i = 0; i < partlen; ++plus, i++) {
1621        if(*plus == '+')
1622          *plus = ' ';
1623      }
1624    }
1625    if(urldecode) {
1626      char *decoded;
1627      size_t dlen;
1628      /* this unconditional rejection of control bytes is documented
1629         API behavior */
1630      CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL);
1631      free(*part);
1632      if(res) {
1633        *part = NULL;
1634        return CURLUE_URLDECODE;
1635      }
1636      *part = decoded;
1637      partlen = dlen;
1638    }
1639    if(urlencode) {
1640      struct dynbuf enc;
1641      CURLUcode uc;
1642      Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH);
1643      uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY);
1644      if(uc)
1645        return uc;
1646      free(*part);
1647      *part = Curl_dyn_ptr(&enc);
1648    }
1649    else if(punycode) {
1650      if(!Curl_is_ASCII_name(u->host)) {
1651#ifndef USE_IDN
1652        return CURLUE_LACKS_IDN;
1653#else
1654        char *allochost;
1655        CURLcode result = Curl_idn_decode(*part, &allochost);
1656        if(result)
1657          return (result == CURLE_OUT_OF_MEMORY) ?
1658            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1659        free(*part);
1660        *part = allochost;
1661#endif
1662      }
1663    }
1664    else if(depunyfy) {
1665      if(Curl_is_ASCII_name(u->host)  && !strncmp("xn--", u->host, 4)) {
1666#ifndef USE_IDN
1667        return CURLUE_LACKS_IDN;
1668#else
1669        char *allochost;
1670        CURLcode result = Curl_idn_encode(*part, &allochost);
1671        if(result)
1672          return (result == CURLE_OUT_OF_MEMORY) ?
1673            CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME;
1674        free(*part);
1675        *part = allochost;
1676#endif
1677      }
1678    }
1679
1680    return CURLUE_OK;
1681  }
1682  else
1683    return ifmissing;
1684}
1685
1686CURLUcode curl_url_set(CURLU *u, CURLUPart what,
1687                       const char *part, unsigned int flags)
1688{
1689  char **storep = NULL;
1690  long port = 0;
1691  bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0;
1692  bool plusencode = FALSE;
1693  bool urlskipslash = FALSE;
1694  bool leadingslash = FALSE;
1695  bool appendquery = FALSE;
1696  bool equalsencode = FALSE;
1697  size_t nalloc;
1698
1699  if(!u)
1700    return CURLUE_BAD_HANDLE;
1701  if(!part) {
1702    /* setting a part to NULL clears it */
1703    switch(what) {
1704    case CURLUPART_URL:
1705      break;
1706    case CURLUPART_SCHEME:
1707      storep = &u->scheme;
1708      break;
1709    case CURLUPART_USER:
1710      storep = &u->user;
1711      break;
1712    case CURLUPART_PASSWORD:
1713      storep = &u->password;
1714      break;
1715    case CURLUPART_OPTIONS:
1716      storep = &u->options;
1717      break;
1718    case CURLUPART_HOST:
1719      storep = &u->host;
1720      break;
1721    case CURLUPART_ZONEID:
1722      storep = &u->zoneid;
1723      break;
1724    case CURLUPART_PORT:
1725      u->portnum = 0;
1726      storep = &u->port;
1727      break;
1728    case CURLUPART_PATH:
1729      storep = &u->path;
1730      break;
1731    case CURLUPART_QUERY:
1732      storep = &u->query;
1733      break;
1734    case CURLUPART_FRAGMENT:
1735      storep = &u->fragment;
1736      break;
1737    default:
1738      return CURLUE_UNKNOWN_PART;
1739    }
1740    if(storep && *storep) {
1741      Curl_safefree(*storep);
1742    }
1743    else if(!storep) {
1744      free_urlhandle(u);
1745      memset(u, 0, sizeof(struct Curl_URL));
1746    }
1747    return CURLUE_OK;
1748  }
1749
1750  nalloc = strlen(part);
1751  if(nalloc > CURL_MAX_INPUT_LENGTH)
1752    /* excessive input length */
1753    return CURLUE_MALFORMED_INPUT;
1754
1755  switch(what) {
1756  case CURLUPART_SCHEME: {
1757    size_t plen = strlen(part);
1758    const char *s = part;
1759    if((plen > MAX_SCHEME_LEN) || (plen < 1))
1760      /* too long or too short */
1761      return CURLUE_BAD_SCHEME;
1762   /* verify that it is a fine scheme */
1763    if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part))
1764      return CURLUE_UNSUPPORTED_SCHEME;
1765    storep = &u->scheme;
1766    urlencode = FALSE; /* never */
1767    if(ISALPHA(*s)) {
1768      /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */
1769      while(--plen) {
1770        if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.'))
1771          s++; /* fine */
1772        else
1773          return CURLUE_BAD_SCHEME;
1774      }
1775    }
1776    else
1777      return CURLUE_BAD_SCHEME;
1778    break;
1779  }
1780  case CURLUPART_USER:
1781    storep = &u->user;
1782    break;
1783  case CURLUPART_PASSWORD:
1784    storep = &u->password;
1785    break;
1786  case CURLUPART_OPTIONS:
1787    storep = &u->options;
1788    break;
1789  case CURLUPART_HOST:
1790    storep = &u->host;
1791    Curl_safefree(u->zoneid);
1792    break;
1793  case CURLUPART_ZONEID:
1794    storep = &u->zoneid;
1795    break;
1796  case CURLUPART_PORT:
1797  {
1798    char *endp;
1799    urlencode = FALSE; /* never */
1800    port = strtol(part, &endp, 10);  /* Port number must be decimal */
1801    if((port <= 0) || (port > 0xffff))
1802      return CURLUE_BAD_PORT_NUMBER;
1803    if(*endp)
1804      /* weirdly provided number, not good! */
1805      return CURLUE_BAD_PORT_NUMBER;
1806    storep = &u->port;
1807  }
1808  break;
1809  case CURLUPART_PATH:
1810    urlskipslash = TRUE;
1811    leadingslash = TRUE; /* enforce */
1812    storep = &u->path;
1813    break;
1814  case CURLUPART_QUERY:
1815    plusencode = urlencode;
1816    appendquery = (flags & CURLU_APPENDQUERY)?1:0;
1817    equalsencode = appendquery;
1818    storep = &u->query;
1819    break;
1820  case CURLUPART_FRAGMENT:
1821    storep = &u->fragment;
1822    break;
1823  case CURLUPART_URL: {
1824    /*
1825     * Allow a new URL to replace the existing (if any) contents.
1826     *
1827     * If the existing contents is enough for a URL, allow a relative URL to
1828     * replace it.
1829     */
1830    CURLcode result;
1831    CURLUcode uc;
1832    char *oldurl;
1833    char *redired_url;
1834
1835    if(!nalloc)
1836      /* a blank URL is not a valid URL */
1837      return CURLUE_MALFORMED_INPUT;
1838
1839    /* if the new thing is absolute or the old one is not
1840     * (we could not get an absolute url in 'oldurl'),
1841     * then replace the existing with the new. */
1842    if(Curl_is_absolute_url(part, NULL, 0,
1843                            flags & (CURLU_GUESS_SCHEME|
1844                                     CURLU_DEFAULT_SCHEME))
1845       || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) {
1846      return parseurl_and_replace(part, u, flags);
1847    }
1848
1849    /* apply the relative part to create a new URL
1850     * and replace the existing one with it. */
1851    result = concat_url(oldurl, part, &redired_url);
1852    free(oldurl);
1853    if(result)
1854      return cc2cu(result);
1855
1856    uc = parseurl_and_replace(redired_url, u, flags);
1857    free(redired_url);
1858    return uc;
1859  }
1860  default:
1861    return CURLUE_UNKNOWN_PART;
1862  }
1863  DEBUGASSERT(storep);
1864  {
1865    const char *newp;
1866    struct dynbuf enc;
1867    Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash);
1868
1869    if(leadingslash && (part[0] != '/')) {
1870      CURLcode result = Curl_dyn_addn(&enc, "/", 1);
1871      if(result)
1872        return cc2cu(result);
1873    }
1874    if(urlencode) {
1875      const unsigned char *i;
1876
1877      for(i = (const unsigned char *)part; *i; i++) {
1878        CURLcode result;
1879        if((*i == ' ') && plusencode) {
1880          result = Curl_dyn_addn(&enc, "+", 1);
1881          if(result)
1882            return CURLUE_OUT_OF_MEMORY;
1883        }
1884        else if(ISUNRESERVED(*i) ||
1885                ((*i == '/') && urlskipslash) ||
1886                ((*i == '=') && equalsencode)) {
1887          if((*i == '=') && equalsencode)
1888            /* only skip the first equals sign */
1889            equalsencode = FALSE;
1890          result = Curl_dyn_addn(&enc, i, 1);
1891          if(result)
1892            return cc2cu(result);
1893        }
1894        else {
1895          char out[3]={'%'};
1896          out[1] = hexdigits[*i>>4];
1897          out[2] = hexdigits[*i & 0xf];
1898          result = Curl_dyn_addn(&enc, out, 3);
1899          if(result)
1900            return cc2cu(result);
1901        }
1902      }
1903    }
1904    else {
1905      char *p;
1906      CURLcode result = Curl_dyn_add(&enc, part);
1907      if(result)
1908        return cc2cu(result);
1909      p = Curl_dyn_ptr(&enc);
1910      while(*p) {
1911        /* make sure percent encoded are lower case */
1912        if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) &&
1913           (ISUPPER(p[1]) || ISUPPER(p[2]))) {
1914          p[1] = Curl_raw_tolower(p[1]);
1915          p[2] = Curl_raw_tolower(p[2]);
1916          p += 3;
1917        }
1918        else
1919          p++;
1920      }
1921    }
1922    newp = Curl_dyn_ptr(&enc);
1923
1924    if(appendquery && newp) {
1925      /* Append the 'newp' string onto the old query. Add a '&' separator if
1926         none is present at the end of the existing query already */
1927
1928      size_t querylen = u->query ? strlen(u->query) : 0;
1929      bool addamperand = querylen && (u->query[querylen -1] != '&');
1930      if(querylen) {
1931        struct dynbuf qbuf;
1932        Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH);
1933
1934        if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */
1935          goto nomem;
1936
1937        if(addamperand) {
1938          if(Curl_dyn_addn(&qbuf, "&", 1))
1939            goto nomem;
1940        }
1941        if(Curl_dyn_add(&qbuf, newp))
1942          goto nomem;
1943        Curl_dyn_free(&enc);
1944        free(*storep);
1945        *storep = Curl_dyn_ptr(&qbuf);
1946        return CURLUE_OK;
1947nomem:
1948        Curl_dyn_free(&enc);
1949        return CURLUE_OUT_OF_MEMORY;
1950      }
1951    }
1952
1953    else if(what == CURLUPART_HOST) {
1954      size_t n = Curl_dyn_len(&enc);
1955      if(!n && (flags & CURLU_NO_AUTHORITY)) {
1956        /* Skip hostname check, it's allowed to be empty. */
1957      }
1958      else {
1959        if(!n || hostname_check(u, (char *)newp, n)) {
1960          Curl_dyn_free(&enc);
1961          return CURLUE_BAD_HOSTNAME;
1962        }
1963      }
1964    }
1965
1966    free(*storep);
1967    *storep = (char *)newp;
1968  }
1969  /* set after the string, to make it not assigned if the allocation above
1970     fails */
1971  if(port)
1972    u->portnum = port;
1973  return CURLUE_OK;
1974}
1975