113498266Sopenharmony_ci/*************************************************************************** 213498266Sopenharmony_ci * _ _ ____ _ 313498266Sopenharmony_ci * Project ___| | | | _ \| | 413498266Sopenharmony_ci * / __| | | | |_) | | 513498266Sopenharmony_ci * | (__| |_| | _ <| |___ 613498266Sopenharmony_ci * \___|\___/|_| \_\_____| 713498266Sopenharmony_ci * 813498266Sopenharmony_ci * Copyright (C) Daniel Stenberg, <daniel@haxx.se>, et al. 913498266Sopenharmony_ci * 1013498266Sopenharmony_ci * This software is licensed as described in the file COPYING, which 1113498266Sopenharmony_ci * you should have received as part of this distribution. The terms 1213498266Sopenharmony_ci * are also available at https://curl.se/docs/copyright.html. 1313498266Sopenharmony_ci * 1413498266Sopenharmony_ci * You may opt to use, copy, modify, merge, publish, distribute and/or sell 1513498266Sopenharmony_ci * copies of the Software, and permit persons to whom the Software is 1613498266Sopenharmony_ci * furnished to do so, under the terms of the COPYING file. 1713498266Sopenharmony_ci * 1813498266Sopenharmony_ci * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY 1913498266Sopenharmony_ci * KIND, either express or implied. 2013498266Sopenharmony_ci * 2113498266Sopenharmony_ci * SPDX-License-Identifier: curl 2213498266Sopenharmony_ci * 2313498266Sopenharmony_ci ***************************************************************************/ 2413498266Sopenharmony_ci 2513498266Sopenharmony_ci#include "curl_setup.h" 2613498266Sopenharmony_ci 2713498266Sopenharmony_ci#include "urldata.h" 2813498266Sopenharmony_ci#include "urlapi-int.h" 2913498266Sopenharmony_ci#include "strcase.h" 3013498266Sopenharmony_ci#include "url.h" 3113498266Sopenharmony_ci#include "escape.h" 3213498266Sopenharmony_ci#include "curl_ctype.h" 3313498266Sopenharmony_ci#include "inet_pton.h" 3413498266Sopenharmony_ci#include "inet_ntop.h" 3513498266Sopenharmony_ci#include "strdup.h" 3613498266Sopenharmony_ci#include "idn.h" 3713498266Sopenharmony_ci#include "curl_memrchr.h" 3813498266Sopenharmony_ci 3913498266Sopenharmony_ci/* The last 3 #include files should be in this order */ 4013498266Sopenharmony_ci#include "curl_printf.h" 4113498266Sopenharmony_ci#include "curl_memory.h" 4213498266Sopenharmony_ci#include "memdebug.h" 4313498266Sopenharmony_ci 4413498266Sopenharmony_ci /* MSDOS/Windows style drive prefix, eg c: in c:foo */ 4513498266Sopenharmony_ci#define STARTS_WITH_DRIVE_PREFIX(str) \ 4613498266Sopenharmony_ci ((('a' <= str[0] && str[0] <= 'z') || \ 4713498266Sopenharmony_ci ('A' <= str[0] && str[0] <= 'Z')) && \ 4813498266Sopenharmony_ci (str[1] == ':')) 4913498266Sopenharmony_ci 5013498266Sopenharmony_ci /* MSDOS/Windows style drive prefix, optionally with 5113498266Sopenharmony_ci * a '|' instead of ':', followed by a slash or NUL */ 5213498266Sopenharmony_ci#define STARTS_WITH_URL_DRIVE_PREFIX(str) \ 5313498266Sopenharmony_ci ((('a' <= (str)[0] && (str)[0] <= 'z') || \ 5413498266Sopenharmony_ci ('A' <= (str)[0] && (str)[0] <= 'Z')) && \ 5513498266Sopenharmony_ci ((str)[1] == ':' || (str)[1] == '|') && \ 5613498266Sopenharmony_ci ((str)[2] == '/' || (str)[2] == '\\' || (str)[2] == 0)) 5713498266Sopenharmony_ci 5813498266Sopenharmony_ci/* scheme is not URL encoded, the longest libcurl supported ones are... */ 5913498266Sopenharmony_ci#define MAX_SCHEME_LEN 40 6013498266Sopenharmony_ci 6113498266Sopenharmony_ci/* 6213498266Sopenharmony_ci * If ENABLE_IPV6 is disabled, we still want to parse IPv6 addresses, so make 6313498266Sopenharmony_ci * sure we have _some_ value for AF_INET6 without polluting our fake value 6413498266Sopenharmony_ci * everywhere. 6513498266Sopenharmony_ci */ 6613498266Sopenharmony_ci#if !defined(ENABLE_IPV6) && !defined(AF_INET6) 6713498266Sopenharmony_ci#define AF_INET6 (AF_INET + 1) 6813498266Sopenharmony_ci#endif 6913498266Sopenharmony_ci 7013498266Sopenharmony_ci/* Internal representation of CURLU. Point to URL-encoded strings. */ 7113498266Sopenharmony_cistruct Curl_URL { 7213498266Sopenharmony_ci char *scheme; 7313498266Sopenharmony_ci char *user; 7413498266Sopenharmony_ci char *password; 7513498266Sopenharmony_ci char *options; /* IMAP only? */ 7613498266Sopenharmony_ci char *host; 7713498266Sopenharmony_ci char *zoneid; /* for numerical IPv6 addresses */ 7813498266Sopenharmony_ci char *port; 7913498266Sopenharmony_ci char *path; 8013498266Sopenharmony_ci char *query; 8113498266Sopenharmony_ci char *fragment; 8213498266Sopenharmony_ci long portnum; /* the numerical version */ 8313498266Sopenharmony_ci}; 8413498266Sopenharmony_ci 8513498266Sopenharmony_ci#define DEFAULT_SCHEME "https" 8613498266Sopenharmony_ci 8713498266Sopenharmony_cistatic void free_urlhandle(struct Curl_URL *u) 8813498266Sopenharmony_ci{ 8913498266Sopenharmony_ci free(u->scheme); 9013498266Sopenharmony_ci free(u->user); 9113498266Sopenharmony_ci free(u->password); 9213498266Sopenharmony_ci free(u->options); 9313498266Sopenharmony_ci free(u->host); 9413498266Sopenharmony_ci free(u->zoneid); 9513498266Sopenharmony_ci free(u->port); 9613498266Sopenharmony_ci free(u->path); 9713498266Sopenharmony_ci free(u->query); 9813498266Sopenharmony_ci free(u->fragment); 9913498266Sopenharmony_ci} 10013498266Sopenharmony_ci 10113498266Sopenharmony_ci/* 10213498266Sopenharmony_ci * Find the separator at the end of the host name, or the '?' in cases like 10313498266Sopenharmony_ci * http://www.example.com?id=2380 10413498266Sopenharmony_ci */ 10513498266Sopenharmony_cistatic const char *find_host_sep(const char *url) 10613498266Sopenharmony_ci{ 10713498266Sopenharmony_ci const char *sep; 10813498266Sopenharmony_ci const char *query; 10913498266Sopenharmony_ci 11013498266Sopenharmony_ci /* Find the start of the hostname */ 11113498266Sopenharmony_ci sep = strstr(url, "//"); 11213498266Sopenharmony_ci if(!sep) 11313498266Sopenharmony_ci sep = url; 11413498266Sopenharmony_ci else 11513498266Sopenharmony_ci sep += 2; 11613498266Sopenharmony_ci 11713498266Sopenharmony_ci query = strchr(sep, '?'); 11813498266Sopenharmony_ci sep = strchr(sep, '/'); 11913498266Sopenharmony_ci 12013498266Sopenharmony_ci if(!sep) 12113498266Sopenharmony_ci sep = url + strlen(url); 12213498266Sopenharmony_ci 12313498266Sopenharmony_ci if(!query) 12413498266Sopenharmony_ci query = url + strlen(url); 12513498266Sopenharmony_ci 12613498266Sopenharmony_ci return sep < query ? sep : query; 12713498266Sopenharmony_ci} 12813498266Sopenharmony_ci 12913498266Sopenharmony_ci/* convert CURLcode to CURLUcode */ 13013498266Sopenharmony_ci#define cc2cu(x) ((x) == CURLE_TOO_LARGE ? CURLUE_TOO_LARGE : \ 13113498266Sopenharmony_ci CURLUE_OUT_OF_MEMORY) 13213498266Sopenharmony_ci/* 13313498266Sopenharmony_ci * Decide whether a character in a URL must be escaped. 13413498266Sopenharmony_ci */ 13513498266Sopenharmony_ci#define urlchar_needs_escaping(c) (!(ISCNTRL(c) || ISSPACE(c) || ISGRAPH(c))) 13613498266Sopenharmony_ci 13713498266Sopenharmony_cistatic const char hexdigits[] = "0123456789abcdef"; 13813498266Sopenharmony_ci/* urlencode_str() writes data into an output dynbuf and URL-encodes the 13913498266Sopenharmony_ci * spaces in the source URL accordingly. 14013498266Sopenharmony_ci * 14113498266Sopenharmony_ci * URL encoding should be skipped for host names, otherwise IDN resolution 14213498266Sopenharmony_ci * will fail. 14313498266Sopenharmony_ci */ 14413498266Sopenharmony_cistatic CURLUcode urlencode_str(struct dynbuf *o, const char *url, 14513498266Sopenharmony_ci size_t len, bool relative, 14613498266Sopenharmony_ci bool query) 14713498266Sopenharmony_ci{ 14813498266Sopenharmony_ci /* we must add this with whitespace-replacing */ 14913498266Sopenharmony_ci bool left = !query; 15013498266Sopenharmony_ci const unsigned char *iptr; 15113498266Sopenharmony_ci const unsigned char *host_sep = (const unsigned char *) url; 15213498266Sopenharmony_ci CURLcode result; 15313498266Sopenharmony_ci 15413498266Sopenharmony_ci if(!relative) 15513498266Sopenharmony_ci host_sep = (const unsigned char *) find_host_sep(url); 15613498266Sopenharmony_ci 15713498266Sopenharmony_ci for(iptr = (unsigned char *)url; /* read from here */ 15813498266Sopenharmony_ci len; iptr++, len--) { 15913498266Sopenharmony_ci 16013498266Sopenharmony_ci if(iptr < host_sep) { 16113498266Sopenharmony_ci result = Curl_dyn_addn(o, iptr, 1); 16213498266Sopenharmony_ci if(result) 16313498266Sopenharmony_ci return cc2cu(result); 16413498266Sopenharmony_ci continue; 16513498266Sopenharmony_ci } 16613498266Sopenharmony_ci 16713498266Sopenharmony_ci if(*iptr == ' ') { 16813498266Sopenharmony_ci if(left) 16913498266Sopenharmony_ci result = Curl_dyn_addn(o, "%20", 3); 17013498266Sopenharmony_ci else 17113498266Sopenharmony_ci result = Curl_dyn_addn(o, "+", 1); 17213498266Sopenharmony_ci if(result) 17313498266Sopenharmony_ci return cc2cu(result); 17413498266Sopenharmony_ci continue; 17513498266Sopenharmony_ci } 17613498266Sopenharmony_ci 17713498266Sopenharmony_ci if(*iptr == '?') 17813498266Sopenharmony_ci left = FALSE; 17913498266Sopenharmony_ci 18013498266Sopenharmony_ci if(urlchar_needs_escaping(*iptr)) { 18113498266Sopenharmony_ci char out[3]={'%'}; 18213498266Sopenharmony_ci out[1] = hexdigits[*iptr>>4]; 18313498266Sopenharmony_ci out[2] = hexdigits[*iptr & 0xf]; 18413498266Sopenharmony_ci result = Curl_dyn_addn(o, out, 3); 18513498266Sopenharmony_ci } 18613498266Sopenharmony_ci else 18713498266Sopenharmony_ci result = Curl_dyn_addn(o, iptr, 1); 18813498266Sopenharmony_ci if(result) 18913498266Sopenharmony_ci return cc2cu(result); 19013498266Sopenharmony_ci } 19113498266Sopenharmony_ci 19213498266Sopenharmony_ci return CURLUE_OK; 19313498266Sopenharmony_ci} 19413498266Sopenharmony_ci 19513498266Sopenharmony_ci/* 19613498266Sopenharmony_ci * Returns the length of the scheme if the given URL is absolute (as opposed 19713498266Sopenharmony_ci * to relative). Stores the scheme in the buffer if TRUE and 'buf' is 19813498266Sopenharmony_ci * non-NULL. The buflen must be larger than MAX_SCHEME_LEN if buf is set. 19913498266Sopenharmony_ci * 20013498266Sopenharmony_ci * If 'guess_scheme' is TRUE, it means the URL might be provided without 20113498266Sopenharmony_ci * scheme. 20213498266Sopenharmony_ci */ 20313498266Sopenharmony_cisize_t Curl_is_absolute_url(const char *url, char *buf, size_t buflen, 20413498266Sopenharmony_ci bool guess_scheme) 20513498266Sopenharmony_ci{ 20613498266Sopenharmony_ci int i = 0; 20713498266Sopenharmony_ci DEBUGASSERT(!buf || (buflen > MAX_SCHEME_LEN)); 20813498266Sopenharmony_ci (void)buflen; /* only used in debug-builds */ 20913498266Sopenharmony_ci if(buf) 21013498266Sopenharmony_ci buf[0] = 0; /* always leave a defined value in buf */ 21113498266Sopenharmony_ci#ifdef _WIN32 21213498266Sopenharmony_ci if(guess_scheme && STARTS_WITH_DRIVE_PREFIX(url)) 21313498266Sopenharmony_ci return 0; 21413498266Sopenharmony_ci#endif 21513498266Sopenharmony_ci if(ISALPHA(url[0])) 21613498266Sopenharmony_ci for(i = 1; i < MAX_SCHEME_LEN; ++i) { 21713498266Sopenharmony_ci char s = url[i]; 21813498266Sopenharmony_ci if(s && (ISALNUM(s) || (s == '+') || (s == '-') || (s == '.') )) { 21913498266Sopenharmony_ci /* RFC 3986 3.1 explains: 22013498266Sopenharmony_ci scheme = ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) 22113498266Sopenharmony_ci */ 22213498266Sopenharmony_ci } 22313498266Sopenharmony_ci else { 22413498266Sopenharmony_ci break; 22513498266Sopenharmony_ci } 22613498266Sopenharmony_ci } 22713498266Sopenharmony_ci if(i && (url[i] == ':') && ((url[i + 1] == '/') || !guess_scheme)) { 22813498266Sopenharmony_ci /* If this does not guess scheme, the scheme always ends with the colon so 22913498266Sopenharmony_ci that this also detects data: URLs etc. In guessing mode, data: could 23013498266Sopenharmony_ci be the host name "data" with a specified port number. */ 23113498266Sopenharmony_ci 23213498266Sopenharmony_ci /* the length of the scheme is the name part only */ 23313498266Sopenharmony_ci size_t len = i; 23413498266Sopenharmony_ci if(buf) { 23513498266Sopenharmony_ci buf[i] = 0; 23613498266Sopenharmony_ci while(i--) { 23713498266Sopenharmony_ci buf[i] = Curl_raw_tolower(url[i]); 23813498266Sopenharmony_ci } 23913498266Sopenharmony_ci } 24013498266Sopenharmony_ci return len; 24113498266Sopenharmony_ci } 24213498266Sopenharmony_ci return 0; 24313498266Sopenharmony_ci} 24413498266Sopenharmony_ci 24513498266Sopenharmony_ci/* 24613498266Sopenharmony_ci * Concatenate a relative URL to a base URL making it absolute. 24713498266Sopenharmony_ci * URL-encodes any spaces. 24813498266Sopenharmony_ci * The returned pointer must be freed by the caller unless NULL 24913498266Sopenharmony_ci * (returns NULL on out of memory). 25013498266Sopenharmony_ci * 25113498266Sopenharmony_ci * Note that this function destroys the 'base' string. 25213498266Sopenharmony_ci */ 25313498266Sopenharmony_cistatic CURLcode concat_url(char *base, const char *relurl, char **newurl) 25413498266Sopenharmony_ci{ 25513498266Sopenharmony_ci /*** 25613498266Sopenharmony_ci TRY to append this new path to the old URL 25713498266Sopenharmony_ci to the right of the host part. Oh crap, this is doomed to cause 25813498266Sopenharmony_ci problems in the future... 25913498266Sopenharmony_ci */ 26013498266Sopenharmony_ci struct dynbuf newest; 26113498266Sopenharmony_ci char *protsep; 26213498266Sopenharmony_ci char *pathsep; 26313498266Sopenharmony_ci bool host_changed = FALSE; 26413498266Sopenharmony_ci const char *useurl = relurl; 26513498266Sopenharmony_ci CURLcode result = CURLE_OK; 26613498266Sopenharmony_ci CURLUcode uc; 26713498266Sopenharmony_ci *newurl = NULL; 26813498266Sopenharmony_ci 26913498266Sopenharmony_ci /* protsep points to the start of the host name */ 27013498266Sopenharmony_ci protsep = strstr(base, "//"); 27113498266Sopenharmony_ci if(!protsep) 27213498266Sopenharmony_ci protsep = base; 27313498266Sopenharmony_ci else 27413498266Sopenharmony_ci protsep += 2; /* pass the slashes */ 27513498266Sopenharmony_ci 27613498266Sopenharmony_ci if('/' != relurl[0]) { 27713498266Sopenharmony_ci int level = 0; 27813498266Sopenharmony_ci 27913498266Sopenharmony_ci /* First we need to find out if there's a ?-letter in the URL, 28013498266Sopenharmony_ci and cut it and the right-side of that off */ 28113498266Sopenharmony_ci pathsep = strchr(protsep, '?'); 28213498266Sopenharmony_ci if(pathsep) 28313498266Sopenharmony_ci *pathsep = 0; 28413498266Sopenharmony_ci 28513498266Sopenharmony_ci /* we have a relative path to append to the last slash if there's one 28613498266Sopenharmony_ci available, or if the new URL is just a query string (starts with a 28713498266Sopenharmony_ci '?') we append the new one at the end of the entire currently worked 28813498266Sopenharmony_ci out URL */ 28913498266Sopenharmony_ci if(useurl[0] != '?') { 29013498266Sopenharmony_ci pathsep = strrchr(protsep, '/'); 29113498266Sopenharmony_ci if(pathsep) 29213498266Sopenharmony_ci *pathsep = 0; 29313498266Sopenharmony_ci } 29413498266Sopenharmony_ci 29513498266Sopenharmony_ci /* Check if there's any slash after the host name, and if so, remember 29613498266Sopenharmony_ci that position instead */ 29713498266Sopenharmony_ci pathsep = strchr(protsep, '/'); 29813498266Sopenharmony_ci if(pathsep) 29913498266Sopenharmony_ci protsep = pathsep + 1; 30013498266Sopenharmony_ci else 30113498266Sopenharmony_ci protsep = NULL; 30213498266Sopenharmony_ci 30313498266Sopenharmony_ci /* now deal with one "./" or any amount of "../" in the newurl 30413498266Sopenharmony_ci and act accordingly */ 30513498266Sopenharmony_ci 30613498266Sopenharmony_ci if((useurl[0] == '.') && (useurl[1] == '/')) 30713498266Sopenharmony_ci useurl += 2; /* just skip the "./" */ 30813498266Sopenharmony_ci 30913498266Sopenharmony_ci while((useurl[0] == '.') && 31013498266Sopenharmony_ci (useurl[1] == '.') && 31113498266Sopenharmony_ci (useurl[2] == '/')) { 31213498266Sopenharmony_ci level++; 31313498266Sopenharmony_ci useurl += 3; /* pass the "../" */ 31413498266Sopenharmony_ci } 31513498266Sopenharmony_ci 31613498266Sopenharmony_ci if(protsep) { 31713498266Sopenharmony_ci while(level--) { 31813498266Sopenharmony_ci /* cut off one more level from the right of the original URL */ 31913498266Sopenharmony_ci pathsep = strrchr(protsep, '/'); 32013498266Sopenharmony_ci if(pathsep) 32113498266Sopenharmony_ci *pathsep = 0; 32213498266Sopenharmony_ci else { 32313498266Sopenharmony_ci *protsep = 0; 32413498266Sopenharmony_ci break; 32513498266Sopenharmony_ci } 32613498266Sopenharmony_ci } 32713498266Sopenharmony_ci } 32813498266Sopenharmony_ci } 32913498266Sopenharmony_ci else { 33013498266Sopenharmony_ci /* We got a new absolute path for this server */ 33113498266Sopenharmony_ci 33213498266Sopenharmony_ci if(relurl[1] == '/') { 33313498266Sopenharmony_ci /* the new URL starts with //, just keep the protocol part from the 33413498266Sopenharmony_ci original one */ 33513498266Sopenharmony_ci *protsep = 0; 33613498266Sopenharmony_ci useurl = &relurl[2]; /* we keep the slashes from the original, so we 33713498266Sopenharmony_ci skip the new ones */ 33813498266Sopenharmony_ci host_changed = TRUE; 33913498266Sopenharmony_ci } 34013498266Sopenharmony_ci else { 34113498266Sopenharmony_ci /* cut off the original URL from the first slash, or deal with URLs 34213498266Sopenharmony_ci without slash */ 34313498266Sopenharmony_ci pathsep = strchr(protsep, '/'); 34413498266Sopenharmony_ci if(pathsep) { 34513498266Sopenharmony_ci /* When people use badly formatted URLs, such as 34613498266Sopenharmony_ci "http://www.example.com?dir=/home/daniel" we must not use the first 34713498266Sopenharmony_ci slash, if there's a ?-letter before it! */ 34813498266Sopenharmony_ci char *sep = strchr(protsep, '?'); 34913498266Sopenharmony_ci if(sep && (sep < pathsep)) 35013498266Sopenharmony_ci pathsep = sep; 35113498266Sopenharmony_ci *pathsep = 0; 35213498266Sopenharmony_ci } 35313498266Sopenharmony_ci else { 35413498266Sopenharmony_ci /* There was no slash. Now, since we might be operating on a badly 35513498266Sopenharmony_ci formatted URL, such as "http://www.example.com?id=2380" which 35613498266Sopenharmony_ci doesn't use a slash separator as it is supposed to, we need to check 35713498266Sopenharmony_ci for a ?-letter as well! */ 35813498266Sopenharmony_ci pathsep = strchr(protsep, '?'); 35913498266Sopenharmony_ci if(pathsep) 36013498266Sopenharmony_ci *pathsep = 0; 36113498266Sopenharmony_ci } 36213498266Sopenharmony_ci } 36313498266Sopenharmony_ci } 36413498266Sopenharmony_ci 36513498266Sopenharmony_ci Curl_dyn_init(&newest, CURL_MAX_INPUT_LENGTH); 36613498266Sopenharmony_ci 36713498266Sopenharmony_ci /* copy over the root url part */ 36813498266Sopenharmony_ci result = Curl_dyn_add(&newest, base); 36913498266Sopenharmony_ci if(result) 37013498266Sopenharmony_ci return result; 37113498266Sopenharmony_ci 37213498266Sopenharmony_ci /* check if we need to append a slash */ 37313498266Sopenharmony_ci if(('/' == useurl[0]) || (protsep && !*protsep) || ('?' == useurl[0])) 37413498266Sopenharmony_ci ; 37513498266Sopenharmony_ci else { 37613498266Sopenharmony_ci result = Curl_dyn_addn(&newest, "/", 1); 37713498266Sopenharmony_ci if(result) 37813498266Sopenharmony_ci return result; 37913498266Sopenharmony_ci } 38013498266Sopenharmony_ci 38113498266Sopenharmony_ci /* then append the new piece on the right side */ 38213498266Sopenharmony_ci uc = urlencode_str(&newest, useurl, strlen(useurl), !host_changed, 38313498266Sopenharmony_ci FALSE); 38413498266Sopenharmony_ci if(uc) 38513498266Sopenharmony_ci return (uc == CURLUE_TOO_LARGE) ? CURLE_TOO_LARGE : CURLE_OUT_OF_MEMORY; 38613498266Sopenharmony_ci 38713498266Sopenharmony_ci *newurl = Curl_dyn_ptr(&newest); 38813498266Sopenharmony_ci return CURLE_OK; 38913498266Sopenharmony_ci} 39013498266Sopenharmony_ci 39113498266Sopenharmony_ci/* scan for byte values <= 31, 127 and sometimes space */ 39213498266Sopenharmony_cistatic CURLUcode junkscan(const char *url, size_t *urllen, unsigned int flags) 39313498266Sopenharmony_ci{ 39413498266Sopenharmony_ci static const char badbytes[]={ 39513498266Sopenharmony_ci /* */ 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 39613498266Sopenharmony_ci 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, 39713498266Sopenharmony_ci 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 39813498266Sopenharmony_ci 0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f, 39913498266Sopenharmony_ci 0x7f, 0x00 /* null-terminate */ 40013498266Sopenharmony_ci }; 40113498266Sopenharmony_ci size_t n = strlen(url); 40213498266Sopenharmony_ci size_t nfine; 40313498266Sopenharmony_ci 40413498266Sopenharmony_ci if(n > CURL_MAX_INPUT_LENGTH) 40513498266Sopenharmony_ci /* excessive input length */ 40613498266Sopenharmony_ci return CURLUE_MALFORMED_INPUT; 40713498266Sopenharmony_ci 40813498266Sopenharmony_ci nfine = strcspn(url, badbytes); 40913498266Sopenharmony_ci if((nfine != n) || 41013498266Sopenharmony_ci (!(flags & CURLU_ALLOW_SPACE) && strchr(url, ' '))) 41113498266Sopenharmony_ci return CURLUE_MALFORMED_INPUT; 41213498266Sopenharmony_ci 41313498266Sopenharmony_ci *urllen = n; 41413498266Sopenharmony_ci return CURLUE_OK; 41513498266Sopenharmony_ci} 41613498266Sopenharmony_ci 41713498266Sopenharmony_ci/* 41813498266Sopenharmony_ci * parse_hostname_login() 41913498266Sopenharmony_ci * 42013498266Sopenharmony_ci * Parse the login details (user name, password and options) from the URL and 42113498266Sopenharmony_ci * strip them out of the host name 42213498266Sopenharmony_ci * 42313498266Sopenharmony_ci */ 42413498266Sopenharmony_cistatic CURLUcode parse_hostname_login(struct Curl_URL *u, 42513498266Sopenharmony_ci const char *login, 42613498266Sopenharmony_ci size_t len, 42713498266Sopenharmony_ci unsigned int flags, 42813498266Sopenharmony_ci size_t *offset) /* to the host name */ 42913498266Sopenharmony_ci{ 43013498266Sopenharmony_ci CURLUcode result = CURLUE_OK; 43113498266Sopenharmony_ci CURLcode ccode; 43213498266Sopenharmony_ci char *userp = NULL; 43313498266Sopenharmony_ci char *passwdp = NULL; 43413498266Sopenharmony_ci char *optionsp = NULL; 43513498266Sopenharmony_ci const struct Curl_handler *h = NULL; 43613498266Sopenharmony_ci 43713498266Sopenharmony_ci /* At this point, we assume all the other special cases have been taken 43813498266Sopenharmony_ci * care of, so the host is at most 43913498266Sopenharmony_ci * 44013498266Sopenharmony_ci * [user[:password][;options]]@]hostname 44113498266Sopenharmony_ci * 44213498266Sopenharmony_ci * We need somewhere to put the embedded details, so do that first. 44313498266Sopenharmony_ci */ 44413498266Sopenharmony_ci char *ptr; 44513498266Sopenharmony_ci 44613498266Sopenharmony_ci DEBUGASSERT(login); 44713498266Sopenharmony_ci 44813498266Sopenharmony_ci *offset = 0; 44913498266Sopenharmony_ci ptr = memchr(login, '@', len); 45013498266Sopenharmony_ci if(!ptr) 45113498266Sopenharmony_ci goto out; 45213498266Sopenharmony_ci 45313498266Sopenharmony_ci /* We will now try to extract the 45413498266Sopenharmony_ci * possible login information in a string like: 45513498266Sopenharmony_ci * ftp://user:password@ftp.my.site:8021/README */ 45613498266Sopenharmony_ci ptr++; 45713498266Sopenharmony_ci 45813498266Sopenharmony_ci /* if this is a known scheme, get some details */ 45913498266Sopenharmony_ci if(u->scheme) 46013498266Sopenharmony_ci h = Curl_get_scheme_handler(u->scheme); 46113498266Sopenharmony_ci 46213498266Sopenharmony_ci /* We could use the login information in the URL so extract it. Only parse 46313498266Sopenharmony_ci options if the handler says we should. Note that 'h' might be NULL! */ 46413498266Sopenharmony_ci ccode = Curl_parse_login_details(login, ptr - login - 1, 46513498266Sopenharmony_ci &userp, &passwdp, 46613498266Sopenharmony_ci (h && (h->flags & PROTOPT_URLOPTIONS)) ? 46713498266Sopenharmony_ci &optionsp:NULL); 46813498266Sopenharmony_ci if(ccode) { 46913498266Sopenharmony_ci result = CURLUE_BAD_LOGIN; 47013498266Sopenharmony_ci goto out; 47113498266Sopenharmony_ci } 47213498266Sopenharmony_ci 47313498266Sopenharmony_ci if(userp) { 47413498266Sopenharmony_ci if(flags & CURLU_DISALLOW_USER) { 47513498266Sopenharmony_ci /* Option DISALLOW_USER is set and url contains username. */ 47613498266Sopenharmony_ci result = CURLUE_USER_NOT_ALLOWED; 47713498266Sopenharmony_ci goto out; 47813498266Sopenharmony_ci } 47913498266Sopenharmony_ci free(u->user); 48013498266Sopenharmony_ci u->user = userp; 48113498266Sopenharmony_ci } 48213498266Sopenharmony_ci 48313498266Sopenharmony_ci if(passwdp) { 48413498266Sopenharmony_ci free(u->password); 48513498266Sopenharmony_ci u->password = passwdp; 48613498266Sopenharmony_ci } 48713498266Sopenharmony_ci 48813498266Sopenharmony_ci if(optionsp) { 48913498266Sopenharmony_ci free(u->options); 49013498266Sopenharmony_ci u->options = optionsp; 49113498266Sopenharmony_ci } 49213498266Sopenharmony_ci 49313498266Sopenharmony_ci /* the host name starts at this offset */ 49413498266Sopenharmony_ci *offset = ptr - login; 49513498266Sopenharmony_ci return CURLUE_OK; 49613498266Sopenharmony_ci 49713498266Sopenharmony_ciout: 49813498266Sopenharmony_ci 49913498266Sopenharmony_ci free(userp); 50013498266Sopenharmony_ci free(passwdp); 50113498266Sopenharmony_ci free(optionsp); 50213498266Sopenharmony_ci u->user = NULL; 50313498266Sopenharmony_ci u->password = NULL; 50413498266Sopenharmony_ci u->options = NULL; 50513498266Sopenharmony_ci 50613498266Sopenharmony_ci return result; 50713498266Sopenharmony_ci} 50813498266Sopenharmony_ci 50913498266Sopenharmony_ciUNITTEST CURLUcode Curl_parse_port(struct Curl_URL *u, struct dynbuf *host, 51013498266Sopenharmony_ci bool has_scheme) 51113498266Sopenharmony_ci{ 51213498266Sopenharmony_ci char *portptr; 51313498266Sopenharmony_ci char *hostname = Curl_dyn_ptr(host); 51413498266Sopenharmony_ci /* 51513498266Sopenharmony_ci * Find the end of an IPv6 address on the ']' ending bracket. 51613498266Sopenharmony_ci */ 51713498266Sopenharmony_ci if(hostname[0] == '[') { 51813498266Sopenharmony_ci portptr = strchr(hostname, ']'); 51913498266Sopenharmony_ci if(!portptr) 52013498266Sopenharmony_ci return CURLUE_BAD_IPV6; 52113498266Sopenharmony_ci portptr++; 52213498266Sopenharmony_ci /* this is a RFC2732-style specified IP-address */ 52313498266Sopenharmony_ci if(*portptr) { 52413498266Sopenharmony_ci if(*portptr != ':') 52513498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 52613498266Sopenharmony_ci } 52713498266Sopenharmony_ci else 52813498266Sopenharmony_ci portptr = NULL; 52913498266Sopenharmony_ci } 53013498266Sopenharmony_ci else 53113498266Sopenharmony_ci portptr = strchr(hostname, ':'); 53213498266Sopenharmony_ci 53313498266Sopenharmony_ci if(portptr) { 53413498266Sopenharmony_ci char *rest; 53513498266Sopenharmony_ci long port; 53613498266Sopenharmony_ci size_t keep = portptr - hostname; 53713498266Sopenharmony_ci 53813498266Sopenharmony_ci /* Browser behavior adaptation. If there's a colon with no digits after, 53913498266Sopenharmony_ci just cut off the name there which makes us ignore the colon and just 54013498266Sopenharmony_ci use the default port. Firefox, Chrome and Safari all do that. 54113498266Sopenharmony_ci 54213498266Sopenharmony_ci Don't do it if the URL has no scheme, to make something that looks like 54313498266Sopenharmony_ci a scheme not work! 54413498266Sopenharmony_ci */ 54513498266Sopenharmony_ci Curl_dyn_setlen(host, keep); 54613498266Sopenharmony_ci portptr++; 54713498266Sopenharmony_ci if(!*portptr) 54813498266Sopenharmony_ci return has_scheme ? CURLUE_OK : CURLUE_BAD_PORT_NUMBER; 54913498266Sopenharmony_ci 55013498266Sopenharmony_ci if(!ISDIGIT(*portptr)) 55113498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 55213498266Sopenharmony_ci 55313498266Sopenharmony_ci port = strtol(portptr, &rest, 10); /* Port number must be decimal */ 55413498266Sopenharmony_ci 55513498266Sopenharmony_ci if(port > 0xffff) 55613498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 55713498266Sopenharmony_ci 55813498266Sopenharmony_ci if(rest[0]) 55913498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 56013498266Sopenharmony_ci 56113498266Sopenharmony_ci u->portnum = port; 56213498266Sopenharmony_ci /* generate a new port number string to get rid of leading zeroes etc */ 56313498266Sopenharmony_ci free(u->port); 56413498266Sopenharmony_ci u->port = aprintf("%ld", port); 56513498266Sopenharmony_ci if(!u->port) 56613498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 56713498266Sopenharmony_ci } 56813498266Sopenharmony_ci 56913498266Sopenharmony_ci return CURLUE_OK; 57013498266Sopenharmony_ci} 57113498266Sopenharmony_ci 57213498266Sopenharmony_ci/* this assumes 'hostname' now starts with [ */ 57313498266Sopenharmony_cistatic CURLUcode ipv6_parse(struct Curl_URL *u, char *hostname, 57413498266Sopenharmony_ci size_t hlen) /* length of hostname */ 57513498266Sopenharmony_ci{ 57613498266Sopenharmony_ci size_t len; 57713498266Sopenharmony_ci DEBUGASSERT(*hostname == '['); 57813498266Sopenharmony_ci if(hlen < 4) /* '[::]' is the shortest possible valid string */ 57913498266Sopenharmony_ci return CURLUE_BAD_IPV6; 58013498266Sopenharmony_ci hostname++; 58113498266Sopenharmony_ci hlen -= 2; 58213498266Sopenharmony_ci 58313498266Sopenharmony_ci /* only valid IPv6 letters are ok */ 58413498266Sopenharmony_ci len = strspn(hostname, "0123456789abcdefABCDEF:."); 58513498266Sopenharmony_ci 58613498266Sopenharmony_ci if(hlen != len) { 58713498266Sopenharmony_ci hlen = len; 58813498266Sopenharmony_ci if(hostname[len] == '%') { 58913498266Sopenharmony_ci /* this could now be '%[zone id]' */ 59013498266Sopenharmony_ci char zoneid[16]; 59113498266Sopenharmony_ci int i = 0; 59213498266Sopenharmony_ci char *h = &hostname[len + 1]; 59313498266Sopenharmony_ci /* pass '25' if present and is a url encoded percent sign */ 59413498266Sopenharmony_ci if(!strncmp(h, "25", 2) && h[2] && (h[2] != ']')) 59513498266Sopenharmony_ci h += 2; 59613498266Sopenharmony_ci while(*h && (*h != ']') && (i < 15)) 59713498266Sopenharmony_ci zoneid[i++] = *h++; 59813498266Sopenharmony_ci if(!i || (']' != *h)) 59913498266Sopenharmony_ci return CURLUE_BAD_IPV6; 60013498266Sopenharmony_ci zoneid[i] = 0; 60113498266Sopenharmony_ci u->zoneid = strdup(zoneid); 60213498266Sopenharmony_ci if(!u->zoneid) 60313498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 60413498266Sopenharmony_ci hostname[len] = ']'; /* insert end bracket */ 60513498266Sopenharmony_ci hostname[len + 1] = 0; /* terminate the hostname */ 60613498266Sopenharmony_ci } 60713498266Sopenharmony_ci else 60813498266Sopenharmony_ci return CURLUE_BAD_IPV6; 60913498266Sopenharmony_ci /* hostname is fine */ 61013498266Sopenharmony_ci } 61113498266Sopenharmony_ci 61213498266Sopenharmony_ci /* Check the IPv6 address. */ 61313498266Sopenharmony_ci { 61413498266Sopenharmony_ci char dest[16]; /* fits a binary IPv6 address */ 61513498266Sopenharmony_ci char norm[MAX_IPADR_LEN]; 61613498266Sopenharmony_ci hostname[hlen] = 0; /* end the address there */ 61713498266Sopenharmony_ci if(1 != Curl_inet_pton(AF_INET6, hostname, dest)) 61813498266Sopenharmony_ci return CURLUE_BAD_IPV6; 61913498266Sopenharmony_ci 62013498266Sopenharmony_ci /* check if it can be done shorter */ 62113498266Sopenharmony_ci if(Curl_inet_ntop(AF_INET6, dest, norm, sizeof(norm)) && 62213498266Sopenharmony_ci (strlen(norm) < hlen)) { 62313498266Sopenharmony_ci strcpy(hostname, norm); 62413498266Sopenharmony_ci hlen = strlen(norm); 62513498266Sopenharmony_ci hostname[hlen + 1] = 0; 62613498266Sopenharmony_ci } 62713498266Sopenharmony_ci hostname[hlen] = ']'; /* restore ending bracket */ 62813498266Sopenharmony_ci } 62913498266Sopenharmony_ci return CURLUE_OK; 63013498266Sopenharmony_ci} 63113498266Sopenharmony_ci 63213498266Sopenharmony_cistatic CURLUcode hostname_check(struct Curl_URL *u, char *hostname, 63313498266Sopenharmony_ci size_t hlen) /* length of hostname */ 63413498266Sopenharmony_ci{ 63513498266Sopenharmony_ci size_t len; 63613498266Sopenharmony_ci DEBUGASSERT(hostname); 63713498266Sopenharmony_ci 63813498266Sopenharmony_ci if(!hlen) 63913498266Sopenharmony_ci return CURLUE_NO_HOST; 64013498266Sopenharmony_ci else if(hostname[0] == '[') 64113498266Sopenharmony_ci return ipv6_parse(u, hostname, hlen); 64213498266Sopenharmony_ci else { 64313498266Sopenharmony_ci /* letters from the second string are not ok */ 64413498266Sopenharmony_ci len = strcspn(hostname, " \r\n\t/:#?!@{}[]\\$\'\"^`*<>=;,+&()%"); 64513498266Sopenharmony_ci if(hlen != len) 64613498266Sopenharmony_ci /* hostname with bad content */ 64713498266Sopenharmony_ci return CURLUE_BAD_HOSTNAME; 64813498266Sopenharmony_ci } 64913498266Sopenharmony_ci return CURLUE_OK; 65013498266Sopenharmony_ci} 65113498266Sopenharmony_ci 65213498266Sopenharmony_ci/* 65313498266Sopenharmony_ci * Handle partial IPv4 numerical addresses and different bases, like 65413498266Sopenharmony_ci * '16843009', '0x7f', '0x7f.1' '0177.1.1.1' etc. 65513498266Sopenharmony_ci * 65613498266Sopenharmony_ci * If the given input string is syntactically wrong IPv4 or any part for 65713498266Sopenharmony_ci * example is too big, this function returns HOST_NAME. 65813498266Sopenharmony_ci * 65913498266Sopenharmony_ci * Output the "normalized" version of that input string in plain quad decimal 66013498266Sopenharmony_ci * integers. 66113498266Sopenharmony_ci * 66213498266Sopenharmony_ci * Returns the host type. 66313498266Sopenharmony_ci */ 66413498266Sopenharmony_ci 66513498266Sopenharmony_ci#define HOST_ERROR -1 /* out of memory */ 66613498266Sopenharmony_ci#define HOST_BAD -2 /* bad IPv4 address */ 66713498266Sopenharmony_ci 66813498266Sopenharmony_ci#define HOST_NAME 1 66913498266Sopenharmony_ci#define HOST_IPV4 2 67013498266Sopenharmony_ci#define HOST_IPV6 3 67113498266Sopenharmony_ci 67213498266Sopenharmony_cistatic int ipv4_normalize(struct dynbuf *host) 67313498266Sopenharmony_ci{ 67413498266Sopenharmony_ci bool done = FALSE; 67513498266Sopenharmony_ci int n = 0; 67613498266Sopenharmony_ci const char *c = Curl_dyn_ptr(host); 67713498266Sopenharmony_ci unsigned long parts[4] = {0, 0, 0, 0}; 67813498266Sopenharmony_ci CURLcode result = CURLE_OK; 67913498266Sopenharmony_ci 68013498266Sopenharmony_ci if(*c == '[') 68113498266Sopenharmony_ci return HOST_IPV6; 68213498266Sopenharmony_ci 68313498266Sopenharmony_ci while(!done) { 68413498266Sopenharmony_ci char *endp; 68513498266Sopenharmony_ci unsigned long l; 68613498266Sopenharmony_ci if(!ISDIGIT(*c)) 68713498266Sopenharmony_ci /* most importantly this doesn't allow a leading plus or minus */ 68813498266Sopenharmony_ci return HOST_NAME; 68913498266Sopenharmony_ci l = strtoul(c, &endp, 0); 69013498266Sopenharmony_ci 69113498266Sopenharmony_ci parts[n] = l; 69213498266Sopenharmony_ci c = endp; 69313498266Sopenharmony_ci 69413498266Sopenharmony_ci switch(*c) { 69513498266Sopenharmony_ci case '.': 69613498266Sopenharmony_ci if(n == 3) 69713498266Sopenharmony_ci return HOST_NAME; 69813498266Sopenharmony_ci n++; 69913498266Sopenharmony_ci c++; 70013498266Sopenharmony_ci break; 70113498266Sopenharmony_ci 70213498266Sopenharmony_ci case '\0': 70313498266Sopenharmony_ci done = TRUE; 70413498266Sopenharmony_ci break; 70513498266Sopenharmony_ci 70613498266Sopenharmony_ci default: 70713498266Sopenharmony_ci return HOST_NAME; 70813498266Sopenharmony_ci } 70913498266Sopenharmony_ci 71013498266Sopenharmony_ci /* overflow */ 71113498266Sopenharmony_ci if((l == ULONG_MAX) && (errno == ERANGE)) 71213498266Sopenharmony_ci return HOST_NAME; 71313498266Sopenharmony_ci 71413498266Sopenharmony_ci#if SIZEOF_LONG > 4 71513498266Sopenharmony_ci /* a value larger than 32 bits */ 71613498266Sopenharmony_ci if(l > UINT_MAX) 71713498266Sopenharmony_ci return HOST_NAME; 71813498266Sopenharmony_ci#endif 71913498266Sopenharmony_ci } 72013498266Sopenharmony_ci 72113498266Sopenharmony_ci switch(n) { 72213498266Sopenharmony_ci case 0: /* a -- 32 bits */ 72313498266Sopenharmony_ci Curl_dyn_reset(host); 72413498266Sopenharmony_ci 72513498266Sopenharmony_ci result = Curl_dyn_addf(host, "%u.%u.%u.%u", 72613498266Sopenharmony_ci (unsigned int)(parts[0] >> 24), 72713498266Sopenharmony_ci (unsigned int)((parts[0] >> 16) & 0xff), 72813498266Sopenharmony_ci (unsigned int)((parts[0] >> 8) & 0xff), 72913498266Sopenharmony_ci (unsigned int)(parts[0] & 0xff)); 73013498266Sopenharmony_ci break; 73113498266Sopenharmony_ci case 1: /* a.b -- 8.24 bits */ 73213498266Sopenharmony_ci if((parts[0] > 0xff) || (parts[1] > 0xffffff)) 73313498266Sopenharmony_ci return HOST_NAME; 73413498266Sopenharmony_ci Curl_dyn_reset(host); 73513498266Sopenharmony_ci result = Curl_dyn_addf(host, "%u.%u.%u.%u", 73613498266Sopenharmony_ci (unsigned int)(parts[0]), 73713498266Sopenharmony_ci (unsigned int)((parts[1] >> 16) & 0xff), 73813498266Sopenharmony_ci (unsigned int)((parts[1] >> 8) & 0xff), 73913498266Sopenharmony_ci (unsigned int)(parts[1] & 0xff)); 74013498266Sopenharmony_ci break; 74113498266Sopenharmony_ci case 2: /* a.b.c -- 8.8.16 bits */ 74213498266Sopenharmony_ci if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xffff)) 74313498266Sopenharmony_ci return HOST_NAME; 74413498266Sopenharmony_ci Curl_dyn_reset(host); 74513498266Sopenharmony_ci result = Curl_dyn_addf(host, "%u.%u.%u.%u", 74613498266Sopenharmony_ci (unsigned int)(parts[0]), 74713498266Sopenharmony_ci (unsigned int)(parts[1]), 74813498266Sopenharmony_ci (unsigned int)((parts[2] >> 8) & 0xff), 74913498266Sopenharmony_ci (unsigned int)(parts[2] & 0xff)); 75013498266Sopenharmony_ci break; 75113498266Sopenharmony_ci case 3: /* a.b.c.d -- 8.8.8.8 bits */ 75213498266Sopenharmony_ci if((parts[0] > 0xff) || (parts[1] > 0xff) || (parts[2] > 0xff) || 75313498266Sopenharmony_ci (parts[3] > 0xff)) 75413498266Sopenharmony_ci return HOST_NAME; 75513498266Sopenharmony_ci Curl_dyn_reset(host); 75613498266Sopenharmony_ci result = Curl_dyn_addf(host, "%u.%u.%u.%u", 75713498266Sopenharmony_ci (unsigned int)(parts[0]), 75813498266Sopenharmony_ci (unsigned int)(parts[1]), 75913498266Sopenharmony_ci (unsigned int)(parts[2]), 76013498266Sopenharmony_ci (unsigned int)(parts[3])); 76113498266Sopenharmony_ci break; 76213498266Sopenharmony_ci } 76313498266Sopenharmony_ci if(result) 76413498266Sopenharmony_ci return HOST_ERROR; 76513498266Sopenharmony_ci return HOST_IPV4; 76613498266Sopenharmony_ci} 76713498266Sopenharmony_ci 76813498266Sopenharmony_ci/* if necessary, replace the host content with a URL decoded version */ 76913498266Sopenharmony_cistatic CURLUcode urldecode_host(struct dynbuf *host) 77013498266Sopenharmony_ci{ 77113498266Sopenharmony_ci char *per = NULL; 77213498266Sopenharmony_ci const char *hostname = Curl_dyn_ptr(host); 77313498266Sopenharmony_ci per = strchr(hostname, '%'); 77413498266Sopenharmony_ci if(!per) 77513498266Sopenharmony_ci /* nothing to decode */ 77613498266Sopenharmony_ci return CURLUE_OK; 77713498266Sopenharmony_ci else { 77813498266Sopenharmony_ci /* encoded */ 77913498266Sopenharmony_ci size_t dlen; 78013498266Sopenharmony_ci char *decoded; 78113498266Sopenharmony_ci CURLcode result = Curl_urldecode(hostname, 0, &decoded, &dlen, 78213498266Sopenharmony_ci REJECT_CTRL); 78313498266Sopenharmony_ci if(result) 78413498266Sopenharmony_ci return CURLUE_BAD_HOSTNAME; 78513498266Sopenharmony_ci Curl_dyn_reset(host); 78613498266Sopenharmony_ci result = Curl_dyn_addn(host, decoded, dlen); 78713498266Sopenharmony_ci free(decoded); 78813498266Sopenharmony_ci if(result) 78913498266Sopenharmony_ci return cc2cu(result); 79013498266Sopenharmony_ci } 79113498266Sopenharmony_ci 79213498266Sopenharmony_ci return CURLUE_OK; 79313498266Sopenharmony_ci} 79413498266Sopenharmony_ci 79513498266Sopenharmony_cistatic CURLUcode parse_authority(struct Curl_URL *u, 79613498266Sopenharmony_ci const char *auth, size_t authlen, 79713498266Sopenharmony_ci unsigned int flags, 79813498266Sopenharmony_ci struct dynbuf *host, 79913498266Sopenharmony_ci bool has_scheme) 80013498266Sopenharmony_ci{ 80113498266Sopenharmony_ci size_t offset; 80213498266Sopenharmony_ci CURLUcode uc; 80313498266Sopenharmony_ci CURLcode result; 80413498266Sopenharmony_ci 80513498266Sopenharmony_ci /* 80613498266Sopenharmony_ci * Parse the login details and strip them out of the host name. 80713498266Sopenharmony_ci */ 80813498266Sopenharmony_ci uc = parse_hostname_login(u, auth, authlen, flags, &offset); 80913498266Sopenharmony_ci if(uc) 81013498266Sopenharmony_ci goto out; 81113498266Sopenharmony_ci 81213498266Sopenharmony_ci result = Curl_dyn_addn(host, auth + offset, authlen - offset); 81313498266Sopenharmony_ci if(result) { 81413498266Sopenharmony_ci uc = cc2cu(result); 81513498266Sopenharmony_ci goto out; 81613498266Sopenharmony_ci } 81713498266Sopenharmony_ci 81813498266Sopenharmony_ci uc = Curl_parse_port(u, host, has_scheme); 81913498266Sopenharmony_ci if(uc) 82013498266Sopenharmony_ci goto out; 82113498266Sopenharmony_ci 82213498266Sopenharmony_ci if(!Curl_dyn_len(host)) 82313498266Sopenharmony_ci return CURLUE_NO_HOST; 82413498266Sopenharmony_ci 82513498266Sopenharmony_ci switch(ipv4_normalize(host)) { 82613498266Sopenharmony_ci case HOST_IPV4: 82713498266Sopenharmony_ci break; 82813498266Sopenharmony_ci case HOST_IPV6: 82913498266Sopenharmony_ci uc = ipv6_parse(u, Curl_dyn_ptr(host), Curl_dyn_len(host)); 83013498266Sopenharmony_ci break; 83113498266Sopenharmony_ci case HOST_NAME: 83213498266Sopenharmony_ci uc = urldecode_host(host); 83313498266Sopenharmony_ci if(!uc) 83413498266Sopenharmony_ci uc = hostname_check(u, Curl_dyn_ptr(host), Curl_dyn_len(host)); 83513498266Sopenharmony_ci break; 83613498266Sopenharmony_ci case HOST_ERROR: 83713498266Sopenharmony_ci uc = CURLUE_OUT_OF_MEMORY; 83813498266Sopenharmony_ci break; 83913498266Sopenharmony_ci case HOST_BAD: 84013498266Sopenharmony_ci default: 84113498266Sopenharmony_ci uc = CURLUE_BAD_HOSTNAME; /* Bad IPv4 address even */ 84213498266Sopenharmony_ci break; 84313498266Sopenharmony_ci } 84413498266Sopenharmony_ci 84513498266Sopenharmony_ciout: 84613498266Sopenharmony_ci return uc; 84713498266Sopenharmony_ci} 84813498266Sopenharmony_ci 84913498266Sopenharmony_ciCURLUcode Curl_url_set_authority(CURLU *u, const char *authority, 85013498266Sopenharmony_ci unsigned int flags) 85113498266Sopenharmony_ci{ 85213498266Sopenharmony_ci CURLUcode result; 85313498266Sopenharmony_ci struct dynbuf host; 85413498266Sopenharmony_ci 85513498266Sopenharmony_ci DEBUGASSERT(authority); 85613498266Sopenharmony_ci Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH); 85713498266Sopenharmony_ci 85813498266Sopenharmony_ci result = parse_authority(u, authority, strlen(authority), flags, 85913498266Sopenharmony_ci &host, !!u->scheme); 86013498266Sopenharmony_ci if(result) 86113498266Sopenharmony_ci Curl_dyn_free(&host); 86213498266Sopenharmony_ci else { 86313498266Sopenharmony_ci free(u->host); 86413498266Sopenharmony_ci u->host = Curl_dyn_ptr(&host); 86513498266Sopenharmony_ci } 86613498266Sopenharmony_ci return result; 86713498266Sopenharmony_ci} 86813498266Sopenharmony_ci 86913498266Sopenharmony_ci/* 87013498266Sopenharmony_ci * "Remove Dot Segments" 87113498266Sopenharmony_ci * https://datatracker.ietf.org/doc/html/rfc3986#section-5.2.4 87213498266Sopenharmony_ci */ 87313498266Sopenharmony_ci 87413498266Sopenharmony_ci/* 87513498266Sopenharmony_ci * dedotdotify() 87613498266Sopenharmony_ci * @unittest: 1395 87713498266Sopenharmony_ci * 87813498266Sopenharmony_ci * This function gets a null-terminated path with dot and dotdot sequences 87913498266Sopenharmony_ci * passed in and strips them off according to the rules in RFC 3986 section 88013498266Sopenharmony_ci * 5.2.4. 88113498266Sopenharmony_ci * 88213498266Sopenharmony_ci * The function handles a query part ('?' + stuff) appended but it expects 88313498266Sopenharmony_ci * that fragments ('#' + stuff) have already been cut off. 88413498266Sopenharmony_ci * 88513498266Sopenharmony_ci * RETURNS 88613498266Sopenharmony_ci * 88713498266Sopenharmony_ci * Zero for success and 'out' set to an allocated dedotdotified string. 88813498266Sopenharmony_ci */ 88913498266Sopenharmony_ciUNITTEST int dedotdotify(const char *input, size_t clen, char **outp); 89013498266Sopenharmony_ciUNITTEST int dedotdotify(const char *input, size_t clen, char **outp) 89113498266Sopenharmony_ci{ 89213498266Sopenharmony_ci char *outptr; 89313498266Sopenharmony_ci const char *endp = &input[clen]; 89413498266Sopenharmony_ci char *out; 89513498266Sopenharmony_ci 89613498266Sopenharmony_ci *outp = NULL; 89713498266Sopenharmony_ci /* the path always starts with a slash, and a slash has not dot */ 89813498266Sopenharmony_ci if((clen < 2) || !memchr(input, '.', clen)) 89913498266Sopenharmony_ci return 0; 90013498266Sopenharmony_ci 90113498266Sopenharmony_ci out = malloc(clen + 1); 90213498266Sopenharmony_ci if(!out) 90313498266Sopenharmony_ci return 1; /* out of memory */ 90413498266Sopenharmony_ci 90513498266Sopenharmony_ci *out = 0; /* null-terminates, for inputs like "./" */ 90613498266Sopenharmony_ci outptr = out; 90713498266Sopenharmony_ci 90813498266Sopenharmony_ci do { 90913498266Sopenharmony_ci bool dotdot = TRUE; 91013498266Sopenharmony_ci if(*input == '.') { 91113498266Sopenharmony_ci /* A. If the input buffer begins with a prefix of "../" or "./", then 91213498266Sopenharmony_ci remove that prefix from the input buffer; otherwise, */ 91313498266Sopenharmony_ci 91413498266Sopenharmony_ci if(!strncmp("./", input, 2)) { 91513498266Sopenharmony_ci input += 2; 91613498266Sopenharmony_ci clen -= 2; 91713498266Sopenharmony_ci } 91813498266Sopenharmony_ci else if(!strncmp("../", input, 3)) { 91913498266Sopenharmony_ci input += 3; 92013498266Sopenharmony_ci clen -= 3; 92113498266Sopenharmony_ci } 92213498266Sopenharmony_ci /* D. if the input buffer consists only of "." or "..", then remove 92313498266Sopenharmony_ci that from the input buffer; otherwise, */ 92413498266Sopenharmony_ci 92513498266Sopenharmony_ci else if(!strcmp(".", input) || !strcmp("..", input) || 92613498266Sopenharmony_ci !strncmp(".?", input, 2) || !strncmp("..?", input, 3)) { 92713498266Sopenharmony_ci *out = 0; 92813498266Sopenharmony_ci break; 92913498266Sopenharmony_ci } 93013498266Sopenharmony_ci else 93113498266Sopenharmony_ci dotdot = FALSE; 93213498266Sopenharmony_ci } 93313498266Sopenharmony_ci else if(*input == '/') { 93413498266Sopenharmony_ci /* B. if the input buffer begins with a prefix of "/./" or "/.", where 93513498266Sopenharmony_ci "." is a complete path segment, then replace that prefix with "/" in 93613498266Sopenharmony_ci the input buffer; otherwise, */ 93713498266Sopenharmony_ci if(!strncmp("/./", input, 3)) { 93813498266Sopenharmony_ci input += 2; 93913498266Sopenharmony_ci clen -= 2; 94013498266Sopenharmony_ci } 94113498266Sopenharmony_ci else if(!strcmp("/.", input) || !strncmp("/.?", input, 3)) { 94213498266Sopenharmony_ci *outptr++ = '/'; 94313498266Sopenharmony_ci *outptr = 0; 94413498266Sopenharmony_ci break; 94513498266Sopenharmony_ci } 94613498266Sopenharmony_ci 94713498266Sopenharmony_ci /* C. if the input buffer begins with a prefix of "/../" or "/..", 94813498266Sopenharmony_ci where ".." is a complete path segment, then replace that prefix with 94913498266Sopenharmony_ci "/" in the input buffer and remove the last segment and its 95013498266Sopenharmony_ci preceding "/" (if any) from the output buffer; otherwise, */ 95113498266Sopenharmony_ci 95213498266Sopenharmony_ci else if(!strncmp("/../", input, 4)) { 95313498266Sopenharmony_ci input += 3; 95413498266Sopenharmony_ci clen -= 3; 95513498266Sopenharmony_ci /* remove the last segment from the output buffer */ 95613498266Sopenharmony_ci while(outptr > out) { 95713498266Sopenharmony_ci outptr--; 95813498266Sopenharmony_ci if(*outptr == '/') 95913498266Sopenharmony_ci break; 96013498266Sopenharmony_ci } 96113498266Sopenharmony_ci *outptr = 0; /* null-terminate where it stops */ 96213498266Sopenharmony_ci } 96313498266Sopenharmony_ci else if(!strcmp("/..", input) || !strncmp("/..?", input, 4)) { 96413498266Sopenharmony_ci /* remove the last segment from the output buffer */ 96513498266Sopenharmony_ci while(outptr > out) { 96613498266Sopenharmony_ci outptr--; 96713498266Sopenharmony_ci if(*outptr == '/') 96813498266Sopenharmony_ci break; 96913498266Sopenharmony_ci } 97013498266Sopenharmony_ci *outptr++ = '/'; 97113498266Sopenharmony_ci *outptr = 0; /* null-terminate where it stops */ 97213498266Sopenharmony_ci break; 97313498266Sopenharmony_ci } 97413498266Sopenharmony_ci else 97513498266Sopenharmony_ci dotdot = FALSE; 97613498266Sopenharmony_ci } 97713498266Sopenharmony_ci else 97813498266Sopenharmony_ci dotdot = FALSE; 97913498266Sopenharmony_ci 98013498266Sopenharmony_ci if(!dotdot) { 98113498266Sopenharmony_ci /* E. move the first path segment in the input buffer to the end of 98213498266Sopenharmony_ci the output buffer, including the initial "/" character (if any) and 98313498266Sopenharmony_ci any subsequent characters up to, but not including, the next "/" 98413498266Sopenharmony_ci character or the end of the input buffer. */ 98513498266Sopenharmony_ci 98613498266Sopenharmony_ci do { 98713498266Sopenharmony_ci *outptr++ = *input++; 98813498266Sopenharmony_ci clen--; 98913498266Sopenharmony_ci } while(*input && (*input != '/') && (*input != '?')); 99013498266Sopenharmony_ci *outptr = 0; 99113498266Sopenharmony_ci } 99213498266Sopenharmony_ci 99313498266Sopenharmony_ci /* continue until end of path */ 99413498266Sopenharmony_ci } while(input < endp); 99513498266Sopenharmony_ci 99613498266Sopenharmony_ci *outp = out; 99713498266Sopenharmony_ci return 0; /* success */ 99813498266Sopenharmony_ci} 99913498266Sopenharmony_ci 100013498266Sopenharmony_cistatic CURLUcode parseurl(const char *url, CURLU *u, unsigned int flags) 100113498266Sopenharmony_ci{ 100213498266Sopenharmony_ci const char *path; 100313498266Sopenharmony_ci size_t pathlen; 100413498266Sopenharmony_ci char *query = NULL; 100513498266Sopenharmony_ci char *fragment = NULL; 100613498266Sopenharmony_ci char schemebuf[MAX_SCHEME_LEN + 1]; 100713498266Sopenharmony_ci size_t schemelen = 0; 100813498266Sopenharmony_ci size_t urllen; 100913498266Sopenharmony_ci CURLUcode result = CURLUE_OK; 101013498266Sopenharmony_ci size_t fraglen = 0; 101113498266Sopenharmony_ci struct dynbuf host; 101213498266Sopenharmony_ci 101313498266Sopenharmony_ci DEBUGASSERT(url); 101413498266Sopenharmony_ci 101513498266Sopenharmony_ci Curl_dyn_init(&host, CURL_MAX_INPUT_LENGTH); 101613498266Sopenharmony_ci 101713498266Sopenharmony_ci result = junkscan(url, &urllen, flags); 101813498266Sopenharmony_ci if(result) 101913498266Sopenharmony_ci goto fail; 102013498266Sopenharmony_ci 102113498266Sopenharmony_ci schemelen = Curl_is_absolute_url(url, schemebuf, sizeof(schemebuf), 102213498266Sopenharmony_ci flags & (CURLU_GUESS_SCHEME| 102313498266Sopenharmony_ci CURLU_DEFAULT_SCHEME)); 102413498266Sopenharmony_ci 102513498266Sopenharmony_ci /* handle the file: scheme */ 102613498266Sopenharmony_ci if(schemelen && !strcmp(schemebuf, "file")) { 102713498266Sopenharmony_ci bool uncpath = FALSE; 102813498266Sopenharmony_ci if(urllen <= 6) { 102913498266Sopenharmony_ci /* file:/ is not enough to actually be a complete file: URL */ 103013498266Sopenharmony_ci result = CURLUE_BAD_FILE_URL; 103113498266Sopenharmony_ci goto fail; 103213498266Sopenharmony_ci } 103313498266Sopenharmony_ci 103413498266Sopenharmony_ci /* path has been allocated large enough to hold this */ 103513498266Sopenharmony_ci path = (char *)&url[5]; 103613498266Sopenharmony_ci pathlen = urllen - 5; 103713498266Sopenharmony_ci 103813498266Sopenharmony_ci u->scheme = strdup("file"); 103913498266Sopenharmony_ci if(!u->scheme) { 104013498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 104113498266Sopenharmony_ci goto fail; 104213498266Sopenharmony_ci } 104313498266Sopenharmony_ci 104413498266Sopenharmony_ci /* Extra handling URLs with an authority component (i.e. that start with 104513498266Sopenharmony_ci * "file://") 104613498266Sopenharmony_ci * 104713498266Sopenharmony_ci * We allow omitted hostname (e.g. file:/<path>) -- valid according to 104813498266Sopenharmony_ci * RFC 8089, but not the (current) WHAT-WG URL spec. 104913498266Sopenharmony_ci */ 105013498266Sopenharmony_ci if(path[0] == '/' && path[1] == '/') { 105113498266Sopenharmony_ci /* swallow the two slashes */ 105213498266Sopenharmony_ci const char *ptr = &path[2]; 105313498266Sopenharmony_ci 105413498266Sopenharmony_ci /* 105513498266Sopenharmony_ci * According to RFC 8089, a file: URL can be reliably dereferenced if: 105613498266Sopenharmony_ci * 105713498266Sopenharmony_ci * o it has no/blank hostname, or 105813498266Sopenharmony_ci * 105913498266Sopenharmony_ci * o the hostname matches "localhost" (case-insensitively), or 106013498266Sopenharmony_ci * 106113498266Sopenharmony_ci * o the hostname is a FQDN that resolves to this machine, or 106213498266Sopenharmony_ci * 106313498266Sopenharmony_ci * o it is an UNC String transformed to an URI (Windows only, RFC 8089 106413498266Sopenharmony_ci * Appendix E.3). 106513498266Sopenharmony_ci * 106613498266Sopenharmony_ci * For brevity, we only consider URLs with empty, "localhost", or 106713498266Sopenharmony_ci * "127.0.0.1" hostnames as local, otherwise as an UNC String. 106813498266Sopenharmony_ci * 106913498266Sopenharmony_ci * Additionally, there is an exception for URLs with a Windows drive 107013498266Sopenharmony_ci * letter in the authority (which was accidentally omitted from RFC 8089 107113498266Sopenharmony_ci * Appendix E, but believe me, it was meant to be there. --MK) 107213498266Sopenharmony_ci */ 107313498266Sopenharmony_ci if(ptr[0] != '/' && !STARTS_WITH_URL_DRIVE_PREFIX(ptr)) { 107413498266Sopenharmony_ci /* the URL includes a host name, it must match "localhost" or 107513498266Sopenharmony_ci "127.0.0.1" to be valid */ 107613498266Sopenharmony_ci if(checkprefix("localhost/", ptr) || 107713498266Sopenharmony_ci checkprefix("127.0.0.1/", ptr)) { 107813498266Sopenharmony_ci ptr += 9; /* now points to the slash after the host */ 107913498266Sopenharmony_ci } 108013498266Sopenharmony_ci else { 108113498266Sopenharmony_ci#if defined(_WIN32) 108213498266Sopenharmony_ci size_t len; 108313498266Sopenharmony_ci 108413498266Sopenharmony_ci /* the host name, NetBIOS computer name, can not contain disallowed 108513498266Sopenharmony_ci chars, and the delimiting slash character must be appended to the 108613498266Sopenharmony_ci host name */ 108713498266Sopenharmony_ci path = strpbrk(ptr, "/\\:*?\"<>|"); 108813498266Sopenharmony_ci if(!path || *path != '/') { 108913498266Sopenharmony_ci result = CURLUE_BAD_FILE_URL; 109013498266Sopenharmony_ci goto fail; 109113498266Sopenharmony_ci } 109213498266Sopenharmony_ci 109313498266Sopenharmony_ci len = path - ptr; 109413498266Sopenharmony_ci if(len) { 109513498266Sopenharmony_ci CURLcode code = Curl_dyn_addn(&host, ptr, len); 109613498266Sopenharmony_ci if(code) { 109713498266Sopenharmony_ci result = cc2cu(code); 109813498266Sopenharmony_ci goto fail; 109913498266Sopenharmony_ci } 110013498266Sopenharmony_ci uncpath = TRUE; 110113498266Sopenharmony_ci } 110213498266Sopenharmony_ci 110313498266Sopenharmony_ci ptr -= 2; /* now points to the // before the host in UNC */ 110413498266Sopenharmony_ci#else 110513498266Sopenharmony_ci /* Invalid file://hostname/, expected localhost or 127.0.0.1 or 110613498266Sopenharmony_ci none */ 110713498266Sopenharmony_ci result = CURLUE_BAD_FILE_URL; 110813498266Sopenharmony_ci goto fail; 110913498266Sopenharmony_ci#endif 111013498266Sopenharmony_ci } 111113498266Sopenharmony_ci } 111213498266Sopenharmony_ci 111313498266Sopenharmony_ci path = ptr; 111413498266Sopenharmony_ci pathlen = urllen - (ptr - url); 111513498266Sopenharmony_ci } 111613498266Sopenharmony_ci 111713498266Sopenharmony_ci if(!uncpath) 111813498266Sopenharmony_ci /* no host for file: URLs by default */ 111913498266Sopenharmony_ci Curl_dyn_reset(&host); 112013498266Sopenharmony_ci 112113498266Sopenharmony_ci#if !defined(_WIN32) && !defined(MSDOS) && !defined(__CYGWIN__) 112213498266Sopenharmony_ci /* Don't allow Windows drive letters when not in Windows. 112313498266Sopenharmony_ci * This catches both "file:/c:" and "file:c:" */ 112413498266Sopenharmony_ci if(('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) || 112513498266Sopenharmony_ci STARTS_WITH_URL_DRIVE_PREFIX(path)) { 112613498266Sopenharmony_ci /* File drive letters are only accepted in MSDOS/Windows */ 112713498266Sopenharmony_ci result = CURLUE_BAD_FILE_URL; 112813498266Sopenharmony_ci goto fail; 112913498266Sopenharmony_ci } 113013498266Sopenharmony_ci#else 113113498266Sopenharmony_ci /* If the path starts with a slash and a drive letter, ditch the slash */ 113213498266Sopenharmony_ci if('/' == path[0] && STARTS_WITH_URL_DRIVE_PREFIX(&path[1])) { 113313498266Sopenharmony_ci /* This cannot be done with strcpy, as the memory chunks overlap! */ 113413498266Sopenharmony_ci path++; 113513498266Sopenharmony_ci pathlen--; 113613498266Sopenharmony_ci } 113713498266Sopenharmony_ci#endif 113813498266Sopenharmony_ci 113913498266Sopenharmony_ci } 114013498266Sopenharmony_ci else { 114113498266Sopenharmony_ci /* clear path */ 114213498266Sopenharmony_ci const char *schemep = NULL; 114313498266Sopenharmony_ci const char *hostp; 114413498266Sopenharmony_ci size_t hostlen; 114513498266Sopenharmony_ci 114613498266Sopenharmony_ci if(schemelen) { 114713498266Sopenharmony_ci int i = 0; 114813498266Sopenharmony_ci const char *p = &url[schemelen + 1]; 114913498266Sopenharmony_ci while((*p == '/') && (i < 4)) { 115013498266Sopenharmony_ci p++; 115113498266Sopenharmony_ci i++; 115213498266Sopenharmony_ci } 115313498266Sopenharmony_ci 115413498266Sopenharmony_ci schemep = schemebuf; 115513498266Sopenharmony_ci if(!Curl_get_scheme_handler(schemep) && 115613498266Sopenharmony_ci !(flags & CURLU_NON_SUPPORT_SCHEME)) { 115713498266Sopenharmony_ci result = CURLUE_UNSUPPORTED_SCHEME; 115813498266Sopenharmony_ci goto fail; 115913498266Sopenharmony_ci } 116013498266Sopenharmony_ci 116113498266Sopenharmony_ci if((i < 1) || (i > 3)) { 116213498266Sopenharmony_ci /* less than one or more than three slashes */ 116313498266Sopenharmony_ci result = CURLUE_BAD_SLASHES; 116413498266Sopenharmony_ci goto fail; 116513498266Sopenharmony_ci } 116613498266Sopenharmony_ci hostp = p; /* host name starts here */ 116713498266Sopenharmony_ci } 116813498266Sopenharmony_ci else { 116913498266Sopenharmony_ci /* no scheme! */ 117013498266Sopenharmony_ci 117113498266Sopenharmony_ci if(!(flags & (CURLU_DEFAULT_SCHEME|CURLU_GUESS_SCHEME))) { 117213498266Sopenharmony_ci result = CURLUE_BAD_SCHEME; 117313498266Sopenharmony_ci goto fail; 117413498266Sopenharmony_ci } 117513498266Sopenharmony_ci if(flags & CURLU_DEFAULT_SCHEME) 117613498266Sopenharmony_ci schemep = DEFAULT_SCHEME; 117713498266Sopenharmony_ci 117813498266Sopenharmony_ci /* 117913498266Sopenharmony_ci * The URL was badly formatted, let's try without scheme specified. 118013498266Sopenharmony_ci */ 118113498266Sopenharmony_ci hostp = url; 118213498266Sopenharmony_ci } 118313498266Sopenharmony_ci 118413498266Sopenharmony_ci if(schemep) { 118513498266Sopenharmony_ci u->scheme = strdup(schemep); 118613498266Sopenharmony_ci if(!u->scheme) { 118713498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 118813498266Sopenharmony_ci goto fail; 118913498266Sopenharmony_ci } 119013498266Sopenharmony_ci } 119113498266Sopenharmony_ci 119213498266Sopenharmony_ci /* find the end of the host name + port number */ 119313498266Sopenharmony_ci hostlen = strcspn(hostp, "/?#"); 119413498266Sopenharmony_ci path = &hostp[hostlen]; 119513498266Sopenharmony_ci 119613498266Sopenharmony_ci /* this pathlen also contains the query and the fragment */ 119713498266Sopenharmony_ci pathlen = urllen - (path - url); 119813498266Sopenharmony_ci if(hostlen) { 119913498266Sopenharmony_ci 120013498266Sopenharmony_ci result = parse_authority(u, hostp, hostlen, flags, &host, schemelen); 120113498266Sopenharmony_ci if(result) 120213498266Sopenharmony_ci goto fail; 120313498266Sopenharmony_ci 120413498266Sopenharmony_ci if((flags & CURLU_GUESS_SCHEME) && !schemep) { 120513498266Sopenharmony_ci const char *hostname = Curl_dyn_ptr(&host); 120613498266Sopenharmony_ci /* legacy curl-style guess based on host name */ 120713498266Sopenharmony_ci if(checkprefix("ftp.", hostname)) 120813498266Sopenharmony_ci schemep = "ftp"; 120913498266Sopenharmony_ci else if(checkprefix("dict.", hostname)) 121013498266Sopenharmony_ci schemep = "dict"; 121113498266Sopenharmony_ci else if(checkprefix("ldap.", hostname)) 121213498266Sopenharmony_ci schemep = "ldap"; 121313498266Sopenharmony_ci else if(checkprefix("imap.", hostname)) 121413498266Sopenharmony_ci schemep = "imap"; 121513498266Sopenharmony_ci else if(checkprefix("smtp.", hostname)) 121613498266Sopenharmony_ci schemep = "smtp"; 121713498266Sopenharmony_ci else if(checkprefix("pop3.", hostname)) 121813498266Sopenharmony_ci schemep = "pop3"; 121913498266Sopenharmony_ci else 122013498266Sopenharmony_ci schemep = "http"; 122113498266Sopenharmony_ci 122213498266Sopenharmony_ci u->scheme = strdup(schemep); 122313498266Sopenharmony_ci if(!u->scheme) { 122413498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 122513498266Sopenharmony_ci goto fail; 122613498266Sopenharmony_ci } 122713498266Sopenharmony_ci } 122813498266Sopenharmony_ci } 122913498266Sopenharmony_ci else if(flags & CURLU_NO_AUTHORITY) { 123013498266Sopenharmony_ci /* allowed to be empty. */ 123113498266Sopenharmony_ci if(Curl_dyn_add(&host, "")) { 123213498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 123313498266Sopenharmony_ci goto fail; 123413498266Sopenharmony_ci } 123513498266Sopenharmony_ci } 123613498266Sopenharmony_ci else { 123713498266Sopenharmony_ci result = CURLUE_NO_HOST; 123813498266Sopenharmony_ci goto fail; 123913498266Sopenharmony_ci } 124013498266Sopenharmony_ci } 124113498266Sopenharmony_ci 124213498266Sopenharmony_ci fragment = strchr(path, '#'); 124313498266Sopenharmony_ci if(fragment) { 124413498266Sopenharmony_ci fraglen = pathlen - (fragment - path); 124513498266Sopenharmony_ci if(fraglen > 1) { 124613498266Sopenharmony_ci /* skip the leading '#' in the copy but include the terminating null */ 124713498266Sopenharmony_ci if(flags & CURLU_URLENCODE) { 124813498266Sopenharmony_ci struct dynbuf enc; 124913498266Sopenharmony_ci Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); 125013498266Sopenharmony_ci result = urlencode_str(&enc, fragment + 1, fraglen - 1, TRUE, FALSE); 125113498266Sopenharmony_ci if(result) 125213498266Sopenharmony_ci goto fail; 125313498266Sopenharmony_ci u->fragment = Curl_dyn_ptr(&enc); 125413498266Sopenharmony_ci } 125513498266Sopenharmony_ci else { 125613498266Sopenharmony_ci u->fragment = Curl_memdup0(fragment + 1, fraglen - 1); 125713498266Sopenharmony_ci if(!u->fragment) { 125813498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 125913498266Sopenharmony_ci goto fail; 126013498266Sopenharmony_ci } 126113498266Sopenharmony_ci } 126213498266Sopenharmony_ci } 126313498266Sopenharmony_ci /* after this, pathlen still contains the query */ 126413498266Sopenharmony_ci pathlen -= fraglen; 126513498266Sopenharmony_ci } 126613498266Sopenharmony_ci 126713498266Sopenharmony_ci query = memchr(path, '?', pathlen); 126813498266Sopenharmony_ci if(query) { 126913498266Sopenharmony_ci size_t qlen = fragment ? (size_t)(fragment - query) : 127013498266Sopenharmony_ci pathlen - (query - path); 127113498266Sopenharmony_ci pathlen -= qlen; 127213498266Sopenharmony_ci if(qlen > 1) { 127313498266Sopenharmony_ci if(flags & CURLU_URLENCODE) { 127413498266Sopenharmony_ci struct dynbuf enc; 127513498266Sopenharmony_ci Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); 127613498266Sopenharmony_ci /* skip the leading question mark */ 127713498266Sopenharmony_ci result = urlencode_str(&enc, query + 1, qlen - 1, TRUE, TRUE); 127813498266Sopenharmony_ci if(result) 127913498266Sopenharmony_ci goto fail; 128013498266Sopenharmony_ci u->query = Curl_dyn_ptr(&enc); 128113498266Sopenharmony_ci } 128213498266Sopenharmony_ci else { 128313498266Sopenharmony_ci u->query = Curl_memdup0(query + 1, qlen - 1); 128413498266Sopenharmony_ci if(!u->query) { 128513498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 128613498266Sopenharmony_ci goto fail; 128713498266Sopenharmony_ci } 128813498266Sopenharmony_ci } 128913498266Sopenharmony_ci } 129013498266Sopenharmony_ci else { 129113498266Sopenharmony_ci /* single byte query */ 129213498266Sopenharmony_ci u->query = strdup(""); 129313498266Sopenharmony_ci if(!u->query) { 129413498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 129513498266Sopenharmony_ci goto fail; 129613498266Sopenharmony_ci } 129713498266Sopenharmony_ci } 129813498266Sopenharmony_ci } 129913498266Sopenharmony_ci 130013498266Sopenharmony_ci if(pathlen && (flags & CURLU_URLENCODE)) { 130113498266Sopenharmony_ci struct dynbuf enc; 130213498266Sopenharmony_ci Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); 130313498266Sopenharmony_ci result = urlencode_str(&enc, path, pathlen, TRUE, FALSE); 130413498266Sopenharmony_ci if(result) 130513498266Sopenharmony_ci goto fail; 130613498266Sopenharmony_ci pathlen = Curl_dyn_len(&enc); 130713498266Sopenharmony_ci path = u->path = Curl_dyn_ptr(&enc); 130813498266Sopenharmony_ci } 130913498266Sopenharmony_ci 131013498266Sopenharmony_ci if(pathlen <= 1) { 131113498266Sopenharmony_ci /* there is no path left or just the slash, unset */ 131213498266Sopenharmony_ci path = NULL; 131313498266Sopenharmony_ci } 131413498266Sopenharmony_ci else { 131513498266Sopenharmony_ci if(!u->path) { 131613498266Sopenharmony_ci u->path = Curl_memdup0(path, pathlen); 131713498266Sopenharmony_ci if(!u->path) { 131813498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 131913498266Sopenharmony_ci goto fail; 132013498266Sopenharmony_ci } 132113498266Sopenharmony_ci path = u->path; 132213498266Sopenharmony_ci } 132313498266Sopenharmony_ci else if(flags & CURLU_URLENCODE) 132413498266Sopenharmony_ci /* it might have encoded more than just the path so cut it */ 132513498266Sopenharmony_ci u->path[pathlen] = 0; 132613498266Sopenharmony_ci 132713498266Sopenharmony_ci if(!(flags & CURLU_PATH_AS_IS)) { 132813498266Sopenharmony_ci /* remove ../ and ./ sequences according to RFC3986 */ 132913498266Sopenharmony_ci char *dedot; 133013498266Sopenharmony_ci int err = dedotdotify((char *)path, pathlen, &dedot); 133113498266Sopenharmony_ci if(err) { 133213498266Sopenharmony_ci result = CURLUE_OUT_OF_MEMORY; 133313498266Sopenharmony_ci goto fail; 133413498266Sopenharmony_ci } 133513498266Sopenharmony_ci if(dedot) { 133613498266Sopenharmony_ci free(u->path); 133713498266Sopenharmony_ci u->path = dedot; 133813498266Sopenharmony_ci } 133913498266Sopenharmony_ci } 134013498266Sopenharmony_ci } 134113498266Sopenharmony_ci 134213498266Sopenharmony_ci u->host = Curl_dyn_ptr(&host); 134313498266Sopenharmony_ci 134413498266Sopenharmony_ci return result; 134513498266Sopenharmony_cifail: 134613498266Sopenharmony_ci Curl_dyn_free(&host); 134713498266Sopenharmony_ci free_urlhandle(u); 134813498266Sopenharmony_ci return result; 134913498266Sopenharmony_ci} 135013498266Sopenharmony_ci 135113498266Sopenharmony_ci/* 135213498266Sopenharmony_ci * Parse the URL and, if successful, replace everything in the Curl_URL struct. 135313498266Sopenharmony_ci */ 135413498266Sopenharmony_cistatic CURLUcode parseurl_and_replace(const char *url, CURLU *u, 135513498266Sopenharmony_ci unsigned int flags) 135613498266Sopenharmony_ci{ 135713498266Sopenharmony_ci CURLUcode result; 135813498266Sopenharmony_ci CURLU tmpurl; 135913498266Sopenharmony_ci memset(&tmpurl, 0, sizeof(tmpurl)); 136013498266Sopenharmony_ci result = parseurl(url, &tmpurl, flags); 136113498266Sopenharmony_ci if(!result) { 136213498266Sopenharmony_ci free_urlhandle(u); 136313498266Sopenharmony_ci *u = tmpurl; 136413498266Sopenharmony_ci } 136513498266Sopenharmony_ci return result; 136613498266Sopenharmony_ci} 136713498266Sopenharmony_ci 136813498266Sopenharmony_ci/* 136913498266Sopenharmony_ci */ 137013498266Sopenharmony_ciCURLU *curl_url(void) 137113498266Sopenharmony_ci{ 137213498266Sopenharmony_ci return calloc(1, sizeof(struct Curl_URL)); 137313498266Sopenharmony_ci} 137413498266Sopenharmony_ci 137513498266Sopenharmony_civoid curl_url_cleanup(CURLU *u) 137613498266Sopenharmony_ci{ 137713498266Sopenharmony_ci if(u) { 137813498266Sopenharmony_ci free_urlhandle(u); 137913498266Sopenharmony_ci free(u); 138013498266Sopenharmony_ci } 138113498266Sopenharmony_ci} 138213498266Sopenharmony_ci 138313498266Sopenharmony_ci#define DUP(dest, src, name) \ 138413498266Sopenharmony_ci do { \ 138513498266Sopenharmony_ci if(src->name) { \ 138613498266Sopenharmony_ci dest->name = strdup(src->name); \ 138713498266Sopenharmony_ci if(!dest->name) \ 138813498266Sopenharmony_ci goto fail; \ 138913498266Sopenharmony_ci } \ 139013498266Sopenharmony_ci } while(0) 139113498266Sopenharmony_ci 139213498266Sopenharmony_ciCURLU *curl_url_dup(const CURLU *in) 139313498266Sopenharmony_ci{ 139413498266Sopenharmony_ci struct Curl_URL *u = calloc(1, sizeof(struct Curl_URL)); 139513498266Sopenharmony_ci if(u) { 139613498266Sopenharmony_ci DUP(u, in, scheme); 139713498266Sopenharmony_ci DUP(u, in, user); 139813498266Sopenharmony_ci DUP(u, in, password); 139913498266Sopenharmony_ci DUP(u, in, options); 140013498266Sopenharmony_ci DUP(u, in, host); 140113498266Sopenharmony_ci DUP(u, in, port); 140213498266Sopenharmony_ci DUP(u, in, path); 140313498266Sopenharmony_ci DUP(u, in, query); 140413498266Sopenharmony_ci DUP(u, in, fragment); 140513498266Sopenharmony_ci DUP(u, in, zoneid); 140613498266Sopenharmony_ci u->portnum = in->portnum; 140713498266Sopenharmony_ci } 140813498266Sopenharmony_ci return u; 140913498266Sopenharmony_cifail: 141013498266Sopenharmony_ci curl_url_cleanup(u); 141113498266Sopenharmony_ci return NULL; 141213498266Sopenharmony_ci} 141313498266Sopenharmony_ci 141413498266Sopenharmony_ciCURLUcode curl_url_get(const CURLU *u, CURLUPart what, 141513498266Sopenharmony_ci char **part, unsigned int flags) 141613498266Sopenharmony_ci{ 141713498266Sopenharmony_ci const char *ptr; 141813498266Sopenharmony_ci CURLUcode ifmissing = CURLUE_UNKNOWN_PART; 141913498266Sopenharmony_ci char portbuf[7]; 142013498266Sopenharmony_ci bool urldecode = (flags & CURLU_URLDECODE)?1:0; 142113498266Sopenharmony_ci bool urlencode = (flags & CURLU_URLENCODE)?1:0; 142213498266Sopenharmony_ci bool punycode = FALSE; 142313498266Sopenharmony_ci bool depunyfy = FALSE; 142413498266Sopenharmony_ci bool plusdecode = FALSE; 142513498266Sopenharmony_ci (void)flags; 142613498266Sopenharmony_ci if(!u) 142713498266Sopenharmony_ci return CURLUE_BAD_HANDLE; 142813498266Sopenharmony_ci if(!part) 142913498266Sopenharmony_ci return CURLUE_BAD_PARTPOINTER; 143013498266Sopenharmony_ci *part = NULL; 143113498266Sopenharmony_ci 143213498266Sopenharmony_ci switch(what) { 143313498266Sopenharmony_ci case CURLUPART_SCHEME: 143413498266Sopenharmony_ci ptr = u->scheme; 143513498266Sopenharmony_ci ifmissing = CURLUE_NO_SCHEME; 143613498266Sopenharmony_ci urldecode = FALSE; /* never for schemes */ 143713498266Sopenharmony_ci break; 143813498266Sopenharmony_ci case CURLUPART_USER: 143913498266Sopenharmony_ci ptr = u->user; 144013498266Sopenharmony_ci ifmissing = CURLUE_NO_USER; 144113498266Sopenharmony_ci break; 144213498266Sopenharmony_ci case CURLUPART_PASSWORD: 144313498266Sopenharmony_ci ptr = u->password; 144413498266Sopenharmony_ci ifmissing = CURLUE_NO_PASSWORD; 144513498266Sopenharmony_ci break; 144613498266Sopenharmony_ci case CURLUPART_OPTIONS: 144713498266Sopenharmony_ci ptr = u->options; 144813498266Sopenharmony_ci ifmissing = CURLUE_NO_OPTIONS; 144913498266Sopenharmony_ci break; 145013498266Sopenharmony_ci case CURLUPART_HOST: 145113498266Sopenharmony_ci ptr = u->host; 145213498266Sopenharmony_ci ifmissing = CURLUE_NO_HOST; 145313498266Sopenharmony_ci punycode = (flags & CURLU_PUNYCODE)?1:0; 145413498266Sopenharmony_ci depunyfy = (flags & CURLU_PUNY2IDN)?1:0; 145513498266Sopenharmony_ci break; 145613498266Sopenharmony_ci case CURLUPART_ZONEID: 145713498266Sopenharmony_ci ptr = u->zoneid; 145813498266Sopenharmony_ci ifmissing = CURLUE_NO_ZONEID; 145913498266Sopenharmony_ci break; 146013498266Sopenharmony_ci case CURLUPART_PORT: 146113498266Sopenharmony_ci ptr = u->port; 146213498266Sopenharmony_ci ifmissing = CURLUE_NO_PORT; 146313498266Sopenharmony_ci urldecode = FALSE; /* never for port */ 146413498266Sopenharmony_ci if(!ptr && (flags & CURLU_DEFAULT_PORT) && u->scheme) { 146513498266Sopenharmony_ci /* there's no stored port number, but asked to deliver 146613498266Sopenharmony_ci a default one for the scheme */ 146713498266Sopenharmony_ci const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme); 146813498266Sopenharmony_ci if(h) { 146913498266Sopenharmony_ci msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); 147013498266Sopenharmony_ci ptr = portbuf; 147113498266Sopenharmony_ci } 147213498266Sopenharmony_ci } 147313498266Sopenharmony_ci else if(ptr && u->scheme) { 147413498266Sopenharmony_ci /* there is a stored port number, but ask to inhibit if 147513498266Sopenharmony_ci it matches the default one for the scheme */ 147613498266Sopenharmony_ci const struct Curl_handler *h = Curl_get_scheme_handler(u->scheme); 147713498266Sopenharmony_ci if(h && (h->defport == u->portnum) && 147813498266Sopenharmony_ci (flags & CURLU_NO_DEFAULT_PORT)) 147913498266Sopenharmony_ci ptr = NULL; 148013498266Sopenharmony_ci } 148113498266Sopenharmony_ci break; 148213498266Sopenharmony_ci case CURLUPART_PATH: 148313498266Sopenharmony_ci ptr = u->path; 148413498266Sopenharmony_ci if(!ptr) 148513498266Sopenharmony_ci ptr = "/"; 148613498266Sopenharmony_ci break; 148713498266Sopenharmony_ci case CURLUPART_QUERY: 148813498266Sopenharmony_ci ptr = u->query; 148913498266Sopenharmony_ci ifmissing = CURLUE_NO_QUERY; 149013498266Sopenharmony_ci plusdecode = urldecode; 149113498266Sopenharmony_ci break; 149213498266Sopenharmony_ci case CURLUPART_FRAGMENT: 149313498266Sopenharmony_ci ptr = u->fragment; 149413498266Sopenharmony_ci ifmissing = CURLUE_NO_FRAGMENT; 149513498266Sopenharmony_ci break; 149613498266Sopenharmony_ci case CURLUPART_URL: { 149713498266Sopenharmony_ci char *url; 149813498266Sopenharmony_ci char *scheme; 149913498266Sopenharmony_ci char *options = u->options; 150013498266Sopenharmony_ci char *port = u->port; 150113498266Sopenharmony_ci char *allochost = NULL; 150213498266Sopenharmony_ci punycode = (flags & CURLU_PUNYCODE)?1:0; 150313498266Sopenharmony_ci depunyfy = (flags & CURLU_PUNY2IDN)?1:0; 150413498266Sopenharmony_ci if(u->scheme && strcasecompare("file", u->scheme)) { 150513498266Sopenharmony_ci url = aprintf("file://%s%s%s", 150613498266Sopenharmony_ci u->path, 150713498266Sopenharmony_ci u->fragment? "#": "", 150813498266Sopenharmony_ci u->fragment? u->fragment : ""); 150913498266Sopenharmony_ci } 151013498266Sopenharmony_ci else if(!u->host) 151113498266Sopenharmony_ci return CURLUE_NO_HOST; 151213498266Sopenharmony_ci else { 151313498266Sopenharmony_ci const struct Curl_handler *h = NULL; 151413498266Sopenharmony_ci if(u->scheme) 151513498266Sopenharmony_ci scheme = u->scheme; 151613498266Sopenharmony_ci else if(flags & CURLU_DEFAULT_SCHEME) 151713498266Sopenharmony_ci scheme = (char *) DEFAULT_SCHEME; 151813498266Sopenharmony_ci else 151913498266Sopenharmony_ci return CURLUE_NO_SCHEME; 152013498266Sopenharmony_ci 152113498266Sopenharmony_ci h = Curl_get_scheme_handler(scheme); 152213498266Sopenharmony_ci if(!port && (flags & CURLU_DEFAULT_PORT)) { 152313498266Sopenharmony_ci /* there's no stored port number, but asked to deliver 152413498266Sopenharmony_ci a default one for the scheme */ 152513498266Sopenharmony_ci if(h) { 152613498266Sopenharmony_ci msnprintf(portbuf, sizeof(portbuf), "%u", h->defport); 152713498266Sopenharmony_ci port = portbuf; 152813498266Sopenharmony_ci } 152913498266Sopenharmony_ci } 153013498266Sopenharmony_ci else if(port) { 153113498266Sopenharmony_ci /* there is a stored port number, but asked to inhibit if it matches 153213498266Sopenharmony_ci the default one for the scheme */ 153313498266Sopenharmony_ci if(h && (h->defport == u->portnum) && 153413498266Sopenharmony_ci (flags & CURLU_NO_DEFAULT_PORT)) 153513498266Sopenharmony_ci port = NULL; 153613498266Sopenharmony_ci } 153713498266Sopenharmony_ci 153813498266Sopenharmony_ci if(h && !(h->flags & PROTOPT_URLOPTIONS)) 153913498266Sopenharmony_ci options = NULL; 154013498266Sopenharmony_ci 154113498266Sopenharmony_ci if(u->host[0] == '[') { 154213498266Sopenharmony_ci if(u->zoneid) { 154313498266Sopenharmony_ci /* make it '[ host %25 zoneid ]' */ 154413498266Sopenharmony_ci struct dynbuf enc; 154513498266Sopenharmony_ci size_t hostlen = strlen(u->host); 154613498266Sopenharmony_ci Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); 154713498266Sopenharmony_ci if(Curl_dyn_addf(&enc, "%.*s%%25%s]", (int)hostlen - 1, u->host, 154813498266Sopenharmony_ci u->zoneid)) 154913498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 155013498266Sopenharmony_ci allochost = Curl_dyn_ptr(&enc); 155113498266Sopenharmony_ci } 155213498266Sopenharmony_ci } 155313498266Sopenharmony_ci else if(urlencode) { 155413498266Sopenharmony_ci allochost = curl_easy_escape(NULL, u->host, 0); 155513498266Sopenharmony_ci if(!allochost) 155613498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 155713498266Sopenharmony_ci } 155813498266Sopenharmony_ci else if(punycode) { 155913498266Sopenharmony_ci if(!Curl_is_ASCII_name(u->host)) { 156013498266Sopenharmony_ci#ifndef USE_IDN 156113498266Sopenharmony_ci return CURLUE_LACKS_IDN; 156213498266Sopenharmony_ci#else 156313498266Sopenharmony_ci CURLcode result = Curl_idn_decode(u->host, &allochost); 156413498266Sopenharmony_ci if(result) 156513498266Sopenharmony_ci return (result == CURLE_OUT_OF_MEMORY) ? 156613498266Sopenharmony_ci CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; 156713498266Sopenharmony_ci#endif 156813498266Sopenharmony_ci } 156913498266Sopenharmony_ci } 157013498266Sopenharmony_ci else if(depunyfy) { 157113498266Sopenharmony_ci if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) { 157213498266Sopenharmony_ci#ifndef USE_IDN 157313498266Sopenharmony_ci return CURLUE_LACKS_IDN; 157413498266Sopenharmony_ci#else 157513498266Sopenharmony_ci CURLcode result = Curl_idn_encode(u->host, &allochost); 157613498266Sopenharmony_ci if(result) 157713498266Sopenharmony_ci /* this is the most likely error */ 157813498266Sopenharmony_ci return (result == CURLE_OUT_OF_MEMORY) ? 157913498266Sopenharmony_ci CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; 158013498266Sopenharmony_ci#endif 158113498266Sopenharmony_ci } 158213498266Sopenharmony_ci } 158313498266Sopenharmony_ci 158413498266Sopenharmony_ci url = aprintf("%s://%s%s%s%s%s%s%s%s%s%s%s%s%s%s", 158513498266Sopenharmony_ci scheme, 158613498266Sopenharmony_ci u->user ? u->user : "", 158713498266Sopenharmony_ci u->password ? ":": "", 158813498266Sopenharmony_ci u->password ? u->password : "", 158913498266Sopenharmony_ci options ? ";" : "", 159013498266Sopenharmony_ci options ? options : "", 159113498266Sopenharmony_ci (u->user || u->password || options) ? "@": "", 159213498266Sopenharmony_ci allochost ? allochost : u->host, 159313498266Sopenharmony_ci port ? ":": "", 159413498266Sopenharmony_ci port ? port : "", 159513498266Sopenharmony_ci u->path ? u->path : "/", 159613498266Sopenharmony_ci (u->query && u->query[0]) ? "?": "", 159713498266Sopenharmony_ci (u->query && u->query[0]) ? u->query : "", 159813498266Sopenharmony_ci u->fragment? "#": "", 159913498266Sopenharmony_ci u->fragment? u->fragment : ""); 160013498266Sopenharmony_ci free(allochost); 160113498266Sopenharmony_ci } 160213498266Sopenharmony_ci if(!url) 160313498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 160413498266Sopenharmony_ci *part = url; 160513498266Sopenharmony_ci return CURLUE_OK; 160613498266Sopenharmony_ci } 160713498266Sopenharmony_ci default: 160813498266Sopenharmony_ci ptr = NULL; 160913498266Sopenharmony_ci break; 161013498266Sopenharmony_ci } 161113498266Sopenharmony_ci if(ptr) { 161213498266Sopenharmony_ci size_t partlen = strlen(ptr); 161313498266Sopenharmony_ci size_t i = 0; 161413498266Sopenharmony_ci *part = Curl_memdup0(ptr, partlen); 161513498266Sopenharmony_ci if(!*part) 161613498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 161713498266Sopenharmony_ci if(plusdecode) { 161813498266Sopenharmony_ci /* convert + to space */ 161913498266Sopenharmony_ci char *plus = *part; 162013498266Sopenharmony_ci for(i = 0; i < partlen; ++plus, i++) { 162113498266Sopenharmony_ci if(*plus == '+') 162213498266Sopenharmony_ci *plus = ' '; 162313498266Sopenharmony_ci } 162413498266Sopenharmony_ci } 162513498266Sopenharmony_ci if(urldecode) { 162613498266Sopenharmony_ci char *decoded; 162713498266Sopenharmony_ci size_t dlen; 162813498266Sopenharmony_ci /* this unconditional rejection of control bytes is documented 162913498266Sopenharmony_ci API behavior */ 163013498266Sopenharmony_ci CURLcode res = Curl_urldecode(*part, 0, &decoded, &dlen, REJECT_CTRL); 163113498266Sopenharmony_ci free(*part); 163213498266Sopenharmony_ci if(res) { 163313498266Sopenharmony_ci *part = NULL; 163413498266Sopenharmony_ci return CURLUE_URLDECODE; 163513498266Sopenharmony_ci } 163613498266Sopenharmony_ci *part = decoded; 163713498266Sopenharmony_ci partlen = dlen; 163813498266Sopenharmony_ci } 163913498266Sopenharmony_ci if(urlencode) { 164013498266Sopenharmony_ci struct dynbuf enc; 164113498266Sopenharmony_ci CURLUcode uc; 164213498266Sopenharmony_ci Curl_dyn_init(&enc, CURL_MAX_INPUT_LENGTH); 164313498266Sopenharmony_ci uc = urlencode_str(&enc, *part, partlen, TRUE, what == CURLUPART_QUERY); 164413498266Sopenharmony_ci if(uc) 164513498266Sopenharmony_ci return uc; 164613498266Sopenharmony_ci free(*part); 164713498266Sopenharmony_ci *part = Curl_dyn_ptr(&enc); 164813498266Sopenharmony_ci } 164913498266Sopenharmony_ci else if(punycode) { 165013498266Sopenharmony_ci if(!Curl_is_ASCII_name(u->host)) { 165113498266Sopenharmony_ci#ifndef USE_IDN 165213498266Sopenharmony_ci return CURLUE_LACKS_IDN; 165313498266Sopenharmony_ci#else 165413498266Sopenharmony_ci char *allochost; 165513498266Sopenharmony_ci CURLcode result = Curl_idn_decode(*part, &allochost); 165613498266Sopenharmony_ci if(result) 165713498266Sopenharmony_ci return (result == CURLE_OUT_OF_MEMORY) ? 165813498266Sopenharmony_ci CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; 165913498266Sopenharmony_ci free(*part); 166013498266Sopenharmony_ci *part = allochost; 166113498266Sopenharmony_ci#endif 166213498266Sopenharmony_ci } 166313498266Sopenharmony_ci } 166413498266Sopenharmony_ci else if(depunyfy) { 166513498266Sopenharmony_ci if(Curl_is_ASCII_name(u->host) && !strncmp("xn--", u->host, 4)) { 166613498266Sopenharmony_ci#ifndef USE_IDN 166713498266Sopenharmony_ci return CURLUE_LACKS_IDN; 166813498266Sopenharmony_ci#else 166913498266Sopenharmony_ci char *allochost; 167013498266Sopenharmony_ci CURLcode result = Curl_idn_encode(*part, &allochost); 167113498266Sopenharmony_ci if(result) 167213498266Sopenharmony_ci return (result == CURLE_OUT_OF_MEMORY) ? 167313498266Sopenharmony_ci CURLUE_OUT_OF_MEMORY : CURLUE_BAD_HOSTNAME; 167413498266Sopenharmony_ci free(*part); 167513498266Sopenharmony_ci *part = allochost; 167613498266Sopenharmony_ci#endif 167713498266Sopenharmony_ci } 167813498266Sopenharmony_ci } 167913498266Sopenharmony_ci 168013498266Sopenharmony_ci return CURLUE_OK; 168113498266Sopenharmony_ci } 168213498266Sopenharmony_ci else 168313498266Sopenharmony_ci return ifmissing; 168413498266Sopenharmony_ci} 168513498266Sopenharmony_ci 168613498266Sopenharmony_ciCURLUcode curl_url_set(CURLU *u, CURLUPart what, 168713498266Sopenharmony_ci const char *part, unsigned int flags) 168813498266Sopenharmony_ci{ 168913498266Sopenharmony_ci char **storep = NULL; 169013498266Sopenharmony_ci long port = 0; 169113498266Sopenharmony_ci bool urlencode = (flags & CURLU_URLENCODE)? 1 : 0; 169213498266Sopenharmony_ci bool plusencode = FALSE; 169313498266Sopenharmony_ci bool urlskipslash = FALSE; 169413498266Sopenharmony_ci bool leadingslash = FALSE; 169513498266Sopenharmony_ci bool appendquery = FALSE; 169613498266Sopenharmony_ci bool equalsencode = FALSE; 169713498266Sopenharmony_ci size_t nalloc; 169813498266Sopenharmony_ci 169913498266Sopenharmony_ci if(!u) 170013498266Sopenharmony_ci return CURLUE_BAD_HANDLE; 170113498266Sopenharmony_ci if(!part) { 170213498266Sopenharmony_ci /* setting a part to NULL clears it */ 170313498266Sopenharmony_ci switch(what) { 170413498266Sopenharmony_ci case CURLUPART_URL: 170513498266Sopenharmony_ci break; 170613498266Sopenharmony_ci case CURLUPART_SCHEME: 170713498266Sopenharmony_ci storep = &u->scheme; 170813498266Sopenharmony_ci break; 170913498266Sopenharmony_ci case CURLUPART_USER: 171013498266Sopenharmony_ci storep = &u->user; 171113498266Sopenharmony_ci break; 171213498266Sopenharmony_ci case CURLUPART_PASSWORD: 171313498266Sopenharmony_ci storep = &u->password; 171413498266Sopenharmony_ci break; 171513498266Sopenharmony_ci case CURLUPART_OPTIONS: 171613498266Sopenharmony_ci storep = &u->options; 171713498266Sopenharmony_ci break; 171813498266Sopenharmony_ci case CURLUPART_HOST: 171913498266Sopenharmony_ci storep = &u->host; 172013498266Sopenharmony_ci break; 172113498266Sopenharmony_ci case CURLUPART_ZONEID: 172213498266Sopenharmony_ci storep = &u->zoneid; 172313498266Sopenharmony_ci break; 172413498266Sopenharmony_ci case CURLUPART_PORT: 172513498266Sopenharmony_ci u->portnum = 0; 172613498266Sopenharmony_ci storep = &u->port; 172713498266Sopenharmony_ci break; 172813498266Sopenharmony_ci case CURLUPART_PATH: 172913498266Sopenharmony_ci storep = &u->path; 173013498266Sopenharmony_ci break; 173113498266Sopenharmony_ci case CURLUPART_QUERY: 173213498266Sopenharmony_ci storep = &u->query; 173313498266Sopenharmony_ci break; 173413498266Sopenharmony_ci case CURLUPART_FRAGMENT: 173513498266Sopenharmony_ci storep = &u->fragment; 173613498266Sopenharmony_ci break; 173713498266Sopenharmony_ci default: 173813498266Sopenharmony_ci return CURLUE_UNKNOWN_PART; 173913498266Sopenharmony_ci } 174013498266Sopenharmony_ci if(storep && *storep) { 174113498266Sopenharmony_ci Curl_safefree(*storep); 174213498266Sopenharmony_ci } 174313498266Sopenharmony_ci else if(!storep) { 174413498266Sopenharmony_ci free_urlhandle(u); 174513498266Sopenharmony_ci memset(u, 0, sizeof(struct Curl_URL)); 174613498266Sopenharmony_ci } 174713498266Sopenharmony_ci return CURLUE_OK; 174813498266Sopenharmony_ci } 174913498266Sopenharmony_ci 175013498266Sopenharmony_ci nalloc = strlen(part); 175113498266Sopenharmony_ci if(nalloc > CURL_MAX_INPUT_LENGTH) 175213498266Sopenharmony_ci /* excessive input length */ 175313498266Sopenharmony_ci return CURLUE_MALFORMED_INPUT; 175413498266Sopenharmony_ci 175513498266Sopenharmony_ci switch(what) { 175613498266Sopenharmony_ci case CURLUPART_SCHEME: { 175713498266Sopenharmony_ci size_t plen = strlen(part); 175813498266Sopenharmony_ci const char *s = part; 175913498266Sopenharmony_ci if((plen > MAX_SCHEME_LEN) || (plen < 1)) 176013498266Sopenharmony_ci /* too long or too short */ 176113498266Sopenharmony_ci return CURLUE_BAD_SCHEME; 176213498266Sopenharmony_ci /* verify that it is a fine scheme */ 176313498266Sopenharmony_ci if(!(flags & CURLU_NON_SUPPORT_SCHEME) && !Curl_get_scheme_handler(part)) 176413498266Sopenharmony_ci return CURLUE_UNSUPPORTED_SCHEME; 176513498266Sopenharmony_ci storep = &u->scheme; 176613498266Sopenharmony_ci urlencode = FALSE; /* never */ 176713498266Sopenharmony_ci if(ISALPHA(*s)) { 176813498266Sopenharmony_ci /* ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) */ 176913498266Sopenharmony_ci while(--plen) { 177013498266Sopenharmony_ci if(ISALNUM(*s) || (*s == '+') || (*s == '-') || (*s == '.')) 177113498266Sopenharmony_ci s++; /* fine */ 177213498266Sopenharmony_ci else 177313498266Sopenharmony_ci return CURLUE_BAD_SCHEME; 177413498266Sopenharmony_ci } 177513498266Sopenharmony_ci } 177613498266Sopenharmony_ci else 177713498266Sopenharmony_ci return CURLUE_BAD_SCHEME; 177813498266Sopenharmony_ci break; 177913498266Sopenharmony_ci } 178013498266Sopenharmony_ci case CURLUPART_USER: 178113498266Sopenharmony_ci storep = &u->user; 178213498266Sopenharmony_ci break; 178313498266Sopenharmony_ci case CURLUPART_PASSWORD: 178413498266Sopenharmony_ci storep = &u->password; 178513498266Sopenharmony_ci break; 178613498266Sopenharmony_ci case CURLUPART_OPTIONS: 178713498266Sopenharmony_ci storep = &u->options; 178813498266Sopenharmony_ci break; 178913498266Sopenharmony_ci case CURLUPART_HOST: 179013498266Sopenharmony_ci storep = &u->host; 179113498266Sopenharmony_ci Curl_safefree(u->zoneid); 179213498266Sopenharmony_ci break; 179313498266Sopenharmony_ci case CURLUPART_ZONEID: 179413498266Sopenharmony_ci storep = &u->zoneid; 179513498266Sopenharmony_ci break; 179613498266Sopenharmony_ci case CURLUPART_PORT: 179713498266Sopenharmony_ci { 179813498266Sopenharmony_ci char *endp; 179913498266Sopenharmony_ci urlencode = FALSE; /* never */ 180013498266Sopenharmony_ci port = strtol(part, &endp, 10); /* Port number must be decimal */ 180113498266Sopenharmony_ci if((port <= 0) || (port > 0xffff)) 180213498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 180313498266Sopenharmony_ci if(*endp) 180413498266Sopenharmony_ci /* weirdly provided number, not good! */ 180513498266Sopenharmony_ci return CURLUE_BAD_PORT_NUMBER; 180613498266Sopenharmony_ci storep = &u->port; 180713498266Sopenharmony_ci } 180813498266Sopenharmony_ci break; 180913498266Sopenharmony_ci case CURLUPART_PATH: 181013498266Sopenharmony_ci urlskipslash = TRUE; 181113498266Sopenharmony_ci leadingslash = TRUE; /* enforce */ 181213498266Sopenharmony_ci storep = &u->path; 181313498266Sopenharmony_ci break; 181413498266Sopenharmony_ci case CURLUPART_QUERY: 181513498266Sopenharmony_ci plusencode = urlencode; 181613498266Sopenharmony_ci appendquery = (flags & CURLU_APPENDQUERY)?1:0; 181713498266Sopenharmony_ci equalsencode = appendquery; 181813498266Sopenharmony_ci storep = &u->query; 181913498266Sopenharmony_ci break; 182013498266Sopenharmony_ci case CURLUPART_FRAGMENT: 182113498266Sopenharmony_ci storep = &u->fragment; 182213498266Sopenharmony_ci break; 182313498266Sopenharmony_ci case CURLUPART_URL: { 182413498266Sopenharmony_ci /* 182513498266Sopenharmony_ci * Allow a new URL to replace the existing (if any) contents. 182613498266Sopenharmony_ci * 182713498266Sopenharmony_ci * If the existing contents is enough for a URL, allow a relative URL to 182813498266Sopenharmony_ci * replace it. 182913498266Sopenharmony_ci */ 183013498266Sopenharmony_ci CURLcode result; 183113498266Sopenharmony_ci CURLUcode uc; 183213498266Sopenharmony_ci char *oldurl; 183313498266Sopenharmony_ci char *redired_url; 183413498266Sopenharmony_ci 183513498266Sopenharmony_ci if(!nalloc) 183613498266Sopenharmony_ci /* a blank URL is not a valid URL */ 183713498266Sopenharmony_ci return CURLUE_MALFORMED_INPUT; 183813498266Sopenharmony_ci 183913498266Sopenharmony_ci /* if the new thing is absolute or the old one is not 184013498266Sopenharmony_ci * (we could not get an absolute url in 'oldurl'), 184113498266Sopenharmony_ci * then replace the existing with the new. */ 184213498266Sopenharmony_ci if(Curl_is_absolute_url(part, NULL, 0, 184313498266Sopenharmony_ci flags & (CURLU_GUESS_SCHEME| 184413498266Sopenharmony_ci CURLU_DEFAULT_SCHEME)) 184513498266Sopenharmony_ci || curl_url_get(u, CURLUPART_URL, &oldurl, flags)) { 184613498266Sopenharmony_ci return parseurl_and_replace(part, u, flags); 184713498266Sopenharmony_ci } 184813498266Sopenharmony_ci 184913498266Sopenharmony_ci /* apply the relative part to create a new URL 185013498266Sopenharmony_ci * and replace the existing one with it. */ 185113498266Sopenharmony_ci result = concat_url(oldurl, part, &redired_url); 185213498266Sopenharmony_ci free(oldurl); 185313498266Sopenharmony_ci if(result) 185413498266Sopenharmony_ci return cc2cu(result); 185513498266Sopenharmony_ci 185613498266Sopenharmony_ci uc = parseurl_and_replace(redired_url, u, flags); 185713498266Sopenharmony_ci free(redired_url); 185813498266Sopenharmony_ci return uc; 185913498266Sopenharmony_ci } 186013498266Sopenharmony_ci default: 186113498266Sopenharmony_ci return CURLUE_UNKNOWN_PART; 186213498266Sopenharmony_ci } 186313498266Sopenharmony_ci DEBUGASSERT(storep); 186413498266Sopenharmony_ci { 186513498266Sopenharmony_ci const char *newp; 186613498266Sopenharmony_ci struct dynbuf enc; 186713498266Sopenharmony_ci Curl_dyn_init(&enc, nalloc * 3 + 1 + leadingslash); 186813498266Sopenharmony_ci 186913498266Sopenharmony_ci if(leadingslash && (part[0] != '/')) { 187013498266Sopenharmony_ci CURLcode result = Curl_dyn_addn(&enc, "/", 1); 187113498266Sopenharmony_ci if(result) 187213498266Sopenharmony_ci return cc2cu(result); 187313498266Sopenharmony_ci } 187413498266Sopenharmony_ci if(urlencode) { 187513498266Sopenharmony_ci const unsigned char *i; 187613498266Sopenharmony_ci 187713498266Sopenharmony_ci for(i = (const unsigned char *)part; *i; i++) { 187813498266Sopenharmony_ci CURLcode result; 187913498266Sopenharmony_ci if((*i == ' ') && plusencode) { 188013498266Sopenharmony_ci result = Curl_dyn_addn(&enc, "+", 1); 188113498266Sopenharmony_ci if(result) 188213498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 188313498266Sopenharmony_ci } 188413498266Sopenharmony_ci else if(ISUNRESERVED(*i) || 188513498266Sopenharmony_ci ((*i == '/') && urlskipslash) || 188613498266Sopenharmony_ci ((*i == '=') && equalsencode)) { 188713498266Sopenharmony_ci if((*i == '=') && equalsencode) 188813498266Sopenharmony_ci /* only skip the first equals sign */ 188913498266Sopenharmony_ci equalsencode = FALSE; 189013498266Sopenharmony_ci result = Curl_dyn_addn(&enc, i, 1); 189113498266Sopenharmony_ci if(result) 189213498266Sopenharmony_ci return cc2cu(result); 189313498266Sopenharmony_ci } 189413498266Sopenharmony_ci else { 189513498266Sopenharmony_ci char out[3]={'%'}; 189613498266Sopenharmony_ci out[1] = hexdigits[*i>>4]; 189713498266Sopenharmony_ci out[2] = hexdigits[*i & 0xf]; 189813498266Sopenharmony_ci result = Curl_dyn_addn(&enc, out, 3); 189913498266Sopenharmony_ci if(result) 190013498266Sopenharmony_ci return cc2cu(result); 190113498266Sopenharmony_ci } 190213498266Sopenharmony_ci } 190313498266Sopenharmony_ci } 190413498266Sopenharmony_ci else { 190513498266Sopenharmony_ci char *p; 190613498266Sopenharmony_ci CURLcode result = Curl_dyn_add(&enc, part); 190713498266Sopenharmony_ci if(result) 190813498266Sopenharmony_ci return cc2cu(result); 190913498266Sopenharmony_ci p = Curl_dyn_ptr(&enc); 191013498266Sopenharmony_ci while(*p) { 191113498266Sopenharmony_ci /* make sure percent encoded are lower case */ 191213498266Sopenharmony_ci if((*p == '%') && ISXDIGIT(p[1]) && ISXDIGIT(p[2]) && 191313498266Sopenharmony_ci (ISUPPER(p[1]) || ISUPPER(p[2]))) { 191413498266Sopenharmony_ci p[1] = Curl_raw_tolower(p[1]); 191513498266Sopenharmony_ci p[2] = Curl_raw_tolower(p[2]); 191613498266Sopenharmony_ci p += 3; 191713498266Sopenharmony_ci } 191813498266Sopenharmony_ci else 191913498266Sopenharmony_ci p++; 192013498266Sopenharmony_ci } 192113498266Sopenharmony_ci } 192213498266Sopenharmony_ci newp = Curl_dyn_ptr(&enc); 192313498266Sopenharmony_ci 192413498266Sopenharmony_ci if(appendquery && newp) { 192513498266Sopenharmony_ci /* Append the 'newp' string onto the old query. Add a '&' separator if 192613498266Sopenharmony_ci none is present at the end of the existing query already */ 192713498266Sopenharmony_ci 192813498266Sopenharmony_ci size_t querylen = u->query ? strlen(u->query) : 0; 192913498266Sopenharmony_ci bool addamperand = querylen && (u->query[querylen -1] != '&'); 193013498266Sopenharmony_ci if(querylen) { 193113498266Sopenharmony_ci struct dynbuf qbuf; 193213498266Sopenharmony_ci Curl_dyn_init(&qbuf, CURL_MAX_INPUT_LENGTH); 193313498266Sopenharmony_ci 193413498266Sopenharmony_ci if(Curl_dyn_addn(&qbuf, u->query, querylen)) /* add original query */ 193513498266Sopenharmony_ci goto nomem; 193613498266Sopenharmony_ci 193713498266Sopenharmony_ci if(addamperand) { 193813498266Sopenharmony_ci if(Curl_dyn_addn(&qbuf, "&", 1)) 193913498266Sopenharmony_ci goto nomem; 194013498266Sopenharmony_ci } 194113498266Sopenharmony_ci if(Curl_dyn_add(&qbuf, newp)) 194213498266Sopenharmony_ci goto nomem; 194313498266Sopenharmony_ci Curl_dyn_free(&enc); 194413498266Sopenharmony_ci free(*storep); 194513498266Sopenharmony_ci *storep = Curl_dyn_ptr(&qbuf); 194613498266Sopenharmony_ci return CURLUE_OK; 194713498266Sopenharmony_cinomem: 194813498266Sopenharmony_ci Curl_dyn_free(&enc); 194913498266Sopenharmony_ci return CURLUE_OUT_OF_MEMORY; 195013498266Sopenharmony_ci } 195113498266Sopenharmony_ci } 195213498266Sopenharmony_ci 195313498266Sopenharmony_ci else if(what == CURLUPART_HOST) { 195413498266Sopenharmony_ci size_t n = Curl_dyn_len(&enc); 195513498266Sopenharmony_ci if(!n && (flags & CURLU_NO_AUTHORITY)) { 195613498266Sopenharmony_ci /* Skip hostname check, it's allowed to be empty. */ 195713498266Sopenharmony_ci } 195813498266Sopenharmony_ci else { 195913498266Sopenharmony_ci if(!n || hostname_check(u, (char *)newp, n)) { 196013498266Sopenharmony_ci Curl_dyn_free(&enc); 196113498266Sopenharmony_ci return CURLUE_BAD_HOSTNAME; 196213498266Sopenharmony_ci } 196313498266Sopenharmony_ci } 196413498266Sopenharmony_ci } 196513498266Sopenharmony_ci 196613498266Sopenharmony_ci free(*storep); 196713498266Sopenharmony_ci *storep = (char *)newp; 196813498266Sopenharmony_ci } 196913498266Sopenharmony_ci /* set after the string, to make it not assigned if the allocation above 197013498266Sopenharmony_ci fails */ 197113498266Sopenharmony_ci if(port) 197213498266Sopenharmony_ci u->portnum = port; 197313498266Sopenharmony_ci return CURLUE_OK; 197413498266Sopenharmony_ci} 1975