1// Copyright Joyent, Inc. and other Node contributors.
2//
3// Permission is hereby granted, free of charge, to any person obtaining a
4// copy of this software and associated documentation files (the
5// "Software"), to deal in the Software without restriction, including
6// without limitation the rights to use, copy, modify, merge, publish,
7// distribute, sublicense, and/or sell copies of the Software, and to permit
8// persons to whom the Software is furnished to do so, subject to the
9// following conditions:
10//
11// The above copyright notice and this permission notice shall be included
12// in all copies or substantial portions of the Software.
13//
14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
15// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
17// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
18// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
19// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
20// USE OR OTHER DEALINGS IN THE SOFTWARE.
21
22'use strict';
23
24const {
25  Boolean,
26  Int8Array,
27  ObjectCreate,
28  ObjectKeys,
29  StringPrototypeCharCodeAt,
30  decodeURIComponent,
31} = primordials;
32
33const { toASCII } = require('internal/idna');
34const { encodeStr, hexTable } = require('internal/querystring');
35const querystring = require('querystring');
36
37const {
38  ERR_INVALID_ARG_TYPE,
39  ERR_INVALID_URL,
40} = require('internal/errors').codes;
41const {
42  validateString,
43  validateObject,
44} = require('internal/validators');
45
46// This ensures setURLConstructor() is called before the native
47// URL::ToObject() method is used.
48const { spliceOne } = require('internal/util');
49
50// WHATWG URL implementation provided by internal/url
51const {
52  URL,
53  URLSearchParams,
54  domainToASCII,
55  domainToUnicode,
56  fileURLToPath,
57  pathToFileURL: _pathToFileURL,
58  urlToHttpOptions,
59  unsafeProtocol,
60  hostlessProtocol,
61  slashedProtocol,
62} = require('internal/url');
63
64const bindingUrl = internalBinding('url');
65
66const { getOptionValue } = require('internal/options');
67
68// Original url.parse() API
69
/**
 * Container for the components of a URL as produced by the legacy
 * `url.parse()` API. Every component starts out as `null` and is filled in
 * by `Url.prototype.parse()` or `Url.prototype.resolveObject()`.
 *
 * The fields are created in a fixed order; `resolveObject()` copies them
 * with `ObjectKeys()`, so that order is what callers enumerating the object
 * will observe.
 */
function Url() {
  this.protocol = null;  // Lower-cased scheme including the trailing ':'.
  this.slashes = null;   // `true` when '//' followed the protocol.
  this.auth = null;      // Decoded text found before the last '@' of the host.
  this.host = null;      // Hostname plus ':port' when a port is present.
  this.port = null;      // Port digits as a string (no leading ':').
  this.hostname = null;  // Lower-cased hostname without the port.
  this.hash = null;      // Fragment including the leading '#'.
  this.search = null;    // Query string including the leading '?'.
  this.query = null;     // Query without '?', or the parsed query object.
  this.pathname = null;  // Path portion without query and fragment.
  this.path = null;      // `pathname` concatenated with `search`.
  this.href = null;      // Full serialized URL.
}
84
85// Reference: RFC 3986, RFC 1808, RFC 2396
86
// Define these here so that at least they only have to be
// compiled once on the first module load.

// A leading scheme: one or more letters, digits, '.', '+' or '-' followed by
// ':' (case-insensitive).
const protocolPattern = /^[a-z0-9.+-]+:/i;
// A trailing ':' followed by zero or more digits, anchored at the end of the
// string. Used by Url.prototype.parseHost to split the port off the host.
const portPattern = /:[0-9]*$/;
// A leading '//' followed by `something@something`, where neither part
// contains '@' or '/'. Used by Url.prototype.parse to detect a host in
// protocol-less input such as '//user@server'.
const hostPattern = /^\/\/[^@/]+@[^@/]+/;

// Special case for a simple path URL: group 1 is a path starting with one or
// two (but not three) slashes and containing no '?' or whitespace; the
// optional group 2 is a '?'-prefixed query containing no whitespace.
const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/;

// Hostnames longer than this are replaced by the empty string in
// Url.prototype.parse.
const hostnameMaxLen = 255;
97const {
98  CHAR_SPACE,
99  CHAR_TAB,
100  CHAR_CARRIAGE_RETURN,
101  CHAR_LINE_FEED,
102  CHAR_NO_BREAK_SPACE,
103  CHAR_ZERO_WIDTH_NOBREAK_SPACE,
104  CHAR_HASH,
105  CHAR_FORWARD_SLASH,
106  CHAR_LEFT_SQUARE_BRACKET,
107  CHAR_RIGHT_SQUARE_BRACKET,
108  CHAR_LEFT_ANGLE_BRACKET,
109  CHAR_RIGHT_ANGLE_BRACKET,
110  CHAR_LEFT_CURLY_BRACKET,
111  CHAR_RIGHT_CURLY_BRACKET,
112  CHAR_QUESTION_MARK,
113  CHAR_DOUBLE_QUOTE,
114  CHAR_SINGLE_QUOTE,
115  CHAR_PERCENT,
116  CHAR_SEMICOLON,
117  CHAR_BACKWARD_SLASH,
118  CHAR_CIRCUMFLEX_ACCENT,
119  CHAR_GRAVE_ACCENT,
120  CHAR_VERTICAL_LINE,
121  CHAR_AT,
122  CHAR_COLON,
123} = require('internal/constants');
124
// Set once the DEP0169 warning has been emitted so it is only shown once.
let urlParseWarned = false;

/**
 * Legacy `url.parse()` entry point.
 *
 * Emits the DEP0169 deprecation warning at most once, and only when the
 * `--pending-deprecation` option is set. A value that already is a `Url`
 * instance is returned untouched; anything else is handed to
 * `Url.prototype.parse()` on a fresh `Url`.
 * @param {string|Url} url Input to parse.
 * @param {boolean} [parseQueryString] Forwarded to `Url.prototype.parse()`.
 * @param {boolean} [slashesDenoteHost] Forwarded to `Url.prototype.parse()`.
 * @returns {Url}
 */
function urlParse(url, parseQueryString, slashesDenoteHost) {
  if (!urlParseWarned) {
    // Only consult the option while the warning is still pending.
    if (getOptionValue('--pending-deprecation')) {
      urlParseWarned = true;
      const message =
        '`url.parse()` behavior is not standardized and prone to ' +
        'errors that have security implications. Use the WHATWG URL API ' +
        'instead. CVEs are not issued for `url.parse()` vulnerabilities.';
      process.emitWarning(message, 'DeprecationWarning', 'DEP0169');
    }
  }

  if (url instanceof Url) {
    return url;
  }

  const parsed = new Url();
  parsed.parse(url, parseQueryString, slashesDenoteHost);
  return parsed;
}
145
/**
 * Reports whether `hostname` is wrapped in square brackets, which this module
 * takes to mean an IPv6 literal.
 * @param {string} hostname
 * @returns {boolean} `true` when the first character is '[' and the last
 *   character is ']'.
 */
function isIpv6Hostname(hostname) {
  const firstCode = StringPrototypeCharCodeAt(hostname, 0);
  if (firstCode !== CHAR_LEFT_SQUARE_BRACKET) {
    return false;
  }
  const lastCode = StringPrototypeCharCodeAt(hostname, hostname.length - 1);
  return lastCode === CHAR_RIGHT_SQUARE_BRACKET;
}
153
// This prevents some common spoofing bugs due to our use of IDNA toASCII. For
// compatibility, the set of characters we use here is the *intersection* of
// "forbidden host code point" in the WHATWG URL Standard [1] and the
// characters in the host parsing loop in Url.prototype.parse, with the
// following additions:
//
// - ':' since this could cause a "protocol spoofing" bug
// - '@' since this could cause parts of the hostname to be confused with auth
// - '[' and ']' since this could cause a non-IPv6 hostname to be interpreted
//   as IPv6 by isIpv6Hostname above
//
// Url.prototype.parse tests the hostname against this pattern after toASCII
// has run and throws ERR_INVALID_URL on a match.
//
// [1]: https://url.spec.whatwg.org/#forbidden-host-code-point
const forbiddenHostChars = /[\0\t\n\r #%/:<>?@[\\\]^|]/;
// For IPv6, permit '[', ']', and ':'. Url.prototype.parse applies this
// pattern to bracketed hostnames instead of the one above.
const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/;
169
/**
 * Parses `url` with the legacy (non-WHATWG) algorithm and stores the
 * components that apply on this `Url` instance.
 *
 * The work happens in these stages:
 *   1. Trim surrounding whitespace and turn backslashes that appear before
 *      the first '?' or '#' into forward slashes.
 *   2. Fast path for plain paths (optionally with a query string).
 *   3. Extract the protocol, then auth / host / port / hostname.
 *   4. Escape the remainder and split it into hash, search/query, pathname.
 *   5. Rebuild `path` and `href`.
 *
 * @param {string} url The URL string to parse; checked with validateString.
 * @param {boolean} [parseQueryString] When truthy, `query` holds the result
 *   of `querystring.parse()` (or an empty null-prototype object when there is
 *   no query string) rather than the raw query text.
 * @param {boolean} [slashesDenoteHost] When truthy, the fast path is skipped
 *   and a leading '//' introduces a host even without a protocol.
 * @returns {Url} `this`.
 * @throws {ERR_INVALID_URL} When the hostname contains a forbidden character,
 *   or a non-IPv6 hostname becomes empty after `toASCII()`.
 */
Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
  validateString(url, 'url');

  // Copy chrome, IE, opera backslash-handling behavior.
  // Back slashes before the query string get converted to forward slashes
  // See: https://code.google.com/p/chromium/issues/detail?id=25916
  //
  // State for the single pass below:
  // - hasHash / hasAt: whether a '#' was seen anywhere / an '@' was seen
  //   before the first '?' or '#'.
  // - start: index of the first non-whitespace character (-1 if none yet).
  // - end: index where the current trailing whitespace run begins (-1 if the
  //   string does not currently end in whitespace).
  // - rest: output accumulated so far when backslashes were converted.
  // - lastPos: index of the first character not yet copied into `rest`.
  let hasHash = false;
  let hasAt = false;
  let start = -1;
  let end = -1;
  let rest = '';
  let lastPos = 0;
  for (let i = 0, inWs = false, split = false; i < url.length; ++i) {
    const code = url.charCodeAt(i);

    // Find first and last non-whitespace characters for trimming.
    // Everything up to and including the space character (code < 33) counts
    // as whitespace, as do the two explicitly listed code points.
    const isWs = code < 33 ||
                 code === CHAR_NO_BREAK_SPACE ||
                 code === CHAR_ZERO_WIDTH_NOBREAK_SPACE;
    if (start === -1) {
      if (isWs)
        continue;
      lastPos = start = i;
    } else if (inWs) {
      if (!isWs) {
        // The whitespace run was interior, not trailing.
        end = -1;
        inWs = false;
      }
    } else if (isWs) {
      end = i;
      inWs = true;
    }

    // Only convert backslashes while we haven't seen a split character
    if (!split) {
      switch (code) {
        case CHAR_AT:
          hasAt = true;
          break;
        case CHAR_HASH:
          hasHash = true;
        // Fall through
        case CHAR_QUESTION_MARK:
          split = true;
          break;
        case CHAR_BACKWARD_SLASH:
          // Flush the pending segment, then emit '/' in place of '\'.
          if (i - lastPos > 0)
            rest += url.slice(lastPos, i);
          rest += '/';
          lastPos = i + 1;
          break;
      }
    } else if (!hasHash && code === CHAR_HASH) {
      hasHash = true;
    }
  }

  // Check if string was non-empty (including strings with only whitespace)
  if (start !== -1) {
    if (lastPos === start) {
      // We didn't convert any backslashes

      if (end === -1) {
        if (start === 0)
          rest = url;
        else
          rest = url.slice(start);
      } else {
        rest = url.slice(start, end);
      }
    } else if (end === -1 && lastPos < url.length) {
      // We converted some backslashes and have only part of the entire string
      rest += url.slice(lastPos);
    } else if (end !== -1 && lastPos < end) {
      // We converted some backslashes and have only part of the entire string;
      // stop at the start of the trailing whitespace.
      rest += url.slice(lastPos, end);
    }
  }

  if (!slashesDenoteHost && !hasHash && !hasAt) {
    // Try fast path regexp
    const simplePath = simplePathPattern.exec(rest);
    if (simplePath) {
      // Only path, href, pathname, search and query are set on this path;
      // the remaining fields keep their current values.
      this.path = rest;
      this.href = rest;
      this.pathname = simplePath[1];
      if (simplePath[2]) {
        this.search = simplePath[2];
        if (parseQueryString) {
          this.query = querystring.parse(this.search.slice(1));
        } else {
          this.query = this.search.slice(1);
        }
      } else if (parseQueryString) {
        this.search = null;
        this.query = ObjectCreate(null);
      }
      return this;
    }
  }

  // `proto` keeps the protocol exactly as written; `lowerProto` is its
  // lower-cased form, which is what gets stored on `this.protocol`.
  let proto = protocolPattern.exec(rest);
  let lowerProto;
  if (proto) {
    proto = proto[0];
    lowerProto = proto.toLowerCase();
    this.protocol = lowerProto;
    rest = rest.slice(proto.length);
  }

  // Figure out if it's got a host
  // user@server is *always* interpreted as a hostname, and url
  // resolution will treat //foo/bar as host=foo,path=bar because that's
  // how the browser resolves relative URLs.
  let slashes;
  if (slashesDenoteHost || proto || hostPattern.test(rest)) {
    slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH &&
              rest.charCodeAt(1) === CHAR_FORWARD_SLASH;
    if (slashes && !(proto && hostlessProtocol.has(lowerProto))) {
      rest = rest.slice(2);
      this.slashes = true;
    }
  }

  // NOTE(review): the `slashedProtocol` lookup below uses the original-case
  // `proto`, whereas the `hostlessProtocol` lookups use `lowerProto`. Confirm
  // whether this case-sensitive lookup is intentional.
  if (!hostlessProtocol.has(lowerProto) &&
      (slashes || (proto && !slashedProtocol.has(proto)))) {

    // there's a hostname.
    // the first instance of /, ?, ;, or # ends the host.
    //
    // If there is an @ in the hostname, then non-host chars *are* allowed
    // to the left of the last @ sign, unless some host-ending character
    // comes *before* the @-sign.
    // URLs are obnoxious.
    //
    // ex:
    // http://a@b@c/ => user:a@b host:c
    // http://a@b?@c => user:a host:b path:/?@c

    // hostEnd: index of the first '#', '/' or '?' (stops the scan).
    // atSign: index of the last '@' seen before hostEnd.
    // nonHost: index of the first character after the last '@' that cannot be
    // part of a hostname; the host is cut there.
    let hostEnd = -1;
    let atSign = -1;
    let nonHost = -1;
    for (let i = 0; i < rest.length; ++i) {
      switch (rest.charCodeAt(i)) {
        case CHAR_TAB:
        case CHAR_LINE_FEED:
        case CHAR_CARRIAGE_RETURN:
          // WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too.
          // Step back one so the character that moved into slot `i` is seen.
          rest = rest.slice(0, i) + rest.slice(i + 1);
          i -= 1;
          break;
        case CHAR_SPACE:
        case CHAR_DOUBLE_QUOTE:
        case CHAR_PERCENT:
        case CHAR_SINGLE_QUOTE:
        case CHAR_SEMICOLON:
        case CHAR_LEFT_ANGLE_BRACKET:
        case CHAR_RIGHT_ANGLE_BRACKET:
        case CHAR_BACKWARD_SLASH:
        case CHAR_CIRCUMFLEX_ACCENT:
        case CHAR_GRAVE_ACCENT:
        case CHAR_LEFT_CURLY_BRACKET:
        case CHAR_VERTICAL_LINE:
        case CHAR_RIGHT_CURLY_BRACKET:
          // Characters that are never ever allowed in a hostname from RFC 2396
          if (nonHost === -1)
            nonHost = i;
          break;
        case CHAR_HASH:
        case CHAR_FORWARD_SLASH:
        case CHAR_QUESTION_MARK:
          // Find the first instance of any host-ending characters
          if (nonHost === -1)
            nonHost = i;
          hostEnd = i;
          break;
        case CHAR_AT:
          // At this point, either we have an explicit point where the
          // auth portion cannot go past, or the last @ char is the decider.
          atSign = i;
          nonHost = -1;
          break;
      }
      if (hostEnd !== -1)
        break;
    }
    // `start` is reused here as the index where the host begins in `rest`.
    start = 0;
    if (atSign !== -1) {
      this.auth = decodeURIComponent(rest.slice(0, atSign));
      start = atSign + 1;
    }
    if (nonHost === -1) {
      this.host = rest.slice(start);
      rest = '';
    } else {
      this.host = rest.slice(start, nonHost);
      rest = rest.slice(nonHost);
    }

    // pull out port.
    this.parseHost();

    // We've indicated that there is a hostname,
    // so even if it's empty, it has to be present.
    if (typeof this.hostname !== 'string')
      this.hostname = '';

    const hostname = this.hostname;

    // If hostname begins with [ and ends with ]
    // assume that it's an IPv6 address.
    const ipv6Hostname = isIpv6Hostname(hostname);

    // validate a little.
    // getHostname may shorten this.hostname and returns the new `rest`, with
    // the cut-off tail of the hostname moved in front of it.
    if (!ipv6Hostname) {
      rest = getHostname(this, rest, hostname);
    }

    if (this.hostname.length > hostnameMaxLen) {
      this.hostname = '';
    } else {
      // Hostnames are always lower case.
      this.hostname = this.hostname.toLowerCase();
    }

    if (this.hostname !== '') {
      if (ipv6Hostname) {
        if (forbiddenHostCharsIpv6.test(this.hostname)) {
          throw new ERR_INVALID_URL(url);
        }
      } else {
        // IDNA Support: Returns a punycoded representation of "domain".
        // It only converts parts of the domain name that
        // have non-ASCII characters, i.e. it doesn't matter if
        // you call it with a domain that already is ASCII-only.
        this.hostname = toASCII(this.hostname);

        // Prevent two potential routes of hostname spoofing.
        // 1. If this.hostname is empty, it must have become empty due to toASCII
        //    since we checked this.hostname above.
        // 2. If any of forbiddenHostChars appears in this.hostname, it must have
        //    also gotten in due to toASCII. This is since getHostname would have
        //    filtered them out otherwise.
        // Rather than trying to correct this by moving the non-host part into
        // the pathname as we've done in getHostname, throw an exception to
        // convey the severity of this issue.
        if (this.hostname === '' || forbiddenHostChars.test(this.hostname)) {
          throw new ERR_INVALID_URL(url);
        }
      }
    }

    // Rebuild `host` from the validated hostname and the port, if any.
    const p = this.port ? ':' + this.port : '';
    const h = this.hostname || '';
    this.host = h + p;

    // strip [ and ] from the hostname
    // the host field still retains them, though
    if (ipv6Hostname) {
      this.hostname = this.hostname.slice(1, -1);
      if (rest[0] !== '/') {
        rest = '/' + rest;
      }
    }
  }

  // Now rest is set to the post-host stuff.
  // Chop off any delim chars.
  if (!unsafeProtocol.has(lowerProto)) {
    // First, make 100% sure that any "autoEscape" chars get
    // escaped, even if encodeURIComponent doesn't think they
    // need to be.
    rest = autoEscapeStr(rest);
  }

  // Locate the first '?' that precedes the first '#', and the first '#'.
  let questionIdx = -1;
  let hashIdx = -1;
  for (let i = 0; i < rest.length; ++i) {
    const code = rest.charCodeAt(i);
    if (code === CHAR_HASH) {
      this.hash = rest.slice(i);
      hashIdx = i;
      break;
    } else if (code === CHAR_QUESTION_MARK && questionIdx === -1) {
      questionIdx = i;
    }
  }

  if (questionIdx !== -1) {
    if (hashIdx === -1) {
      this.search = rest.slice(questionIdx);
      this.query = rest.slice(questionIdx + 1);
    } else {
      this.search = rest.slice(questionIdx, hashIdx);
      this.query = rest.slice(questionIdx + 1, hashIdx);
    }
    if (parseQueryString) {
      this.query = querystring.parse(this.query);
    }
  } else if (parseQueryString) {
    // No query string, but parseQueryString still requested
    this.search = null;
    this.query = ObjectCreate(null);
  }

  // The pathname is whatever precedes the first of '?' and '#'.
  const useQuestionIdx =
    questionIdx !== -1 && (hashIdx === -1 || questionIdx < hashIdx);
  const firstIdx = useQuestionIdx ? questionIdx : hashIdx;
  if (firstIdx === -1) {
    if (rest.length > 0)
      this.pathname = rest;
  } else if (firstIdx > 0) {
    this.pathname = rest.slice(0, firstIdx);
  }
  // Slashed protocols with a hostname always get at least '/' as pathname.
  if (slashedProtocol.has(lowerProto) &&
      this.hostname && !this.pathname) {
    this.pathname = '/';
  }

  // To support http.request
  if (this.pathname || this.search) {
    const p = this.pathname || '';
    const s = this.search || '';
    this.path = p + s;
  }

  // Finally, reconstruct the href based on what has been validated.
  this.href = this.format();
  return this;
};
500
/**
 * Cuts `hostname` at the first '/', '\', '#', '?' or ':' it contains.
 *
 * When such a character is found, `self.hostname` is set to the part before
 * it and the remainder of the hostname is pushed back in front of `rest`,
 * prefixed with '/'. When none is found nothing is modified.
 * @param {Url} self Object whose `hostname` is updated on a cut.
 * @param {string} rest The not-yet-parsed text following the host.
 * @param {string} hostname The hostname to inspect.
 * @returns {string} The (possibly extended) remainder to keep parsing.
 */
function getHostname(self, rest, hostname) {
  for (let idx = 0; idx < hostname.length; ++idx) {
    switch (hostname.charCodeAt(idx)) {
      case CHAR_FORWARD_SLASH:
      case CHAR_BACKWARD_SLASH:
      case CHAR_HASH:
      case CHAR_QUESTION_MARK:
      case CHAR_COLON: {
        const tail = hostname.slice(idx);
        self.hostname = hostname.slice(0, idx);
        return `/${tail}${rest}`;
      }
    }
  }
  return rest;
}
517
// Percent-encoded replacement for each character code that autoEscapeStr
// rewrites, indexed by code unit. Codes that are left alone hold ''.
// A plain Array is used because lookups are faster than with Object/Map.
// Escaped: \t \n \r space " ' < > \ ^ ` { | }
const escapedCodes = [
  /* 0 - 9 */ '', '', '', '', '', '', '', '', '', '%09',
  /* 10 - 19 */ '%0A', '', '', '%0D', '', '', '', '', '', '',
  /* 20 - 29 */ '', '', '', '', '', '', '', '', '', '',
  /* 30 - 39 */ '', '', '%20', '', '%22', '', '', '', '', '%27',
  /* 40 - 49 */ '', '', '', '', '', '', '', '', '', '',
  /* 50 - 59 */ '', '', '', '', '', '', '', '', '', '',
  /* 60 - 69 */ '%3C', '', '%3E', '', '', '', '', '', '', '',
  /* 70 - 79 */ '', '', '', '', '', '', '', '', '', '',
  /* 80 - 89 */ '', '', '', '', '', '', '', '', '', '',
  /* 90 - 99 */ '', '', '%5C', '', '%5E', '', '%60', '', '', '',
  /* 100 - 109 */ '', '', '', '', '', '', '', '', '', '',
  /* 110 - 119 */ '', '', '', '', '', '', '', '', '', '',
  /* 120 - 125 */ '', '', '', '%7B', '%7C', '%7D',
];

/**
 * Percent-encodes the delimiters and "unwise" characters from RFC 2396, plus
 * the single quote (to guard against XSS), wherever they occur in `rest`.
 * @param {string} rest Text to escape.
 * @returns {string} The escaped text; the input itself when nothing needed
 *   escaping.
 */
function autoEscapeStr(rest) {
  let output = '';
  // Index of the first character that has not been copied to `output` yet.
  let pending = 0;

  for (let idx = 0; idx < rest.length; ++idx) {
    const replacement = escapedCodes[rest.charCodeAt(idx)];
    // '' (listed, not escaped) and undefined (beyond the table) both skip.
    if (!replacement) {
      continue;
    }
    // Copy the untouched run preceding this character, if any.
    if (pending < idx) {
      output += rest.slice(pending, idx);
    }
    output += replacement;
    pending = idx + 1;
  }

  // `pending` only moves once something was escaped.
  if (pending === 0) {
    return rest;
  }

  // Append whatever ordinary characters follow the last escape.
  return pending < rest.length ? output + rest.slice(pending) : output;
}
562
/**
 * Formats a URL given as a string, a legacy `Url`-like object, or a WHATWG
 * `URL` instance.
 *
 * - Strings are run through `urlParse()` first (which cleans up potentially
 *   wonky URLs) and then formatted with `Url.prototype.format`.
 * - WHATWG `URL` instances are serialized by the native binding, honoring
 *   the `fragment`, `unicode`, `search` and `auth` options.
 * - Any other non-null object is formatted with `Url.prototype.format`.
 * @param {string|object|URL} urlObject Value to format.
 * @param {object} [options] Only consulted for WHATWG `URL` instances.
 *   `fragment`, `search` and `auth` default to `true`; `unicode` defaults
 *   to `false`. `null`/`undefined` option values keep the default.
 * @returns {string}
 * @throws {ERR_INVALID_ARG_TYPE} When `urlObject` is neither a string nor a
 *   non-null object.
 */
function urlFormat(urlObject, options) {
  if (typeof urlObject === 'string') {
    const parsed = urlParse(urlObject);
    return Url.prototype.format.call(parsed);
  }

  if (typeof urlObject !== 'object' || urlObject === null) {
    throw new ERR_INVALID_ARG_TYPE('urlObject',
                                   ['Object', 'string'], urlObject);
  }

  if (!(urlObject instanceof URL)) {
    return Url.prototype.format.call(urlObject);
  }

  // WHATWG URL: start from the defaults and apply any provided overrides.
  let includeFragment = true;
  let useUnicode = false;
  let includeSearch = true;
  let includeAuth = true;

  if (options) {
    validateObject(options, 'options');

    if (options.fragment != null)
      includeFragment = Boolean(options.fragment);
    if (options.unicode != null)
      useUnicode = Boolean(options.unicode);
    if (options.search != null)
      includeSearch = Boolean(options.search);
    if (options.auth != null)
      includeAuth = Boolean(options.auth);
  }

  return bindingUrl.format(
    urlObject.href, includeFragment, useUnicode, includeSearch, includeAuth);
}
605
// Lookup table indexed by ASCII code (0x00 - 0x7F) that is passed to
// encodeStr() when Url.prototype.format encodes the auth component.
// A 1 marks the characters listed below; every other entry is 0.
// These characters do not need escaping:
// ! - . _ ~
// ' ( ) * :
// digits
// alpha (uppercase)
// alpha (lowercase)
const noEscapeAuth = new Int8Array([
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F
  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F
  0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F
  0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F
  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 0x70 - 0x7F
]);
622
/**
 * Serializes this object's components into a URL string.
 *
 * `host` wins over `hostname`/`port`, and `search` wins over `query`. Any
 * '#' or '?' inside `pathname` and any '#' inside `search` is
 * percent-encoded so the pieces cannot bleed into each other.
 * @returns {string} protocol + host (with auth) + pathname + search + hash.
 */
Url.prototype.format = function format() {
  // Auth: percent-encode everything not flagged in noEscapeAuth, add '@'.
  let auth = this.auth || '';
  if (auth) {
    auth = encodeStr(auth, noEscapeAuth, hexTable) + '@';
  }

  let protocol = this.protocol || '';
  let pathname = this.pathname || '';
  let hash = this.hash || '';

  // Host: prefer the combined `host`; otherwise build it from
  // `hostname` (bracketing bare IPv6 literals) and `port`.
  let host = '';
  if (this.host) {
    host = auth + this.host;
  } else if (this.hostname) {
    const needsBrackets =
      this.hostname.includes(':') && !isIpv6Hostname(this.hostname);
    host = auth + (needsBrackets ? '[' + this.hostname + ']' : this.hostname);
    if (this.port) {
      host += ':' + this.port;
    }
  }

  // Query objects are stringified; they only matter when `search` is unset.
  let query = '';
  if (this.query !== null && typeof this.query === 'object') {
    query = querystring.stringify(this.query);
  }
  let search = this.search || (query && ('?' + query)) || '';

  // Make sure the protocol ends with ':' (58).
  if (protocol && protocol.charCodeAt(protocol.length - 1) !== 58/* : */) {
    protocol += ':';
  }

  // Percent-encode '#' and '?' occurring inside the pathname.
  let encodedPath = '';
  let copiedUpTo = 0;
  for (let idx = 0; idx < pathname.length; ++idx) {
    const code = pathname.charCodeAt(idx);
    let replacement;
    if (code === CHAR_HASH) {
      replacement = '%23';
    } else if (code === CHAR_QUESTION_MARK) {
      replacement = '%3F';
    } else {
      continue;
    }
    if (idx > copiedUpTo) {
      encodedPath += pathname.slice(copiedUpTo, idx);
    }
    encodedPath += replacement;
    copiedUpTo = idx + 1;
  }
  if (copiedUpTo > 0) {
    // Something was replaced; append the untouched tail, if there is one.
    pathname = copiedUpTo !== pathname.length ?
      encodedPath + pathname.slice(copiedUpTo) :
      encodedPath;
  }

  // Only the slashedProtocols get the //.  Not mailto:, xmpp:, etc.
  // unless they had them to begin with.
  const wantsSlashes = this.slashes || slashedProtocol.has(protocol);
  if (wantsSlashes) {
    if (this.slashes || host) {
      // A host is followed by an absolute path.
      if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH) {
        pathname = '/' + pathname;
      }
      host = '//' + host;
    } else if (
      // Protocols starting with "file" get '//' even without a host.
      protocol.length >= 4 &&
      protocol.charCodeAt(0) === 102/* f */ &&
      protocol.charCodeAt(1) === 105/* i */ &&
      protocol.charCodeAt(2) === 108/* l */ &&
      protocol.charCodeAt(3) === 101/* e */
    ) {
      host = '//';
    }
  }

  search = search.replace(/#/g, '%23');

  // Add the leading delimiters when the stored values lack them.
  if (hash && hash.charCodeAt(0) !== CHAR_HASH) {
    hash = '#' + hash;
  }
  if (search && search.charCodeAt(0) !== CHAR_QUESTION_MARK) {
    search = '?' + search;
  }

  return protocol + host + pathname + search + hash;
};
708
/**
 * Resolves `relative` against `source` and returns the resulting URL string.
 * `source` is parsed with `slashesDenoteHost` enabled.
 * @param {string|Url} source Base URL.
 * @param {string|Url} relative URL to resolve against the base.
 * @returns {string}
 */
function urlResolve(source, relative) {
  const base = urlParse(source, false, true);
  return base.resolve(relative);
}
712
/**
 * Resolves `relative` against this URL and returns the formatted result.
 * `relative` is parsed with `slashesDenoteHost` enabled first.
 * @param {string|Url} relative URL to resolve against this one.
 * @returns {string}
 */
Url.prototype.resolve = function resolve(relative) {
  const target = urlParse(relative, false, true);
  const resolved = this.resolveObject(target);
  return resolved.format();
};
716
/**
 * Resolves `relative` against `source` and returns the result as an object.
 * A falsy `source` short-circuits: `relative` is returned unchanged.
 * @param {string|Url} source Base URL.
 * @param {string|Url} relative URL to resolve against the base.
 * @returns {Url|string|Url} The resolved `Url`, or `relative` itself when
 *   `source` is falsy.
 */
function urlResolveObject(source, relative) {
  if (source) {
    const base = urlParse(source, false, true);
    return base.resolveObject(relative);
  }
  return relative;
}
721
/**
 * Resolves `relative` against this URL and returns the outcome as a new
 * `Url` object. The result starts as a shallow copy of `this` and is then
 * overridden piece by piece depending on what `relative` provides.
 *
 * Note that when `relative` is passed as an object, some branches below
 * assign to its `host`, `hostname` and `port` fields.
 *
 * @param {string|Url} relative The URL to resolve. Strings are parsed with
 *   `slashesDenoteHost` enabled.
 * @returns {Url} A new `Url` whose `href` reflects the resolved components.
 */
Url.prototype.resolveObject = function resolveObject(relative) {
  if (typeof relative === 'string') {
    const rel = new Url();
    rel.parse(relative, false, true);
    relative = rel;
  }

  // Start from a shallow copy of this URL's own enumerable fields.
  const result = new Url();
  const tkeys = ObjectKeys(this);
  for (let tk = 0; tk < tkeys.length; tk++) {
    const tkey = tkeys[tk];
    result[tkey] = this[tkey];
  }

  // Hash is always overridden, no matter what.
  // even href="" will remove it.
  result.hash = relative.hash;

  // If the relative url is empty, then there's nothing left to do here.
  if (relative.href === '') {
    result.href = result.format();
    return result;
  }

  // Hrefs like //foo/bar always cut to the protocol.
  if (relative.slashes && !relative.protocol) {
    // Take everything except the protocol from relative
    const rkeys = ObjectKeys(relative);
    for (let rk = 0; rk < rkeys.length; rk++) {
      const rkey = rkeys[rk];
      if (rkey !== 'protocol')
        result[rkey] = relative[rkey];
    }

    // urlParse appends trailing / to urls like http://www.example.com
    if (slashedProtocol.has(result.protocol) &&
        result.hostname && !result.pathname) {
      result.path = result.pathname = '/';
    }

    result.href = result.format();
    return result;
  }

  if (relative.protocol && relative.protocol !== result.protocol) {
    // If it's a known url protocol, then changing
    // the protocol does weird things
    // first, if it's not file:, then we MUST have a host,
    // and if there was a path
    // to begin with, then we MUST have a path.
    // if it is file:, then the host is dropped,
    // because that's known to be hostless.
    // anything else is assumed to be absolute.
    if (!slashedProtocol.has(relative.protocol)) {
      // Not a slashed protocol: `relative` replaces the result wholesale.
      const keys = ObjectKeys(relative);
      for (let v = 0; v < keys.length; v++) {
        const k = keys[v];
        result[k] = relative[k];
      }
      result.href = result.format();
      return result;
    }

    result.protocol = relative.protocol;
    if (!relative.host &&
        !/^file:?$/.test(relative.protocol) &&
        !hostlessProtocol.has(relative.protocol)) {
      // No host was parsed: promote the first non-empty path segment to the
      // host and keep the remaining segments as an absolute path.
      const relPath = (relative.pathname || '').split('/');
      while (relPath.length && !(relative.host = relPath.shift()));
      if (!relative.host) relative.host = '';
      if (!relative.hostname) relative.hostname = '';
      if (relPath[0] !== '') relPath.unshift('');
      if (relPath.length < 2) relPath.unshift('');
      result.pathname = relPath.join('/');
    } else {
      result.pathname = relative.pathname;
    }
    result.search = relative.search;
    result.query = relative.query;
    result.host = relative.host || '';
    result.auth = relative.auth;
    result.hostname = relative.hostname || relative.host;
    result.port = relative.port;
    // To support http.request
    if (result.pathname || result.search) {
      const p = result.pathname || '';
      const s = result.search || '';
      result.path = p + s;
    }
    result.slashes = result.slashes || relative.slashes;
    result.href = result.format();
    return result;
  }

  // From here on the protocols match (or `relative` has none), so the paths
  // have to be merged.
  const isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/');
  const isRelAbs = (
    relative.host || (relative.pathname && relative.pathname.charAt(0) === '/')
  );
  let mustEndAbs = (isRelAbs || isSourceAbs ||
                    (result.host && relative.pathname));
  // Captured before `mustEndAbs` is possibly changed further down.
  const removeAllDots = mustEndAbs;
  let srcPath = (result.pathname && result.pathname.split('/')) || [];
  const relPath = (relative.pathname && relative.pathname.split('/')) || [];
  const noLeadingSlashes = result.protocol &&
      !slashedProtocol.has(result.protocol);

  // If the url is a non-slashed url, then relative
  // links like ../.. should be able
  // to crawl up to the hostname, as well.  This is strange.
  // result.protocol has already been set by now.
  // Later on, put the first path part into the host field.
  if (noLeadingSlashes) {
    result.hostname = '';
    result.port = null;
    if (result.host) {
      if (srcPath[0] === '') srcPath[0] = result.host;
      else srcPath.unshift(result.host);
    }
    result.host = '';
    if (relative.protocol) {
      relative.hostname = null;
      relative.port = null;
      result.auth = null;
      if (relative.host) {
        if (relPath[0] === '') relPath[0] = relative.host;
        else relPath.unshift(relative.host);
      }
      relative.host = null;
    }
    mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === '');
  }

  if (isRelAbs) {
    // it's absolute.
    if (relative.host || relative.host === '') {
      if (result.host !== relative.host) result.auth = null;
      result.host = relative.host;
      result.port = relative.port;
    }
    if (relative.hostname || relative.hostname === '') {
      if (result.hostname !== relative.hostname) result.auth = null;
      result.hostname = relative.hostname;
    }
    result.search = relative.search;
    result.query = relative.query;
    srcPath = relPath;
    // Fall through to the dot-handling below.
  } else if (relPath.length) {
    // it's relative
    // throw away the existing file, and take the new path instead.
    if (!srcPath) srcPath = [];
    srcPath.pop();
    srcPath = srcPath.concat(relPath);
    result.search = relative.search;
    result.query = relative.query;
  } else if (relative.search !== null && relative.search !== undefined) {
    // Just pull out the search.
    // like href='?foo'.
    // Put this after the other two cases because it simplifies the booleans
    if (noLeadingSlashes) {
      result.hostname = result.host = srcPath.shift();
      // Occasionally the auth can get stuck only in host.
      // This especially happens in cases like
      // url.resolveObject('mailto:local1@domain1', 'local2@domain2')
      const authInHost =
        result.host && result.host.indexOf('@') > 0 && result.host.split('@');
      if (authInHost) {
        result.auth = authInHost.shift();
        result.host = result.hostname = authInHost.shift();
      }
    }
    result.search = relative.search;
    result.query = relative.query;
    // To support http.request
    if (result.pathname !== null || result.search !== null) {
      result.path = (result.pathname ? result.pathname : '') +
                    (result.search ? result.search : '');
    }
    result.href = result.format();
    return result;
  }

  if (!srcPath.length) {
    // No path at all. All other things were already handled above.
    result.pathname = null;
    // To support http.request
    if (result.search) {
      result.path = '/' + result.search;
    } else {
      result.path = null;
    }
    result.href = result.format();
    return result;
  }

  // If a url ENDs in . or .., then it must get a trailing slash.
  // however, if it ends in anything else non-slashy,
  // then it must NOT get a trailing slash.
  let last = srcPath.slice(-1)[0];
  const hasTrailingSlash = (
    ((result.host || relative.host || srcPath.length > 1) &&
    (last === '.' || last === '..')) || last === '');

  // Strip single dots, resolve double dots to parent dir
  // if the path tries to go above the root, `up` ends up > 0
  // The segments are walked from the end so that each '..' can cancel the
  // nearest preceding ordinary segment.
  let up = 0;
  for (let i = srcPath.length - 1; i >= 0; i--) {
    last = srcPath[i];
    if (last === '.') {
      spliceOne(srcPath, i);
    } else if (last === '..') {
      spliceOne(srcPath, i);
      up++;
    } else if (up) {
      spliceOne(srcPath, i);
      up--;
    }
  }

  // If the path is allowed to go above the root, restore leading ..s
  if (!mustEndAbs && !removeAllDots) {
    while (up--) {
      srcPath.unshift('..');
    }
  }

  if (mustEndAbs && srcPath[0] !== '' &&
      (!srcPath[0] || srcPath[0].charAt(0) !== '/')) {
    srcPath.unshift('');
  }

  if (hasTrailingSlash && (srcPath.join('/').substr(-1) !== '/')) {
    srcPath.push('');
  }

  const isAbsolute = srcPath[0] === '' ||
      (srcPath[0] && srcPath[0].charAt(0) === '/');

  // put the host back
  if (noLeadingSlashes) {
    result.hostname =
      result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : '';
    // Occasionally the auth can get stuck only in host.
    // This especially happens in cases like
    // url.resolveObject('mailto:local1@domain1', 'local2@domain2')
    const authInHost = result.host && result.host.indexOf('@') > 0 ?
      result.host.split('@') : false;
    if (authInHost) {
      result.auth = authInHost.shift();
      result.host = result.hostname = authInHost.shift();
    }
  }

  mustEndAbs = mustEndAbs || (result.host && srcPath.length);

  if (mustEndAbs && !isAbsolute) {
    srcPath.unshift('');
  }

  if (!srcPath.length) {
    result.pathname = null;
    result.path = null;
  } else {
    result.pathname = srcPath.join('/');
  }

  // To support http.request
  if (result.pathname !== null || result.search !== null) {
    result.path = (result.pathname ? result.pathname : '') +
                  (result.search ? result.search : '');
  }
  result.auth = relative.auth || result.auth;
  result.slashes = result.slashes || relative.slashes;
  result.href = result.format();
  return result;
};
998
/**
 * Splits `this.host` into `this.hostname` and `this.port`.
 *
 * A trailing ':' followed by digits becomes the port (stored without the
 * colon); a bare trailing ':' is dropped without setting a port. `hostname`
 * is only assigned when the remaining host text is non-empty.
 */
Url.prototype.parseHost = function parseHost() {
  let hostText = this.host;
  const portMatch = portPattern.exec(hostText);
  if (portMatch) {
    const portSuffix = portMatch[0];
    if (portSuffix !== ':') {
      this.port = portSuffix.slice(1);
    }
    hostText = hostText.slice(0, hostText.length - portSuffix.length);
  }
  if (hostText) {
    this.hostname = hostText;
  }
};
1011
/**
 * Public wrapper around the internal `pathToFileURL()`.
 *
 * The internal implementation is free to reject non-strings however it
 * likes, but the public API has to validate its input and throw the
 * appropriate error, so `path` is checked with `validateString()` first.
 * @param {string} path Path to convert.
 * @returns {URL} Whatever the internal `pathToFileURL()` returns for `path`.
 */
function pathToFileURL(path) {
  validateString(path, 'path');

  const fileUrl = _pathToFileURL(path);
  return fileUrl;
}
1020
// Public surface of the module: the legacy parser defined in this file plus
// the WHATWG implementation and helpers re-exported from internal/url.
module.exports = {
  // Original API
  Url,
  parse: urlParse,
  resolve: urlResolve,
  resolveObject: urlResolveObject,
  format: urlFormat,

  // WHATWG API
  URL,
  URLSearchParams,
  domainToASCII,
  domainToUnicode,

  // Utilities
  // `pathToFileURL` is the validating wrapper defined in this file.
  pathToFileURL,
  fileURLToPath,
  urlToHttpOptions,
};
1040