1// Copyright Joyent, Inc. and other Node contributors. 2// 3// Permission is hereby granted, free of charge, to any person obtaining a 4// copy of this software and associated documentation files (the 5// "Software"), to deal in the Software without restriction, including 6// without limitation the rights to use, copy, modify, merge, publish, 7// distribute, sublicense, and/or sell copies of the Software, and to permit 8// persons to whom the Software is furnished to do so, subject to the 9// following conditions: 10// 11// The above copyright notice and this permission notice shall be included 12// in all copies or substantial portions of the Software. 13// 14// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS 15// OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 16// MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN 17// NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, 18// DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 19// OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 20// USE OR OTHER DEALINGS IN THE SOFTWARE. 21 22'use strict'; 23 24const { 25 Boolean, 26 Int8Array, 27 ObjectCreate, 28 ObjectKeys, 29 StringPrototypeCharCodeAt, 30 decodeURIComponent, 31} = primordials; 32 33const { toASCII } = require('internal/idna'); 34const { encodeStr, hexTable } = require('internal/querystring'); 35const querystring = require('querystring'); 36 37const { 38 ERR_INVALID_ARG_TYPE, 39 ERR_INVALID_URL, 40} = require('internal/errors').codes; 41const { 42 validateString, 43 validateObject, 44} = require('internal/validators'); 45 46// This ensures setURLConstructor() is called before the native 47// URL::ToObject() method is used. 
const { spliceOne } = require('internal/util');

// WHATWG URL implementation provided by internal/url
const {
  URL,
  URLSearchParams,
  domainToASCII,
  domainToUnicode,
  fileURLToPath,
  pathToFileURL: _pathToFileURL,
  urlToHttpOptions,
  unsafeProtocol,
  hostlessProtocol,
  slashedProtocol,
} = require('internal/url');

const bindingUrl = internalBinding('url');

const { getOptionValue } = require('internal/options');

// Original url.parse() API

// Constructor for the legacy URL record. Every component starts out as null
// and is filled in later by Url.prototype.parse() / resolveObject().
function Url() {
  this.protocol = null;
  this.slashes = null;
  this.auth = null;
  this.host = null;
  this.port = null;
  this.hostname = null;
  this.hash = null;
  this.search = null;
  this.query = null;
  this.pathname = null;
  this.path = null;
  this.href = null;
}

// Reference: RFC 3986, RFC 1808, RFC 2396

// define these here so at least they only have to be
// compiled once on the first module load.

// A leading scheme: letters, digits, '.', '+' or '-' followed by ':'.
const protocolPattern = /^[a-z0-9.+-]+:/i;
// A trailing ':' followed by zero or more digits.
const portPattern = /:[0-9]*$/;
// A leading '//' followed by "<auth>@<host>", where neither part contains
// '@' or '/'.
const hostPattern = /^\/\/[^@/]+@[^@/]+/;

// Special case for a simple path URL
const simplePathPattern = /^(\/\/?(?!\/)[^?\s]*)(\?[^\s]*)?$/;

// Hostnames longer than this are replaced by '' in Url.prototype.parse().
const hostnameMaxLen = 255;
const {
  CHAR_SPACE,
  CHAR_TAB,
  CHAR_CARRIAGE_RETURN,
  CHAR_LINE_FEED,
  CHAR_NO_BREAK_SPACE,
  CHAR_ZERO_WIDTH_NOBREAK_SPACE,
  CHAR_HASH,
  CHAR_FORWARD_SLASH,
  CHAR_LEFT_SQUARE_BRACKET,
  CHAR_RIGHT_SQUARE_BRACKET,
  CHAR_LEFT_ANGLE_BRACKET,
  CHAR_RIGHT_ANGLE_BRACKET,
  CHAR_LEFT_CURLY_BRACKET,
  CHAR_RIGHT_CURLY_BRACKET,
  CHAR_QUESTION_MARK,
  CHAR_DOUBLE_QUOTE,
  CHAR_SINGLE_QUOTE,
  CHAR_PERCENT,
  CHAR_SEMICOLON,
  CHAR_BACKWARD_SLASH,
  CHAR_CIRCUMFLEX_ACCENT,
  CHAR_GRAVE_ACCENT,
  CHAR_VERTICAL_LINE,
  CHAR_AT,
  CHAR_COLON,
} = require('internal/constants');

// Set once the DEP0169 warning has been emitted so it is only shown once.
let urlParseWarned = false;

// Legacy url.parse() entry point.
//
// - url: a string to parse, or an existing Url instance (returned as-is).
// - parseQueryString: when truthy, `query` is produced by querystring.parse()
//   instead of being left as a raw string.
// - slashesDenoteHost: when truthy, a leading '//' is treated as introducing
//   a host even without a protocol.
//
// Returns a Url instance. Non-string, non-Url input is rejected by
// validateString() inside Url.prototype.parse().
function urlParse(url, parseQueryString, slashesDenoteHost) {
  // The deprecation warning is opt-in (--pending-deprecation) and is emitted
  // before the instanceof short-circuit below, so it also fires for Url
  // inputs.
  if (!urlParseWarned && getOptionValue('--pending-deprecation')) {
    urlParseWarned = true;
    process.emitWarning(
      '`url.parse()` behavior is not standardized and prone to ' +
      'errors that have security implications. Use the WHATWG URL API ' +
      'instead. CVEs are not issued for `url.parse()` vulnerabilities.',
      'DeprecationWarning',
      'DEP0169',
    );
  }

  if (url instanceof Url) return url;

  const urlObject = new Url();
  urlObject.parse(url, parseQueryString, slashesDenoteHost);
  return urlObject;
}

// Returns true when `hostname` starts with '[' and ends with ']'. No check is
// made that the bracketed content is a valid IPv6 address.
function isIpv6Hostname(hostname) {
  return (
    StringPrototypeCharCodeAt(hostname, 0) === CHAR_LEFT_SQUARE_BRACKET &&
    StringPrototypeCharCodeAt(hostname, hostname.length - 1) ===
    CHAR_RIGHT_SQUARE_BRACKET
  );
}

// This prevents some common spoofing bugs due to our use of IDNA toASCII. For
// compatibility, the set of characters we use here is the *intersection* of
// "forbidden host code point" in the WHATWG URL Standard [1] and the
// characters in the host parsing loop in Url.prototype.parse, with the
// following additions:
//
// - ':' since this could cause a "protocol spoofing" bug
// - '@' since this could cause parts of the hostname to be confused with auth
// - '[' and ']' since this could cause a non-IPv6 hostname to be interpreted
//   as IPv6 by isIpv6Hostname above
//
// [1]: https://url.spec.whatwg.org/#forbidden-host-code-point
const forbiddenHostChars = /[\0\t\n\r #%/:<>?@[\\\]^|]/;
// For IPv6, permit '[', ']', and ':'.
const forbiddenHostCharsIpv6 = /[\0\t\n\r #%/<>?@\\^|]/;

// Parses `url` into the fields of this Url instance and returns `this`.
//
// Throws (via validateString) when `url` is not a string, and throws
// ERR_INVALID_URL when the hostname fails the forbidden-character checks
// below. decodeURIComponent() on the auth portion is not guarded, so a
// malformed percent sequence there propagates its own error.
Url.prototype.parse = function parse(url, parseQueryString, slashesDenoteHost) {
  validateString(url, 'url');

  // Copy chrome, IE, opera backslash-handling behavior.
  // Back slashes before the query string get converted to forward slashes
  // See: https://code.google.com/p/chromium/issues/detail?id=25916
  let hasHash = false;
  let hasAt = false;
  // `start` is the index of the first non-whitespace character; `end` is the
  // index where the current trailing whitespace run begins, or -1 when the
  // most recent character was not whitespace.
  let start = -1;
  let end = -1;
  // `rest` accumulates the backslash-converted text; `lastPos` is the index
  // in `url` up to which text has already been copied into `rest`.
  let rest = '';
  let lastPos = 0;
  for (let i = 0, inWs = false, split = false; i < url.length; ++i) {
    const code = url.charCodeAt(i);

    // Find first and last non-whitespace characters for trimming
    const isWs = code < 33 ||
                 code === CHAR_NO_BREAK_SPACE ||
                 code === CHAR_ZERO_WIDTH_NOBREAK_SPACE;
    if (start === -1) {
      if (isWs)
        continue;
      lastPos = start = i;
    } else if (inWs) {
      if (!isWs) {
        end = -1;
        inWs = false;
      }
    } else if (isWs) {
      end = i;
      inWs = true;
    }

    // Only convert backslashes while we haven't seen a split character
    // (`split` becomes true at the first '?' or '#'; note that `hasAt` is
    // therefore only set for an '@' that precedes the first '?' or '#').
    if (!split) {
      switch (code) {
        case CHAR_AT:
          hasAt = true;
          break;
        case CHAR_HASH:
          hasHash = true;
        // Fall through
        case CHAR_QUESTION_MARK:
          split = true;
          break;
        case CHAR_BACKWARD_SLASH:
          if (i - lastPos > 0)
            rest += url.slice(lastPos, i);
          rest += '/';
          lastPos = i + 1;
          break;
      }
    } else if (!hasHash && code === CHAR_HASH) {
      hasHash = true;
    }
  }

  // Check if string was non-empty (including strings with only whitespace)
  if (start !== -1) {
    if (lastPos === start) {
      // We didn't convert any backslashes

      if (end === -1) {
        if (start === 0)
          rest = url;
        else
          rest = url.slice(start);
      } else {
        rest = url.slice(start, end);
      }
    } else if (end === -1 && lastPos < url.length) {
      // We converted some backslashes and have only part of the entire string
      rest += url.slice(lastPos);
    } else if (end !== -1 && lastPos < end) {
      // We converted some backslashes and have only part of the entire string
      rest += url.slice(lastPos, end);
    }
  }

  if (!slashesDenoteHost && !hasHash && !hasAt) {
    // Try fast path regexp
    const simplePath = simplePathPattern.exec(rest);
    if (simplePath) {
      // Path-only input: href/path are the trimmed input itself and no
      // protocol/host fields are touched.
      this.path = rest;
      this.href = rest;
      this.pathname = simplePath[1];
      if (simplePath[2]) {
        this.search = simplePath[2];
        if (parseQueryString) {
          this.query = querystring.parse(this.search.slice(1));
        } else {
          this.query = this.search.slice(1);
        }
      } else if (parseQueryString) {
        this.search = null;
        this.query = ObjectCreate(null);
      }
      return this;
    }
  }

  // `proto` keeps the scheme exactly as written (including the trailing ':'),
  // `lowerProto` is its lower-cased form and is what gets stored.
  let proto = protocolPattern.exec(rest);
  let lowerProto;
  if (proto) {
    proto = proto[0];
    lowerProto = proto.toLowerCase();
    this.protocol = lowerProto;
    rest = rest.slice(proto.length);
  }

  // Figure out if it's got a host
  // user@server is *always* interpreted as a hostname, and url
  // resolution will treat //foo/bar as host=foo,path=bar because that's
  // how the browser resolves relative URLs.
  let slashes;
  if (slashesDenoteHost || proto || hostPattern.test(rest)) {
    slashes = rest.charCodeAt(0) === CHAR_FORWARD_SLASH &&
              rest.charCodeAt(1) === CHAR_FORWARD_SLASH;
    if (slashes && !(proto && hostlessProtocol.has(lowerProto))) {
      rest = rest.slice(2);
      this.slashes = true;
    }
  }

  // NOTE(review): the slashedProtocol lookup below uses the original-case
  // `proto` while the hostlessProtocol lookups use `lowerProto` - presumably
  // long-standing legacy behavior; confirm before changing.
  if (!hostlessProtocol.has(lowerProto) &&
      (slashes || (proto && !slashedProtocol.has(proto)))) {

    // there's a hostname.
    // the first instance of /, ?, ;, or # ends the host.
    //
    // If there is an @ in the hostname, then non-host chars *are* allowed
    // to the left of the last @ sign, unless some host-ending character
    // comes *before* the @-sign.
    // URLs are obnoxious.
    //
    // ex:
    // http://a@b@c/ => user:a@b host:c
    // http://a@b?@c => user:a host:b path:/?@c

    // hostEnd: index of the first '#', '/' or '?' (stops the scan).
    // atSign:  index of the last '@' seen before the scan stopped.
    // nonHost: index of the first character after the last '@' that cannot
    //          be part of the host; the host is cut there.
    let hostEnd = -1;
    let atSign = -1;
    let nonHost = -1;
    for (let i = 0; i < rest.length; ++i) {
      switch (rest.charCodeAt(i)) {
        case CHAR_TAB:
        case CHAR_LINE_FEED:
        case CHAR_CARRIAGE_RETURN:
          // WHATWG URL removes tabs, newlines, and carriage returns. Let's do that too.
          rest = rest.slice(0, i) + rest.slice(i + 1);
          // Re-examine the character that just moved into position i.
          i -= 1;
          break;
        case CHAR_SPACE:
        case CHAR_DOUBLE_QUOTE:
        case CHAR_PERCENT:
        case CHAR_SINGLE_QUOTE:
        case CHAR_SEMICOLON:
        case CHAR_LEFT_ANGLE_BRACKET:
        case CHAR_RIGHT_ANGLE_BRACKET:
        case CHAR_BACKWARD_SLASH:
        case CHAR_CIRCUMFLEX_ACCENT:
        case CHAR_GRAVE_ACCENT:
        case CHAR_LEFT_CURLY_BRACKET:
        case CHAR_VERTICAL_LINE:
        case CHAR_RIGHT_CURLY_BRACKET:
          // Characters that are never ever allowed in a hostname from RFC 2396
          if (nonHost === -1)
            nonHost = i;
          break;
        case CHAR_HASH:
        case CHAR_FORWARD_SLASH:
        case CHAR_QUESTION_MARK:
          // Find the first instance of any host-ending characters
          if (nonHost === -1)
            nonHost = i;
          hostEnd = i;
          break;
        case CHAR_AT:
          // At this point, either we have an explicit point where the
          // auth portion cannot go past, or the last @ char is the decider.
          atSign = i;
          nonHost = -1;
          break;
      }
      if (hostEnd !== -1)
        break;
    }
    // `start` is reused here as the index in `rest` where the host begins.
    start = 0;
    if (atSign !== -1) {
      // decodeURIComponent throws on malformed percent-encoding; nothing here
      // catches it.
      this.auth = decodeURIComponent(rest.slice(0, atSign));
      start = atSign + 1;
    }
    if (nonHost === -1) {
      this.host = rest.slice(start);
      rest = '';
    } else {
      this.host = rest.slice(start, nonHost);
      rest = rest.slice(nonHost);
    }

    // pull out port.
    this.parseHost();

    // We've indicated that there is a hostname,
    // so even if it's empty, it has to be present.
    if (typeof this.hostname !== 'string')
      this.hostname = '';

    const hostname = this.hostname;

    // If hostname begins with [ and ends with ]
    // assume that it's an IPv6 address.
    const ipv6Hostname = isIpv6Hostname(hostname);

    // validate a little.
    if (!ipv6Hostname) {
      // May shorten this.hostname and push the cut-off tail into `rest`.
      rest = getHostname(this, rest, hostname);
    }

    if (this.hostname.length > hostnameMaxLen) {
      this.hostname = '';
    } else {
      // Hostnames are always lower case.
      this.hostname = this.hostname.toLowerCase();
    }

    if (this.hostname !== '') {
      if (ipv6Hostname) {
        if (forbiddenHostCharsIpv6.test(this.hostname)) {
          throw new ERR_INVALID_URL(url);
        }
      } else {
        // IDNA Support: Returns a punycoded representation of "domain".
        // It only converts parts of the domain name that
        // have non-ASCII characters, i.e. it doesn't matter if
        // you call it with a domain that already is ASCII-only.
        this.hostname = toASCII(this.hostname);

        // Prevent two potential routes of hostname spoofing.
        // 1. If this.hostname is empty, it must have become empty due to toASCII
        //    since we checked this.hostname above.
        // 2. If any of forbiddenHostChars appears in this.hostname, it must have
        //    also gotten in due to toASCII. This is since getHostname would have
        //    filtered them out otherwise.
        // Rather than trying to correct this by moving the non-host part into
        // the pathname as we've done in getHostname, throw an exception to
        // convey the severity of this issue.
        if (this.hostname === '' || forbiddenHostChars.test(this.hostname)) {
          throw new ERR_INVALID_URL(url);
        }
      }
    }

    // Rebuild `host` from the validated hostname and the port found by
    // parseHost().
    const p = this.port ? ':' + this.port : '';
    const h = this.hostname || '';
    this.host = h + p;

    // strip [ and ] from the hostname
    // the host field still retains them, though
    if (ipv6Hostname) {
      this.hostname = this.hostname.slice(1, -1);
      if (rest[0] !== '/') {
        rest = '/' + rest;
      }
    }
  }

  // Now rest is set to the post-host stuff.
  // Chop off any delim chars.
  if (!unsafeProtocol.has(lowerProto)) {
    // First, make 100% sure that any "autoEscape" chars get
    // escaped, even if encodeURIComponent doesn't think they
    // need to be.
    rest = autoEscapeStr(rest);
  }

  // Locate the first '?' that appears before the first '#', and the first
  // '#' itself; the scan stops at the '#'.
  let questionIdx = -1;
  let hashIdx = -1;
  for (let i = 0; i < rest.length; ++i) {
    const code = rest.charCodeAt(i);
    if (code === CHAR_HASH) {
      this.hash = rest.slice(i);
      hashIdx = i;
      break;
    } else if (code === CHAR_QUESTION_MARK && questionIdx === -1) {
      questionIdx = i;
    }
  }

  if (questionIdx !== -1) {
    if (hashIdx === -1) {
      this.search = rest.slice(questionIdx);
      this.query = rest.slice(questionIdx + 1);
    } else {
      this.search = rest.slice(questionIdx, hashIdx);
      this.query = rest.slice(questionIdx + 1, hashIdx);
    }
    if (parseQueryString) {
      this.query = querystring.parse(this.query);
    }
  } else if (parseQueryString) {
    // No query string, but parseQueryString still requested
    this.search = null;
    this.query = ObjectCreate(null);
  }

  // The pathname is whatever precedes the first of '?' / '#'.
  const useQuestionIdx =
    questionIdx !== -1 && (hashIdx === -1 || questionIdx < hashIdx);
  const firstIdx = useQuestionIdx ? questionIdx : hashIdx;
  if (firstIdx === -1) {
    if (rest.length > 0)
      this.pathname = rest;
  } else if (firstIdx > 0) {
    this.pathname = rest.slice(0, firstIdx);
  }
  if (slashedProtocol.has(lowerProto) &&
      this.hostname && !this.pathname) {
    this.pathname = '/';
  }

  // To support http.request
  if (this.pathname || this.search) {
    const p = this.pathname || '';
    const s = this.search || '';
    this.path = p + s;
  }

  // Finally, reconstruct the href based on what has been validated.
  this.href = this.format();
  return this;
};

// Cuts `hostname` at the first '/', '\', '#', '?' or ':'.
//
// When such a character is found, `self.hostname` is set to the part before
// it and the return value is '/' + the cut-off tail + `rest`, i.e. the tail
// becomes the start of the path. Otherwise `self.hostname` is untouched and
// `rest` is returned unchanged.
function getHostname(self, rest, hostname) {
  for (let i = 0; i < hostname.length; ++i) {
    const code = hostname.charCodeAt(i);
    const isValid = (code !== CHAR_FORWARD_SLASH &&
                     code !== CHAR_BACKWARD_SLASH &&
                     code !== CHAR_HASH &&
                     code !== CHAR_QUESTION_MARK &&
                     code !== CHAR_COLON);

    if (!isValid) {
      self.hostname = hostname.slice(0, i);
      return `/${hostname.slice(i)}${rest}`;
    }
  }
  return rest;
}

// Escaped characters. Use empty strings to fill up unused entries.
// Using Array is faster than Object/Map
// The table is indexed by UTF-16 code unit (0 - 125). A non-empty entry is
// the percent-encoded replacement for that character.
const escapedCodes = [
  /* 0 - 9 */ '', '', '', '', '', '', '', '', '', '%09',
  /* 10 - 19 */ '%0A', '', '', '%0D', '', '', '', '', '', '',
  /* 20 - 29 */ '', '', '', '', '', '', '', '', '', '',
  /* 30 - 39 */ '', '', '%20', '', '%22', '', '', '', '', '%27',
  /* 40 - 49 */ '', '', '', '', '', '', '', '', '', '',
  /* 50 - 59 */ '', '', '', '', '', '', '', '', '', '',
  /* 60 - 69 */ '%3C', '', '%3E', '', '', '', '', '', '', '',
  /* 70 - 79 */ '', '', '', '', '', '', '', '', '', '',
  /* 80 - 89 */ '', '', '', '', '', '', '', '', '', '',
  /* 90 - 99 */ '', '', '%5C', '', '%5E', '', '%60', '', '', '',
  /* 100 - 109 */ '', '', '', '', '', '', '', '', '', '',
  /* 110 - 119 */ '', '', '', '', '', '', '', '', '', '',
  /* 120 - 125 */ '', '', '', '%7B', '%7C', '%7D',
];

// Percent-encodes every character of `rest` that has an entry in
// escapedCodes (the RFC 2396 delimiters and "unwise" characters, plus the
// single quote as a guard against XSS). All other characters - including
// anything above code 125 - are copied through untouched.
// Returns `rest` itself when nothing needed escaping.
function autoEscapeStr(rest) {
  let out = '';
  // Index just past the most recently escaped character; everything before
  // it has already been appended to `out`.
  let copiedUpTo = 0;
  for (let pos = 0; pos < rest.length; ++pos) {
    const replacement = escapedCodes[rest.charCodeAt(pos)];
    if (!replacement)
      continue;

    // Flush the run of ordinary characters preceding this one, if any.
    if (pos > copiedUpTo)
      out += rest.slice(copiedUpTo, pos);
    out += replacement;
    copiedUpTo = pos + 1;
  }

  // Nothing was escaped: hand back the input unchanged.
  if (copiedUpTo === 0)
    return rest;

  // Append the ordinary characters that follow the last escape.
  return copiedUpTo < rest.length ? out + rest.slice(copiedUpTo) : out;
}

// Format a parsed object into a url string
//
// Accepts three kinds of input:
// - a string, which is first run through urlParse() (handy for cleaning up
//   potentially wonky urls) and then formatted like a legacy Url;
// - a WHATWG URL instance, which is serialized by the native binding and
//   honours the boolean `fragment`, `unicode`, `search` and `auth` options;
// - any other non-null object, which is formatted with Url.prototype.format.
// Anything else throws ERR_INVALID_ARG_TYPE.
function urlFormat(urlObject, options) {
  if (typeof urlObject === 'string')
    return Url.prototype.format.call(urlParse(urlObject));

  if (typeof urlObject !== 'object' || urlObject === null) {
    throw new ERR_INVALID_ARG_TYPE('urlObject',
                                   ['Object', 'string'], urlObject);
  }

  // Legacy Url instances and plain objects share the same code path.
  if (!(urlObject instanceof URL))
    return Url.prototype.format.call(urlObject);

  // Defaults for WHATWG URL serialization.
  let fragment = true;
  let unicode = false;
  let search = true;
  let auth = true;

  if (options) {
    validateObject(options, 'options');

    // Only null/undefined leave a default in place; every other value is
    // coerced to a boolean.
    if (options.fragment != null)
      fragment = Boolean(options.fragment);
    if (options.unicode != null)
      unicode = Boolean(options.unicode);
    if (options.search != null)
      search = Boolean(options.search);
    if (options.auth != null)
      auth = Boolean(options.auth);
  }

  return bindingUrl.format(urlObject.href, fragment, unicode, search, auth);
}

// These characters do not need escaping:
// ! - .
_ ~ 608// ' ( ) * : 609// digits 610// alpha (uppercase) 611// alpha (lowercase) 612const noEscapeAuth = new Int8Array([ 613 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x00 - 0x0F 614 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x10 - 0x1F 615 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, // 0x20 - 0x2F 616 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, // 0x30 - 0x3F 617 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x40 - 0x4F 618 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, // 0x50 - 0x5F 619 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x60 - 0x6F 620 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, // 0x70 - 0x7F 621]); 622 623Url.prototype.format = function format() { 624 let auth = this.auth || ''; 625 if (auth) { 626 auth = encodeStr(auth, noEscapeAuth, hexTable); 627 auth += '@'; 628 } 629 630 let protocol = this.protocol || ''; 631 let pathname = this.pathname || ''; 632 let hash = this.hash || ''; 633 let host = ''; 634 let query = ''; 635 636 if (this.host) { 637 host = auth + this.host; 638 } else if (this.hostname) { 639 host = auth + ( 640 this.hostname.includes(':') && !isIpv6Hostname(this.hostname) ? 641 '[' + this.hostname + ']' : 642 this.hostname 643 ); 644 if (this.port) { 645 host += ':' + this.port; 646 } 647 } 648 649 if (this.query !== null && typeof this.query === 'object') { 650 query = querystring.stringify(this.query); 651 } 652 653 let search = this.search || (query && ('?' 
+ query)) || ''; 654 655 if (protocol && protocol.charCodeAt(protocol.length - 1) !== 58/* : */) 656 protocol += ':'; 657 658 let newPathname = ''; 659 let lastPos = 0; 660 for (let i = 0; i < pathname.length; ++i) { 661 switch (pathname.charCodeAt(i)) { 662 case CHAR_HASH: 663 if (i - lastPos > 0) 664 newPathname += pathname.slice(lastPos, i); 665 newPathname += '%23'; 666 lastPos = i + 1; 667 break; 668 case CHAR_QUESTION_MARK: 669 if (i - lastPos > 0) 670 newPathname += pathname.slice(lastPos, i); 671 newPathname += '%3F'; 672 lastPos = i + 1; 673 break; 674 } 675 } 676 if (lastPos > 0) { 677 if (lastPos !== pathname.length) 678 pathname = newPathname + pathname.slice(lastPos); 679 else 680 pathname = newPathname; 681 } 682 683 // Only the slashedProtocols get the //. Not mailto:, xmpp:, etc. 684 // unless they had them to begin with. 685 if (this.slashes || slashedProtocol.has(protocol)) { 686 if (this.slashes || host) { 687 if (pathname && pathname.charCodeAt(0) !== CHAR_FORWARD_SLASH) 688 pathname = '/' + pathname; 689 host = '//' + host; 690 } else if (protocol.length >= 4 && 691 protocol.charCodeAt(0) === 102/* f */ && 692 protocol.charCodeAt(1) === 105/* i */ && 693 protocol.charCodeAt(2) === 108/* l */ && 694 protocol.charCodeAt(3) === 101/* e */) { 695 host = '//'; 696 } 697 } 698 699 search = search.replace(/#/g, '%23'); 700 701 if (hash && hash.charCodeAt(0) !== CHAR_HASH) 702 hash = '#' + hash; 703 if (search && search.charCodeAt(0) !== CHAR_QUESTION_MARK) 704 search = '?' 
+ search; 705 706 return protocol + host + pathname + search + hash; 707}; 708 709function urlResolve(source, relative) { 710 return urlParse(source, false, true).resolve(relative); 711} 712 713Url.prototype.resolve = function resolve(relative) { 714 return this.resolveObject(urlParse(relative, false, true)).format(); 715}; 716 717function urlResolveObject(source, relative) { 718 if (!source) return relative; 719 return urlParse(source, false, true).resolveObject(relative); 720} 721 722Url.prototype.resolveObject = function resolveObject(relative) { 723 if (typeof relative === 'string') { 724 const rel = new Url(); 725 rel.parse(relative, false, true); 726 relative = rel; 727 } 728 729 const result = new Url(); 730 const tkeys = ObjectKeys(this); 731 for (let tk = 0; tk < tkeys.length; tk++) { 732 const tkey = tkeys[tk]; 733 result[tkey] = this[tkey]; 734 } 735 736 // Hash is always overridden, no matter what. 737 // even href="" will remove it. 738 result.hash = relative.hash; 739 740 // If the relative url is empty, then there's nothing left to do here. 741 if (relative.href === '') { 742 result.href = result.format(); 743 return result; 744 } 745 746 // Hrefs like //foo/bar always cut to the protocol. 
747 if (relative.slashes && !relative.protocol) { 748 // Take everything except the protocol from relative 749 const rkeys = ObjectKeys(relative); 750 for (let rk = 0; rk < rkeys.length; rk++) { 751 const rkey = rkeys[rk]; 752 if (rkey !== 'protocol') 753 result[rkey] = relative[rkey]; 754 } 755 756 // urlParse appends trailing / to urls like http://www.example.com 757 if (slashedProtocol.has(result.protocol) && 758 result.hostname && !result.pathname) { 759 result.path = result.pathname = '/'; 760 } 761 762 result.href = result.format(); 763 return result; 764 } 765 766 if (relative.protocol && relative.protocol !== result.protocol) { 767 // If it's a known url protocol, then changing 768 // the protocol does weird things 769 // first, if it's not file:, then we MUST have a host, 770 // and if there was a path 771 // to begin with, then we MUST have a path. 772 // if it is file:, then the host is dropped, 773 // because that's known to be hostless. 774 // anything else is assumed to be absolute. 
775 if (!slashedProtocol.has(relative.protocol)) { 776 const keys = ObjectKeys(relative); 777 for (let v = 0; v < keys.length; v++) { 778 const k = keys[v]; 779 result[k] = relative[k]; 780 } 781 result.href = result.format(); 782 return result; 783 } 784 785 result.protocol = relative.protocol; 786 if (!relative.host && 787 !/^file:?$/.test(relative.protocol) && 788 !hostlessProtocol.has(relative.protocol)) { 789 const relPath = (relative.pathname || '').split('/'); 790 while (relPath.length && !(relative.host = relPath.shift())); 791 if (!relative.host) relative.host = ''; 792 if (!relative.hostname) relative.hostname = ''; 793 if (relPath[0] !== '') relPath.unshift(''); 794 if (relPath.length < 2) relPath.unshift(''); 795 result.pathname = relPath.join('/'); 796 } else { 797 result.pathname = relative.pathname; 798 } 799 result.search = relative.search; 800 result.query = relative.query; 801 result.host = relative.host || ''; 802 result.auth = relative.auth; 803 result.hostname = relative.hostname || relative.host; 804 result.port = relative.port; 805 // To support http.request 806 if (result.pathname || result.search) { 807 const p = result.pathname || ''; 808 const s = result.search || ''; 809 result.path = p + s; 810 } 811 result.slashes = result.slashes || relative.slashes; 812 result.href = result.format(); 813 return result; 814 } 815 816 const isSourceAbs = (result.pathname && result.pathname.charAt(0) === '/'); 817 const isRelAbs = ( 818 relative.host || (relative.pathname && relative.pathname.charAt(0) === '/') 819 ); 820 let mustEndAbs = (isRelAbs || isSourceAbs || 821 (result.host && relative.pathname)); 822 const removeAllDots = mustEndAbs; 823 let srcPath = (result.pathname && result.pathname.split('/')) || []; 824 const relPath = (relative.pathname && relative.pathname.split('/')) || []; 825 const noLeadingSlashes = result.protocol && 826 !slashedProtocol.has(result.protocol); 827 828 // If the url is a non-slashed url, then relative 829 // links 
like ../.. should be able 830 // to crawl up to the hostname, as well. This is strange. 831 // result.protocol has already been set by now. 832 // Later on, put the first path part into the host field. 833 if (noLeadingSlashes) { 834 result.hostname = ''; 835 result.port = null; 836 if (result.host) { 837 if (srcPath[0] === '') srcPath[0] = result.host; 838 else srcPath.unshift(result.host); 839 } 840 result.host = ''; 841 if (relative.protocol) { 842 relative.hostname = null; 843 relative.port = null; 844 result.auth = null; 845 if (relative.host) { 846 if (relPath[0] === '') relPath[0] = relative.host; 847 else relPath.unshift(relative.host); 848 } 849 relative.host = null; 850 } 851 mustEndAbs = mustEndAbs && (relPath[0] === '' || srcPath[0] === ''); 852 } 853 854 if (isRelAbs) { 855 // it's absolute. 856 if (relative.host || relative.host === '') { 857 if (result.host !== relative.host) result.auth = null; 858 result.host = relative.host; 859 result.port = relative.port; 860 } 861 if (relative.hostname || relative.hostname === '') { 862 if (result.hostname !== relative.hostname) result.auth = null; 863 result.hostname = relative.hostname; 864 } 865 result.search = relative.search; 866 result.query = relative.query; 867 srcPath = relPath; 868 // Fall through to the dot-handling below. 869 } else if (relPath.length) { 870 // it's relative 871 // throw away the existing file, and take the new path instead. 872 if (!srcPath) srcPath = []; 873 srcPath.pop(); 874 srcPath = srcPath.concat(relPath); 875 result.search = relative.search; 876 result.query = relative.query; 877 } else if (relative.search !== null && relative.search !== undefined) { 878 // Just pull out the search. 879 // like href='?foo'. 880 // Put this after the other two cases because it simplifies the booleans 881 if (noLeadingSlashes) { 882 result.hostname = result.host = srcPath.shift(); 883 // Occasionally the auth can get stuck only in host. 
884 // This especially happens in cases like 885 // url.resolveObject('mailto:local1@domain1', 'local2@domain2') 886 const authInHost = 887 result.host && result.host.indexOf('@') > 0 && result.host.split('@'); 888 if (authInHost) { 889 result.auth = authInHost.shift(); 890 result.host = result.hostname = authInHost.shift(); 891 } 892 } 893 result.search = relative.search; 894 result.query = relative.query; 895 // To support http.request 896 if (result.pathname !== null || result.search !== null) { 897 result.path = (result.pathname ? result.pathname : '') + 898 (result.search ? result.search : ''); 899 } 900 result.href = result.format(); 901 return result; 902 } 903 904 if (!srcPath.length) { 905 // No path at all. All other things were already handled above. 906 result.pathname = null; 907 // To support http.request 908 if (result.search) { 909 result.path = '/' + result.search; 910 } else { 911 result.path = null; 912 } 913 result.href = result.format(); 914 return result; 915 } 916 917 // If a url ENDs in . or .., then it must get a trailing slash. 918 // however, if it ends in anything else non-slashy, 919 // then it must NOT get a trailing slash. 920 let last = srcPath.slice(-1)[0]; 921 const hasTrailingSlash = ( 922 ((result.host || relative.host || srcPath.length > 1) && 923 (last === '.' 
|| last === '..')) || last === ''); 924 925 // Strip single dots, resolve double dots to parent dir 926 // if the path tries to go above the root, `up` ends up > 0 927 let up = 0; 928 for (let i = srcPath.length - 1; i >= 0; i--) { 929 last = srcPath[i]; 930 if (last === '.') { 931 spliceOne(srcPath, i); 932 } else if (last === '..') { 933 spliceOne(srcPath, i); 934 up++; 935 } else if (up) { 936 spliceOne(srcPath, i); 937 up--; 938 } 939 } 940 941 // If the path is allowed to go above the root, restore leading ..s 942 if (!mustEndAbs && !removeAllDots) { 943 while (up--) { 944 srcPath.unshift('..'); 945 } 946 } 947 948 if (mustEndAbs && srcPath[0] !== '' && 949 (!srcPath[0] || srcPath[0].charAt(0) !== '/')) { 950 srcPath.unshift(''); 951 } 952 953 if (hasTrailingSlash && (srcPath.join('/').substr(-1) !== '/')) { 954 srcPath.push(''); 955 } 956 957 const isAbsolute = srcPath[0] === '' || 958 (srcPath[0] && srcPath[0].charAt(0) === '/'); 959 960 // put the host back 961 if (noLeadingSlashes) { 962 result.hostname = 963 result.host = isAbsolute ? '' : srcPath.length ? srcPath.shift() : ''; 964 // Occasionally the auth can get stuck only in host. 965 // This especially happens in cases like 966 // url.resolveObject('mailto:local1@domain1', 'local2@domain2') 967 const authInHost = result.host && result.host.indexOf('@') > 0 ? 968 result.host.split('@') : false; 969 if (authInHost) { 970 result.auth = authInHost.shift(); 971 result.host = result.hostname = authInHost.shift(); 972 } 973 } 974 975 mustEndAbs = mustEndAbs || (result.host && srcPath.length); 976 977 if (mustEndAbs && !isAbsolute) { 978 srcPath.unshift(''); 979 } 980 981 if (!srcPath.length) { 982 result.pathname = null; 983 result.path = null; 984 } else { 985 result.pathname = srcPath.join('/'); 986 } 987 988 // To support request.http 989 if (result.pathname !== null || result.search !== null) { 990 result.path = (result.pathname ? result.pathname : '') + 991 (result.search ? 
result.search : ''); 992 } 993 result.auth = relative.auth || result.auth; 994 result.slashes = result.slashes || relative.slashes; 995 result.href = result.format(); 996 return result; 997}; 998 999Url.prototype.parseHost = function parseHost() { 1000 let host = this.host; 1001 let port = portPattern.exec(host); 1002 if (port) { 1003 port = port[0]; 1004 if (port !== ':') { 1005 this.port = port.slice(1); 1006 } 1007 host = host.slice(0, host.length - port.length); 1008 } 1009 if (host) this.hostname = host; 1010}; 1011 1012// When used internally, we are not obligated to associate TypeError with 1013// this function, so non-strings can be rejected by underlying implementation. 1014// Public API has to validate input and throw appropriate error. 1015function pathToFileURL(path) { 1016 validateString(path, 'path'); 1017 1018 return _pathToFileURL(path); 1019} 1020 1021module.exports = { 1022 // Original API 1023 Url, 1024 parse: urlParse, 1025 resolve: urlResolve, 1026 resolveObject: urlResolveObject, 1027 format: urlFormat, 1028 1029 // WHATWG API 1030 URL, 1031 URLSearchParams, 1032 domainToASCII, 1033 domainToUnicode, 1034 1035 // Utilities 1036 pathToFileURL, 1037 fileURLToPath, 1038 urlToHttpOptions, 1039}; 1040