1const assert = require('assert') 2const { atob } = require('buffer') 3const { isomorphicDecode } = require('./util') 4 5const encoder = new TextEncoder() 6 7/** 8 * @see https://mimesniff.spec.whatwg.org/#http-token-code-point 9 */ 10const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+-.^_|~A-Za-z0-9]+$/ 11const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line 12/** 13 * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point 14 */ 15const HTTP_QUOTED_STRING_TOKENS = /[\u0009|\u0020-\u007E|\u0080-\u00FF]/ // eslint-disable-line 16 17// https://fetch.spec.whatwg.org/#data-url-processor 18/** @param {URL} dataURL */ 19function dataURLProcessor (dataURL) { 20 // 1. Assert: dataURL’s scheme is "data". 21 assert(dataURL.protocol === 'data:') 22 23 // 2. Let input be the result of running the URL 24 // serializer on dataURL with exclude fragment 25 // set to true. 26 let input = URLSerializer(dataURL, true) 27 28 // 3. Remove the leading "data:" string from input. 29 input = input.slice(5) 30 31 // 4. Let position point at the start of input. 32 const position = { position: 0 } 33 34 // 5. Let mimeType be the result of collecting a 35 // sequence of code points that are not equal 36 // to U+002C (,), given position. 37 let mimeType = collectASequenceOfCodePointsFast( 38 ',', 39 input, 40 position 41 ) 42 43 // 6. Strip leading and trailing ASCII whitespace 44 // from mimeType. 45 // Undici implementation note: we need to store the 46 // length because if the mimetype has spaces removed, 47 // the wrong amount will be sliced from the input in 48 // step #9 49 const mimeTypeLength = mimeType.length 50 mimeType = removeASCIIWhitespace(mimeType, true, true) 51 52 // 7. If position is past the end of input, then 53 // return failure 54 if (position.position >= input.length) { 55 return 'failure' 56 } 57 58 // 8. Advance position by 1. 59 position.position++ 60 61 // 9. Let encodedBody be the remainder of input. 62 const encodedBody = input.slice(mimeTypeLength + 1) 63 64 // 10. Let body be the percent-decoding of encodedBody. 65 let body = stringPercentDecode(encodedBody) 66 67 // 11. If mimeType ends with U+003B (;), followed by 68 // zero or more U+0020 SPACE, followed by an ASCII 69 // case-insensitive match for "base64", then: 70 if (/;(\u0020){0,}base64$/i.test(mimeType)) { 71 // 1. Let stringBody be the isomorphic decode of body. 72 const stringBody = isomorphicDecode(body) 73 74 // 2. Set body to the forgiving-base64 decode of 75 // stringBody. 76 body = forgivingBase64(stringBody) 77 78 // 3. If body is failure, then return failure. 79 if (body === 'failure') { 80 return 'failure' 81 } 82 83 // 4. Remove the last 6 code points from mimeType. 84 mimeType = mimeType.slice(0, -6) 85 86 // 5. Remove trailing U+0020 SPACE code points from mimeType, 87 // if any. 88 mimeType = mimeType.replace(/(\u0020)+$/, '') 89 90 // 6. Remove the last U+003B (;) code point from mimeType. 91 mimeType = mimeType.slice(0, -1) 92 } 93 94 // 12. If mimeType starts with U+003B (;), then prepend 95 // "text/plain" to mimeType. 96 if (mimeType.startsWith(';')) { 97 mimeType = 'text/plain' + mimeType 98 } 99 100 // 13. Let mimeTypeRecord be the result of parsing 101 // mimeType. 102 let mimeTypeRecord = parseMIMEType(mimeType) 103 104 // 14. If mimeTypeRecord is failure, then set 105 // mimeTypeRecord to text/plain;charset=US-ASCII. 106 if (mimeTypeRecord === 'failure') { 107 mimeTypeRecord = parseMIMEType('text/plain;charset=US-ASCII') 108 } 109 110 // 15. Return a new data: URL struct whose MIME 111 // type is mimeTypeRecord and body is body. 112 // https://fetch.spec.whatwg.org/#data-url-struct 113 return { mimeType: mimeTypeRecord, body } 114} 115 116// https://url.spec.whatwg.org/#concept-url-serializer 117/** 118 * @param {URL} url 119 * @param {boolean} excludeFragment 120 */ 121function URLSerializer (url, excludeFragment = false) { 122 if (!excludeFragment) { 123 return url.href 124 } 125 126 const href = url.href 127 const hashLength = url.hash.length 128 129 return hashLength === 0 ? href : href.substring(0, href.length - hashLength) 130} 131 132// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points 133/** 134 * @param {(char: string) => boolean} condition 135 * @param {string} input 136 * @param {{ position: number }} position 137 */ 138function collectASequenceOfCodePoints (condition, input, position) { 139 // 1. Let result be the empty string. 140 let result = '' 141 142 // 2. While position doesn’t point past the end of input and the 143 // code point at position within input meets the condition condition: 144 while (position.position < input.length && condition(input[position.position])) { 145 // 1. Append that code point to the end of result. 146 result += input[position.position] 147 148 // 2. Advance position by 1. 149 position.position++ 150 } 151 152 // 3. Return result. 153 return result 154} 155 156/** 157 * A faster collectASequenceOfCodePoints that only works when comparing a single character. 158 * @param {string} char 159 * @param {string} input 160 * @param {{ position: number }} position 161 */ 162function collectASequenceOfCodePointsFast (char, input, position) { 163 const idx = input.indexOf(char, position.position) 164 const start = position.position 165 166 if (idx === -1) { 167 position.position = input.length 168 return input.slice(start) 169 } 170 171 position.position = idx 172 return input.slice(start, position.position) 173} 174 175// https://url.spec.whatwg.org/#string-percent-decode 176/** @param {string} input */ 177function stringPercentDecode (input) { 178 // 1. Let bytes be the UTF-8 encoding of input. 179 const bytes = encoder.encode(input) 180 181 // 2. Return the percent-decoding of bytes. 182 return percentDecode(bytes) 183} 184 185// https://url.spec.whatwg.org/#percent-decode 186/** @param {Uint8Array} input */ 187function percentDecode (input) { 188 // 1. Let output be an empty byte sequence. 189 /** @type {number[]} */ 190 const output = [] 191 192 // 2. For each byte byte in input: 193 for (let i = 0; i < input.length; i++) { 194 const byte = input[i] 195 196 // 1. If byte is not 0x25 (%), then append byte to output. 197 if (byte !== 0x25) { 198 output.push(byte) 199 200 // 2. Otherwise, if byte is 0x25 (%) and the next two bytes 201 // after byte in input are not in the ranges 202 // 0x30 (0) to 0x39 (9), 0x41 (A) to 0x46 (F), 203 // and 0x61 (a) to 0x66 (f), all inclusive, append byte 204 // to output. 205 } else if ( 206 byte === 0x25 && 207 !/^[0-9A-Fa-f]{2}$/i.test(String.fromCharCode(input[i + 1], input[i + 2])) 208 ) { 209 output.push(0x25) 210 211 // 3. Otherwise: 212 } else { 213 // 1. Let bytePoint be the two bytes after byte in input, 214 // decoded, and then interpreted as hexadecimal number. 215 const nextTwoBytes = String.fromCharCode(input[i + 1], input[i + 2]) 216 const bytePoint = Number.parseInt(nextTwoBytes, 16) 217 218 // 2. Append a byte whose value is bytePoint to output. 219 output.push(bytePoint) 220 221 // 3. Skip the next two bytes in input. 222 i += 2 223 } 224 } 225 226 // 3. Return output. 227 return Uint8Array.from(output) 228} 229 230// https://mimesniff.spec.whatwg.org/#parse-a-mime-type 231/** @param {string} input */ 232function parseMIMEType (input) { 233 // 1. Remove any leading and trailing HTTP whitespace 234 // from input. 235 input = removeHTTPWhitespace(input, true, true) 236 237 // 2. Let position be a position variable for input, 238 // initially pointing at the start of input. 239 const position = { position: 0 } 240 241 // 3. Let type be the result of collecting a sequence 242 // of code points that are not U+002F (/) from 243 // input, given position. 244 const type = collectASequenceOfCodePointsFast( 245 '/', 246 input, 247 position 248 ) 249 250 // 4. If type is the empty string or does not solely 251 // contain HTTP token code points, then return failure. 252 // https://mimesniff.spec.whatwg.org/#http-token-code-point 253 if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) { 254 return 'failure' 255 } 256 257 // 5. If position is past the end of input, then return 258 // failure 259 if (position.position > input.length) { 260 return 'failure' 261 } 262 263 // 6. Advance position by 1. (This skips past U+002F (/).) 264 position.position++ 265 266 // 7. Let subtype be the result of collecting a sequence of 267 // code points that are not U+003B (;) from input, given 268 // position. 269 let subtype = collectASequenceOfCodePointsFast( 270 ';', 271 input, 272 position 273 ) 274 275 // 8. Remove any trailing HTTP whitespace from subtype. 276 subtype = removeHTTPWhitespace(subtype, false, true) 277 278 // 9. If subtype is the empty string or does not solely 279 // contain HTTP token code points, then return failure. 280 if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) { 281 return 'failure' 282 } 283 284 const typeLowercase = type.toLowerCase() 285 const subtypeLowercase = subtype.toLowerCase() 286 287 // 10. Let mimeType be a new MIME type record whose type 288 // is type, in ASCII lowercase, and subtype is subtype, 289 // in ASCII lowercase. 290 // https://mimesniff.spec.whatwg.org/#mime-type 291 const mimeType = { 292 type: typeLowercase, 293 subtype: subtypeLowercase, 294 /** @type {Map<string, string>} */ 295 parameters: new Map(), 296 // https://mimesniff.spec.whatwg.org/#mime-type-essence 297 essence: `${typeLowercase}/${subtypeLowercase}` 298 } 299 300 // 11. While position is not past the end of input: 301 while (position.position < input.length) { 302 // 1. Advance position by 1. (This skips past U+003B (;).) 303 position.position++ 304 305 // 2. Collect a sequence of code points that are HTTP 306 // whitespace from input given position. 307 collectASequenceOfCodePoints( 308 // https://fetch.spec.whatwg.org/#http-whitespace 309 char => HTTP_WHITESPACE_REGEX.test(char), 310 input, 311 position 312 ) 313 314 // 3. Let parameterName be the result of collecting a 315 // sequence of code points that are not U+003B (;) 316 // or U+003D (=) from input, given position. 317 let parameterName = collectASequenceOfCodePoints( 318 (char) => char !== ';' && char !== '=', 319 input, 320 position 321 ) 322 323 // 4. Set parameterName to parameterName, in ASCII 324 // lowercase. 325 parameterName = parameterName.toLowerCase() 326 327 // 5. If position is not past the end of input, then: 328 if (position.position < input.length) { 329 // 1. If the code point at position within input is 330 // U+003B (;), then continue. 331 if (input[position.position] === ';') { 332 continue 333 } 334 335 // 2. Advance position by 1. (This skips past U+003D (=).) 336 position.position++ 337 } 338 339 // 6. If position is past the end of input, then break. 340 if (position.position > input.length) { 341 break 342 } 343 344 // 7. Let parameterValue be null. 345 let parameterValue = null 346 347 // 8. If the code point at position within input is 348 // U+0022 ("), then: 349 if (input[position.position] === '"') { 350 // 1. Set parameterValue to the result of collecting 351 // an HTTP quoted string from input, given position 352 // and the extract-value flag. 353 parameterValue = collectAnHTTPQuotedString(input, position, true) 354 355 // 2. Collect a sequence of code points that are not 356 // U+003B (;) from input, given position. 357 collectASequenceOfCodePointsFast( 358 ';', 359 input, 360 position 361 ) 362 363 // 9. Otherwise: 364 } else { 365 // 1. Set parameterValue to the result of collecting 366 // a sequence of code points that are not U+003B (;) 367 // from input, given position. 368 parameterValue = collectASequenceOfCodePointsFast( 369 ';', 370 input, 371 position 372 ) 373 374 // 2. Remove any trailing HTTP whitespace from parameterValue. 375 parameterValue = removeHTTPWhitespace(parameterValue, false, true) 376 377 // 3. If parameterValue is the empty string, then continue. 378 if (parameterValue.length === 0) { 379 continue 380 } 381 } 382 383 // 10. If all of the following are true 384 // - parameterName is not the empty string 385 // - parameterName solely contains HTTP token code points 386 // - parameterValue solely contains HTTP quoted-string token code points 387 // - mimeType’s parameters[parameterName] does not exist 388 // then set mimeType’s parameters[parameterName] to parameterValue. 389 if ( 390 parameterName.length !== 0 && 391 HTTP_TOKEN_CODEPOINTS.test(parameterName) && 392 (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) && 393 !mimeType.parameters.has(parameterName) 394 ) { 395 mimeType.parameters.set(parameterName, parameterValue) 396 } 397 } 398 399 // 12. Return mimeType. 400 return mimeType 401} 402 403// https://infra.spec.whatwg.org/#forgiving-base64-decode 404/** @param {string} data */ 405function forgivingBase64 (data) { 406 // 1. Remove all ASCII whitespace from data. 407 data = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '') // eslint-disable-line 408 409 // 2. If data’s code point length divides by 4 leaving 410 // no remainder, then: 411 if (data.length % 4 === 0) { 412 // 1. If data ends with one or two U+003D (=) code points, 413 // then remove them from data. 414 data = data.replace(/=?=$/, '') 415 } 416 417 // 3. If data’s code point length divides by 4 leaving 418 // a remainder of 1, then return failure. 419 if (data.length % 4 === 1) { 420 return 'failure' 421 } 422 423 // 4. If data contains a code point that is not one of 424 // U+002B (+) 425 // U+002F (/) 426 // ASCII alphanumeric 427 // then return failure. 428 if (/[^+/0-9A-Za-z]/.test(data)) { 429 return 'failure' 430 } 431 432 const binary = atob(data) 433 const bytes = new Uint8Array(binary.length) 434 435 for (let byte = 0; byte < binary.length; byte++) { 436 bytes[byte] = binary.charCodeAt(byte) 437 } 438 439 return bytes 440} 441 442// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string 443// tests: https://fetch.spec.whatwg.org/#example-http-quoted-string 444/** 445 * @param {string} input 446 * @param {{ position: number }} position 447 * @param {boolean?} extractValue 448 */ 449function collectAnHTTPQuotedString (input, position, extractValue) { 450 // 1. Let positionStart be position. 451 const positionStart = position.position 452 453 // 2. Let value be the empty string. 454 let value = '' 455 456 // 3. Assert: the code point at position within input 457 // is U+0022 ("). 458 assert(input[position.position] === '"') 459 460 // 4. Advance position by 1. 461 position.position++ 462 463 // 5. While true: 464 while (true) { 465 // 1. Append the result of collecting a sequence of code points 466 // that are not U+0022 (") or U+005C (\) from input, given 467 // position, to value. 468 value += collectASequenceOfCodePoints( 469 (char) => char !== '"' && char !== '\\', 470 input, 471 position 472 ) 473 474 // 2. If position is past the end of input, then break. 475 if (position.position >= input.length) { 476 break 477 } 478 479 // 3. Let quoteOrBackslash be the code point at position within 480 // input. 481 const quoteOrBackslash = input[position.position] 482 483 // 4. Advance position by 1. 484 position.position++ 485 486 // 5. If quoteOrBackslash is U+005C (\), then: 487 if (quoteOrBackslash === '\\') { 488 // 1. If position is past the end of input, then append 489 // U+005C (\) to value and break. 490 if (position.position >= input.length) { 491 value += '\\' 492 break 493 } 494 495 // 2. Append the code point at position within input to value. 496 value += input[position.position] 497 498 // 3. Advance position by 1. 499 position.position++ 500 501 // 6. Otherwise: 502 } else { 503 // 1. Assert: quoteOrBackslash is U+0022 ("). 504 assert(quoteOrBackslash === '"') 505 506 // 2. Break. 507 break 508 } 509 } 510 511 // 6. If the extract-value flag is set, then return value. 512 if (extractValue) { 513 return value 514 } 515 516 // 7. Return the code points from positionStart to position, 517 // inclusive, within input. 518 return input.slice(positionStart, position.position) 519} 520 521/** 522 * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type 523 */ 524function serializeAMimeType (mimeType) { 525 assert(mimeType !== 'failure') 526 const { parameters, essence } = mimeType 527 528 // 1. Let serialization be the concatenation of mimeType’s 529 // type, U+002F (/), and mimeType’s subtype. 530 let serialization = essence 531 532 // 2. For each name → value of mimeType’s parameters: 533 for (let [name, value] of parameters.entries()) { 534 // 1. Append U+003B (;) to serialization. 535 serialization += ';' 536 537 // 2. Append name to serialization. 538 serialization += name 539 540 // 3. Append U+003D (=) to serialization. 541 serialization += '=' 542 543 // 4. If value does not solely contain HTTP token code 544 // points or value is the empty string, then: 545 if (!HTTP_TOKEN_CODEPOINTS.test(value)) { 546 // 1. Precede each occurence of U+0022 (") or 547 // U+005C (\) in value with U+005C (\). 548 value = value.replace(/(\\|")/g, '\\$1') 549 550 // 2. Prepend U+0022 (") to value. 551 value = '"' + value 552 553 // 3. Append U+0022 (") to value. 554 value += '"' 555 } 556 557 // 5. Append value to serialization. 558 serialization += value 559 } 560 561 // 3. Return serialization. 562 return serialization 563} 564 565/** 566 * @see https://fetch.spec.whatwg.org/#http-whitespace 567 * @param {string} char 568 */ 569function isHTTPWhiteSpace (char) { 570 return char === '\r' || char === '\n' || char === '\t' || char === ' ' 571} 572 573/** 574 * @see https://fetch.spec.whatwg.org/#http-whitespace 575 * @param {string} str 576 */ 577function removeHTTPWhitespace (str, leading = true, trailing = true) { 578 let lead = 0 579 let trail = str.length - 1 580 581 if (leading) { 582 for (; lead < str.length && isHTTPWhiteSpace(str[lead]); lead++); 583 } 584 585 if (trailing) { 586 for (; trail > 0 && isHTTPWhiteSpace(str[trail]); trail--); 587 } 588 589 return str.slice(lead, trail + 1) 590} 591 592/** 593 * @see https://infra.spec.whatwg.org/#ascii-whitespace 594 * @param {string} char 595 */ 596function isASCIIWhitespace (char) { 597 return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' ' 598} 599 600/** 601 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace 602 */ 603function removeASCIIWhitespace (str, leading = true, trailing = true) { 604 let lead = 0 605 let trail = str.length - 1 606 607 if (leading) { 608 for (; lead < str.length && isASCIIWhitespace(str[lead]); lead++); 609 } 610 611 if (trailing) { 612 for (; trail > 0 && isASCIIWhitespace(str[trail]); trail--); 613 } 614 615 return str.slice(lead, trail + 1) 616} 617 618module.exports = { 619 dataURLProcessor, 620 URLSerializer, 621 collectASequenceOfCodePoints, 622 collectASequenceOfCodePointsFast, 623 stringPercentDecode, 624 parseMIMEType, 625 collectAnHTTPQuotedString, 626 serializeAMimeType 627} 628