1const assert = require('assert')
2const { atob } = require('buffer')
3const { isomorphicDecode } = require('./util')
4
// Shared encoder: stringPercentDecode uses it to turn a string into its
// UTF-8 bytes before percent-decoding.
const encoder = new TextEncoder()

/**
 * Matches a non-empty string that consists solely of HTTP token code points.
 * The hyphen is escaped so that it is a literal "-" instead of forming the
 * range "+-." (which would also admit U+002C (,)), and U+0060 (`) is listed
 * because the specification includes it.
 * @see https://mimesniff.spec.whatwg.org/#http-token-code-point
 */
const HTTP_TOKEN_CODEPOINTS = /^[!#$%&'*+\-.^_`|~A-Za-z0-9]+$/
/**
 * Matches any string containing an HTTP whitespace code point (LF, CR, TAB
 * or SPACE). It is unanchored, so test it against single characters.
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 */
const HTTP_WHITESPACE_REGEX = /(\u000A|\u000D|\u0009|\u0020)/ // eslint-disable-line
/**
 * Matches a non-empty string that consists solely of HTTP quoted-string
 * token code points (TAB, U+0020-U+007E, U+0080-U+00FF). The pattern is
 * anchored so one valid code point cannot validate an otherwise invalid
 * value; callers handle the empty string separately.
 * @see https://mimesniff.spec.whatwg.org/#http-quoted-string-token-code-point
 */
const HTTP_QUOTED_STRING_TOKENS = /^[\u0009\u0020-\u007E\u0080-\u00FF]+$/ // eslint-disable-line
16
// https://fetch.spec.whatwg.org/#data-url-processor
/**
 * Runs the fetch "data: URL processor" on an already parsed URL. The step
 * numbers in the comments refer to that algorithm.
 *
 * @param {URL} dataURL URL whose protocol must be "data:"
 * @returns the string 'failure' when the URL has no comma or its base64
 * body cannot be decoded; otherwise `{ mimeType, body }`, where mimeType is
 * the record produced by parseMIMEType and body holds the decoded bytes.
 */
function dataURLProcessor (dataURL) {
  // Step 1: this algorithm is only defined for data: URLs.
  assert(dataURL.protocol === 'data:')

  // Steps 2-3: serialize without the fragment, then drop the leading
  // "data:" prefix (5 code units).
  const input = URLSerializer(dataURL, true).slice(5)

  // Steps 4-5: everything in front of the first U+002C (,) is the raw
  // MIME type.
  const cursor = { position: 0 }
  const rawMimeType = collectASequenceOfCodePointsFast(',', input, cursor)

  // Step 6: strip leading and trailing ASCII whitespace. The cursor is
  // untouched by this, so it still indexes into input.
  let mimeType = removeASCIIWhitespace(rawMimeType, true, true)

  // Step 7: no comma was found, so there is no body to decode.
  if (cursor.position >= input.length) {
    return 'failure'
  }

  // Step 8: move past the comma.
  cursor.position++

  // Step 9: the encoded body is whatever follows the comma.
  const encodedBody = input.slice(cursor.position)

  // Step 10: percent-decode the body into bytes.
  let body = stringPercentDecode(encodedBody)

  // Step 11: a trailing ";base64" (optional spaces after the semicolon,
  // ASCII case-insensitive) marks the body as base64 encoded.
  const isBase64 = /;(\u0020){0,}base64$/i.test(mimeType)

  if (isBase64) {
    // 11.1-11.2: isomorphic decode, then forgiving-base64 decode.
    body = forgivingBase64(isomorphicDecode(body))

    // 11.3: propagate a decoding failure.
    if (body === 'failure') {
      return 'failure'
    }

    // 11.4-11.6: drop "base64", any spaces in front of it, and finally
    // the semicolon that introduced it.
    mimeType = mimeType
      .slice(0, -6)
      .replace(/(\u0020)+$/, '')
      .slice(0, -1)
  }

  // Step 12: a MIME type that begins with parameters defaults to text/plain.
  if (mimeType.startsWith(';')) {
    mimeType = 'text/plain' + mimeType
  }

  // Steps 13-14: parse the MIME type, falling back to the default record
  // when it cannot be parsed.
  const parsed = parseMIMEType(mimeType)
  const mimeTypeRecord = parsed === 'failure'
    ? parseMIMEType('text/plain;charset=US-ASCII')
    : parsed

  // Step 15: https://fetch.spec.whatwg.org/#data-url-struct
  return { mimeType: mimeTypeRecord, body }
}
115
// https://url.spec.whatwg.org/#concept-url-serializer
/**
 * Serializes a URL, optionally leaving out its fragment.
 *
 * @param {URL} url
 * @param {boolean} excludeFragment when true, the "#fragment" part
 * (including the "#" itself) is removed from the result
 * @returns {string}
 */
function URLSerializer (url, excludeFragment = false) {
  const { href } = url

  if (!excludeFragment) {
    return href
  }

  const hashLength = url.hash.length

  if (hashLength !== 0) {
    // `hash` includes the leading "#", so cutting its length off the end
    // of href removes the whole fragment.
    return href.substring(0, href.length - hashLength)
  }

  // An empty fragment ("data:,x#") is reported by `hash` as '' even though
  // href still ends with "#"; drop that dangling "#" as well.
  return href.endsWith('#') ? href.slice(0, -1) : href
}
131
// https://infra.spec.whatwg.org/#collect-a-sequence-of-code-points
/**
 * Collects characters from input, starting at position.position, for as
 * long as condition accepts them. position.position is advanced in place
 * and ends up on the first rejected character (or stays where it was when
 * nothing is accepted).
 *
 * @param {(char: string) => boolean} condition
 * @param {string} input
 * @param {{ position: number }} position
 * @returns {string} the collected characters, possibly the empty string
 */
function collectASequenceOfCodePoints (condition, input, position) {
  const start = position.position

  // Advance while there is input left and the current character passes.
  while (position.position < input.length) {
    if (!condition(input[position.position])) {
      break
    }

    position.position++
  }

  // Everything between the starting point and the cursor was accepted.
  return input.slice(start, position.position)
}
155
/**
 * Variant of collectASequenceOfCodePoints for the common case of
 * "collect everything that is not this one character": it locates the
 * character with indexOf instead of testing each character in turn.
 * position.position is moved onto the character when it is found, or to
 * the end of input when it is not.
 *
 * @param {string} char the character that ends the sequence
 * @param {string} input
 * @param {{ position: number }} position
 * @returns {string} the characters between the starting position and the
 * new position
 */
function collectASequenceOfCodePointsFast (char, input, position) {
  const from = position.position
  const found = input.indexOf(char, from)

  position.position = found === -1 ? input.length : found

  return input.slice(from, position.position)
}
174
// https://url.spec.whatwg.org/#string-percent-decode
/**
 * Percent-decodes a string: the input is UTF-8 encoded with the shared
 * module-level encoder and the resulting bytes are handed to percentDecode.
 *
 * @param {string} input
 * @returns {Uint8Array} the percent-decoded bytes
 */
function stringPercentDecode (input) {
  return percentDecode(encoder.encode(input))
}
184
// https://url.spec.whatwg.org/#percent-decode
/**
 * Percent-decodes a byte sequence: every "%" that is followed by two ASCII
 * hex digits is replaced by the byte those digits encode; all other bytes,
 * including a "%" without two hex digits after it, are copied unchanged.
 *
 * @param {Uint8Array} input
 * @returns {Uint8Array} a new array with the decoded bytes
 */
function percentDecode (input) {
  /**
   * Value of an ASCII hex digit byte, or -1 when the byte is not a hex
   * digit (this includes `undefined`, read past the end of input).
   * @param {number | undefined} code
   */
  const hexValue = (code) => {
    if (code >= 0x30 && code <= 0x39) return code - 0x30 // 0-9
    if (code >= 0x41 && code <= 0x46) return code - 0x37 // A-F
    if (code >= 0x61 && code <= 0x66) return code - 0x57 // a-f
    return -1
  }

  /** @type {number[]} */
  const decoded = []
  let index = 0

  while (index < input.length) {
    const current = input[index]

    if (current === 0x25) {
      const high = hexValue(input[index + 1])
      const low = hexValue(input[index + 2])

      if (high !== -1 && low !== -1) {
        // "%XY": emit the encoded byte and step over all three bytes.
        decoded.push(high * 16 + low)
        index += 3
        continue
      }
    }

    // Ordinary byte, or a "%" that does not start a valid escape.
    decoded.push(current)
    index++
  }

  return Uint8Array.from(decoded)
}
229
// https://mimesniff.spec.whatwg.org/#parse-a-mime-type
/**
 * Parses a MIME type string into a MIME type record. The numbered comments
 * follow the steps of the mimesniff "parse a MIME type" algorithm.
 *
 * @param {string} input
 * @returns the string 'failure' when the type or the subtype is empty or
 * contains something other than HTTP token code points; otherwise a record
 * `{ type, subtype, parameters, essence }` where type and subtype are
 * lowercased, parameters is a Map<string, string> keyed by lowercased
 * parameter name, and essence is "type/subtype".
 */
function parseMIMEType (input) {
  // 1. Remove any leading and trailing HTTP whitespace
  // from input.
  input = removeHTTPWhitespace(input, true, true)

  // 2. Let position be a position variable for input,
  // initially pointing at the start of input.
  const position = { position: 0 }

  // 3. Let type be the result of collecting a sequence
  // of code points that are not U+002F (/) from
  // input, given position.
  const type = collectASequenceOfCodePointsFast('/', input, position)

  // 4. If type is the empty string or does not solely
  // contain HTTP token code points, then return failure.
  // https://mimesniff.spec.whatwg.org/#http-token-code-point
  if (type.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(type)) {
    return 'failure'
  }

  // 5. If position is past the end of input, then return
  // failure. The collector leaves position at input.length when no
  // U+002F (/) exists, so "past the end" is `>=`, not `>`.
  if (position.position >= input.length) {
    return 'failure'
  }

  // 6. Advance position by 1. (This skips past U+002F (/).)
  position.position++

  // 7. Let subtype be the result of collecting a sequence of
  // code points that are not U+003B (;) from input, given
  // position.
  let subtype = collectASequenceOfCodePointsFast(';', input, position)

  // 8. Remove any trailing HTTP whitespace from subtype.
  subtype = removeHTTPWhitespace(subtype, false, true)

  // 9. If subtype is the empty string or does not solely
  // contain HTTP token code points, then return failure.
  if (subtype.length === 0 || !HTTP_TOKEN_CODEPOINTS.test(subtype)) {
    return 'failure'
  }

  const typeLowercase = type.toLowerCase()
  const subtypeLowercase = subtype.toLowerCase()

  // 10. Let mimeType be a new MIME type record whose type
  // is type, in ASCII lowercase, and subtype is subtype,
  // in ASCII lowercase.
  // https://mimesniff.spec.whatwg.org/#mime-type
  const mimeType = {
    type: typeLowercase,
    subtype: subtypeLowercase,
    /** @type {Map<string, string>} */
    parameters: new Map(),
    // https://mimesniff.spec.whatwg.org/#mime-type-essence
    essence: `${typeLowercase}/${subtypeLowercase}`
  }

  // 11. While position is not past the end of input:
  while (position.position < input.length) {
    // 1. Advance position by 1. (This skips past U+003B (;).)
    position.position++

    // 2. Collect a sequence of code points that are HTTP
    // whitespace from input given position. The result is discarded;
    // only the advanced position matters.
    // https://fetch.spec.whatwg.org/#http-whitespace
    collectASequenceOfCodePoints(isHTTPWhiteSpace, input, position)

    // 3. Let parameterName be the result of collecting a
    // sequence of code points that are not U+003B (;)
    // or U+003D (=) from input, given position.
    let parameterName = collectASequenceOfCodePoints(
      (char) => char !== ';' && char !== '=',
      input,
      position
    )

    // 4. Set parameterName to parameterName, in ASCII
    // lowercase.
    parameterName = parameterName.toLowerCase()

    // 5. If position is not past the end of input, then:
    if (position.position < input.length) {
      // 1. If the code point at position within input is
      // U+003B (;), then continue. (A name without "=value".)
      if (input[position.position] === ';') {
        continue
      }

      // 2. Advance position by 1. (This skips past U+003D (=).)
      position.position++
    }

    // 6. If position is past the end of input, then break.
    // Nothing is left to read a value from.
    if (position.position >= input.length) {
      break
    }

    // 7. Let parameterValue be null.
    let parameterValue = null

    // 8. If the code point at position within input is
    // U+0022 ("), then:
    if (input[position.position] === '"') {
      // 1. Set parameterValue to the result of collecting
      // an HTTP quoted string from input, given position
      // and the extract-value flag.
      parameterValue = collectAnHTTPQuotedString(input, position, true)

      // 2. Collect a sequence of code points that are not
      // U+003B (;) from input, given position. Whatever trails the
      // closing quote is skipped.
      collectASequenceOfCodePointsFast(';', input, position)

    // 9. Otherwise:
    } else {
      // 1. Set parameterValue to the result of collecting
      // a sequence of code points that are not U+003B (;)
      // from input, given position.
      parameterValue = collectASequenceOfCodePointsFast(';', input, position)

      // 2. Remove any trailing HTTP whitespace from parameterValue.
      parameterValue = removeHTTPWhitespace(parameterValue, false, true)

      // 3. If parameterValue is the empty string, then continue.
      if (parameterValue.length === 0) {
        continue
      }
    }

    // 10. If all of the following are true
    // - parameterName is not the empty string
    // - parameterName solely contains HTTP token code points
    // - parameterValue solely contains HTTP quoted-string token code points
    // - mimeType’s parameters[parameterName] does not exist
    // then set mimeType’s parameters[parameterName] to parameterValue.
    // An empty (quoted) value is accepted explicitly because the
    // quoted-string token pattern is only tested on non-empty values.
    if (
      parameterName.length !== 0 &&
      HTTP_TOKEN_CODEPOINTS.test(parameterName) &&
      (parameterValue.length === 0 || HTTP_QUOTED_STRING_TOKENS.test(parameterValue)) &&
      !mimeType.parameters.has(parameterName)
    ) {
      mimeType.parameters.set(parameterName, parameterValue)
    }
  }

  // 12. Return mimeType.
  return mimeType
}
402
// https://infra.spec.whatwg.org/#forgiving-base64-decode
/**
 * Forgiving-base64 decodes a string: ASCII whitespace is ignored and the
 * trailing "=" padding is optional.
 *
 * @param {string} data
 * @returns {'failure' | Uint8Array} the decoded bytes, or the string
 * 'failure' when data is not valid forgiving-base64
 */
function forgivingBase64 (data) {
  // 1. Remove all ASCII whitespace from data.
  let stripped = data.replace(/[\u0009\u000A\u000C\u000D\u0020]/g, '')  // eslint-disable-line

  // 2. When the length is a multiple of four, drop one or two trailing
  // U+003D (=) padding code points.
  if (stripped.length % 4 === 0) {
    if (stripped.endsWith('==')) {
      stripped = stripped.slice(0, -2)
    } else if (stripped.endsWith('=')) {
      stripped = stripped.slice(0, -1)
    }
  }

  // 3. A length that leaves a remainder of 1 cannot be valid base64.
  const hasImpossibleLength = stripped.length % 4 === 1

  // 4. Only U+002B (+), U+002F (/) and ASCII alphanumerics may remain.
  const hasIllegalCodePoint = /[^+/0-9A-Za-z]/.test(stripped)

  if (hasImpossibleLength || hasIllegalCodePoint) {
    return 'failure'
  }

  // atob returns a binary string with one character per decoded byte;
  // copy each character code into a byte array.
  return Uint8Array.from(atob(stripped), (char) => char.charCodeAt(0))
}
441
// https://fetch.spec.whatwg.org/#collect-an-http-quoted-string
// tests: https://fetch.spec.whatwg.org/#example-http-quoted-string
/**
 * Collects an HTTP quoted string that starts at position.position, which
 * must point at a U+0022 ("). position.position is advanced past the
 * closing quote, or to the end of input when the string is unterminated.
 *
 * @param {string} input
 * @param {{ position: number }} position
 * @param {boolean?} extractValue when truthy, return the unescaped content
 * without the surrounding quotes; otherwise return the raw slice of input
 * that was consumed
 * @returns {string}
 */
function collectAnHTTPQuotedString (input, position, extractValue) {
  // Steps 1-2: remember where the string starts; value accumulates the
  // unescaped content.
  const positionStart = position.position
  const isPlainCodePoint = (char) => char !== '"' && char !== '\\'
  let value = ''

  // Steps 3-4: the opening quote must be present; move past it.
  assert(input[position.position] === '"')
  position.position++

  // Step 5.
  for (;;) {
    // 5.1: take everything up to the next quote or backslash.
    value += collectASequenceOfCodePoints(isPlainCodePoint, input, position)

    // 5.2: ran out of input before a closing quote.
    if (position.position >= input.length) {
      break
    }

    // 5.3-5.4: read the quote or backslash and move past it.
    const quoteOrBackslash = input[position.position++]

    // 5.6: anything other than a backslash has to be the closing quote.
    if (quoteOrBackslash !== '\\') {
      assert(quoteOrBackslash === '"')
      break
    }

    // 5.5.1: a backslash at the very end of input is kept literally.
    if (position.position >= input.length) {
      value += '\\'
      break
    }

    // 5.5.2-5.5.3: the code point after a backslash is taken verbatim.
    value += input[position.position++]
  }

  // Step 6: the extracted value, or step 7: the code points from
  // positionStart up to the current position.
  return extractValue ? value : input.slice(positionStart, position.position)
}
520
/**
 * Serializes a MIME type record into its string form:
 * "type/subtype" followed by ";name=value" for every parameter.
 *
 * @see https://mimesniff.spec.whatwg.org/#serialize-a-mime-type
 * @param mimeType a MIME type record (must not be the string 'failure');
 * its `essence` and `parameters` properties are read
 * @returns {string}
 */
function serializeAMimeType (mimeType) {
  assert(mimeType !== 'failure')
  const { parameters, essence } = mimeType

  // 1. Start from "type/subtype", which the record stores as its essence.
  let serialization = essence

  // 2. Append each parameter as ";name=value".
  for (const [name, rawValue] of parameters.entries()) {
    // 2.4. A value that is empty or contains anything other than HTTP
    // token code points is emitted as a quoted string, with every
    // occurrence of U+0022 (") or U+005C (\) preceded by U+005C (\).
    const value = HTTP_TOKEN_CODEPOINTS.test(rawValue)
      ? rawValue
      : '"' + rawValue.replace(/(\\|")/g, '\\$1') + '"'

    // 2.1-2.3 and 2.5.
    serialization += ';' + name + '=' + value
  }

  // 3. Return serialization.
  return serialization
}
564
/**
 * Tells whether a character is HTTP whitespace (CR, LF, TAB or SPACE).
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isHTTPWhiteSpace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === ' '
}

/**
 * Removes HTTP whitespace from the start and/or the end of a string.
 *
 * @see https://fetch.spec.whatwg.org/#http-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip whitespace at the start
 * @param {boolean} [trailing=true] strip whitespace at the end
 * @returns {string}
 */
function removeHTTPWhitespace (str, leading = true, trailing = true) {
  // [start, end) is the part of str that is kept.
  let start = 0
  let end = str.length

  if (leading) {
    while (start < end && isHTTPWhiteSpace(str[start])) {
      start++
    }
  }

  if (trailing) {
    // Walk back as far as `start`, so that a string made up entirely of
    // whitespace is emptied even when only trailing removal is requested.
    while (end > start && isHTTPWhiteSpace(str[end - 1])) {
      end--
    }
  }

  return str.slice(start, end)
}
591
/**
 * Tells whether a character is ASCII whitespace
 * (CR, LF, TAB, FORM FEED or SPACE).
 *
 * @see https://infra.spec.whatwg.org/#ascii-whitespace
 * @param {string} char
 * @returns {boolean}
 */
function isASCIIWhitespace (char) {
  return char === '\r' || char === '\n' || char === '\t' || char === '\f' || char === ' '
}

/**
 * Removes ASCII whitespace from the start and/or the end of a string.
 *
 * @see https://infra.spec.whatwg.org/#strip-leading-and-trailing-ascii-whitespace
 * @param {string} str
 * @param {boolean} [leading=true] strip whitespace at the start
 * @param {boolean} [trailing=true] strip whitespace at the end
 * @returns {string}
 */
function removeASCIIWhitespace (str, leading = true, trailing = true) {
  // [start, end) is the part of str that is kept.
  let start = 0
  let end = str.length

  if (leading) {
    while (start < end && isASCIIWhitespace(str[start])) {
      start++
    }
  }

  if (trailing) {
    // Walk back as far as `start`, so that a string made up entirely of
    // whitespace is emptied even when only trailing removal is requested.
    while (end > start && isASCIIWhitespace(str[end - 1])) {
      end--
    }
  }

  return str.slice(start, end)
}
617
618module.exports = {
619  dataURLProcessor,
620  URLSerializer,
621  collectASequenceOfCodePoints,
622  collectASequenceOfCodePointsFast,
623  stringPercentDecode,
624  parseMIMEType,
625  collectAnHTTPQuotedString,
626  serializeAMimeType
627}
628