'use strict';

// An implementation of the WHATWG Encoding Standard
// https://encoding.spec.whatwg.org

const {
  Boolean,
  ObjectCreate,
  ObjectDefineProperties,
  ObjectGetOwnPropertyDescriptors,
  ObjectSetPrototypeOf,
  ObjectValues,
  SafeMap,
  StringPrototypeSlice,
  Symbol,
  SymbolToStringTag,
  Uint32Array,
  Uint8Array,
} = primordials;

const {
  ERR_ENCODING_NOT_SUPPORTED,
  ERR_INVALID_ARG_TYPE,
  ERR_INVALID_THIS,
  ERR_NO_ICU,
} = require('internal/errors').codes;

// Symbol-keyed slots used to keep encoder/decoder state off the public
// string-keyed surface of the instances.
const kHandle = Symbol('handle');
const kFlags = Symbol('flags');
const kEncoding = Symbol('encoding');
const kDecoder = Symbol('decoder');
const kEncoder = Symbol('encoder');
const kFatal = Symbol('kFatal');
const kUTF8FastPath = Symbol('kUTF8FastPath');
const kIgnoreBOM = Symbol('kIgnoreBOM');

const {
  getConstructorOf,
  customInspectSymbol: inspect,
  kEmptyObject,
  kEnumerableProperty,
} = require('internal/util');

const {
  isAnyArrayBuffer,
  isArrayBufferView,
  isUint8Array,
} = require('internal/util/types');

const {
  validateString,
  validateObject,
} = require('internal/validators');

const {
  encodeInto,
  encodeUtf8String,
  decodeUTF8,
} = internalBinding('buffer');

let Buffer;

/**
 * Loads the public `buffer` module on first use and caches its `Buffer`
 * export for subsequent calls.
 * @returns {Function} The `Buffer` constructor.
 */
function lazyBuffer() {
  if (Buffer === undefined)
    Buffer = require('buffer').Buffer;
  return Buffer;
}

/**
 * Brand check for TextEncoder methods and accessors.
 * @param {any} obj The `this` value to check.
 * @throws {ERR_INVALID_THIS} If `obj` is nullish or is not marked with
 *   `kEncoder`.
 */
function validateEncoder(obj) {
  if (obj == null || obj[kEncoder] !== true)
    throw new ERR_INVALID_THIS('TextEncoder');
}

/**
 * Brand check for TextDecoder methods and accessors.
 * @param {any} obj The `this` value to check.
 * @throws {ERR_INVALID_THIS} If `obj` is nullish or is not marked with
 *   `kDecoder`.
 */
function validateDecoder(obj) {
  if (obj == null || obj[kDecoder] !== true)
    throw new ERR_INVALID_THIS('TextDecoder');
}

// Bit flags kept in `this[kFlags]`. With ICU they are handed to the native
// converter (getConverter() / decode()); the JS fallback interprets them
// itself.
const CONVERTER_FLAGS_FLUSH = 0x1;
const CONVERTER_FLAGS_FATAL = 0x2;
const CONVERTER_FLAGS_IGNORE_BOM = 0x4;

// Default `input` for decode() when the caller passes nothing.
const empty = new Uint8Array(0);

// validateObject() settings shared by every `options` argument in this file:
// besides plain objects, `null`, arrays and functions are accepted.
const kOptionsValidation = {
  __proto__: null,
  nullable: true,
  allowArray: true,
  allowFunction: true,
};

// Maps every supported label (lower-case, ASCII) to its canonical encoding
// name.
const encodings = new SafeMap([
  ['unicode-1-1-utf-8', 'utf-8'],
  ['utf8', 'utf-8'],
  ['utf-8', 'utf-8'],
  ['866', 'ibm866'],
  ['cp866', 'ibm866'],
  ['csibm866', 'ibm866'],
  ['ibm866', 'ibm866'],
  ['csisolatin2', 'iso-8859-2'],
  ['iso-8859-2', 'iso-8859-2'],
  ['iso-ir-101', 'iso-8859-2'],
  ['iso8859-2', 'iso-8859-2'],
  ['iso88592', 'iso-8859-2'],
  ['iso_8859-2', 'iso-8859-2'],
  ['iso_8859-2:1987', 'iso-8859-2'],
  ['l2', 'iso-8859-2'],
  ['latin2', 'iso-8859-2'],
  ['csisolatin3', 'iso-8859-3'],
  ['iso-8859-3', 'iso-8859-3'],
  ['iso-ir-109', 'iso-8859-3'],
  ['iso8859-3', 'iso-8859-3'],
  ['iso88593', 'iso-8859-3'],
  ['iso_8859-3', 'iso-8859-3'],
  ['iso_8859-3:1988', 'iso-8859-3'],
  ['l3', 'iso-8859-3'],
  ['latin3', 'iso-8859-3'],
  ['csisolatin4', 'iso-8859-4'],
  ['iso-8859-4', 'iso-8859-4'],
  ['iso-ir-110', 'iso-8859-4'],
  ['iso8859-4', 'iso-8859-4'],
  ['iso88594', 'iso-8859-4'],
  ['iso_8859-4', 'iso-8859-4'],
  ['iso_8859-4:1988', 'iso-8859-4'],
  ['l4', 'iso-8859-4'],
  ['latin4', 'iso-8859-4'],
  ['csisolatincyrillic', 'iso-8859-5'],
  ['cyrillic', 'iso-8859-5'],
  ['iso-8859-5', 'iso-8859-5'],
  ['iso-ir-144', 'iso-8859-5'],
  ['iso8859-5', 'iso-8859-5'],
  ['iso88595', 'iso-8859-5'],
  ['iso_8859-5', 'iso-8859-5'],
  ['iso_8859-5:1988', 'iso-8859-5'],
  ['arabic', 'iso-8859-6'],
  ['asmo-708', 'iso-8859-6'],
  ['csiso88596e', 'iso-8859-6'],
  ['csiso88596i', 'iso-8859-6'],
  ['csisolatinarabic', 'iso-8859-6'],
  ['ecma-114', 'iso-8859-6'],
  ['iso-8859-6', 'iso-8859-6'],
  ['iso-8859-6-e', 'iso-8859-6'],
  ['iso-8859-6-i', 'iso-8859-6'],
  ['iso-ir-127', 'iso-8859-6'],
  ['iso8859-6', 'iso-8859-6'],
  ['iso88596', 'iso-8859-6'],
  ['iso_8859-6', 'iso-8859-6'],
  ['iso_8859-6:1987', 'iso-8859-6'],
  ['csisolatingreek', 'iso-8859-7'],
  ['ecma-118', 'iso-8859-7'],
  ['elot_928', 'iso-8859-7'],
  ['greek', 'iso-8859-7'],
  ['greek8', 'iso-8859-7'],
  ['iso-8859-7', 'iso-8859-7'],
  ['iso-ir-126', 'iso-8859-7'],
  ['iso8859-7', 'iso-8859-7'],
  ['iso88597', 'iso-8859-7'],
  ['iso_8859-7', 'iso-8859-7'],
  ['iso_8859-7:1987', 'iso-8859-7'],
  ['sun_eu_greek', 'iso-8859-7'],
  ['csiso88598e', 'iso-8859-8'],
  ['csisolatinhebrew', 'iso-8859-8'],
  ['hebrew', 'iso-8859-8'],
  ['iso-8859-8', 'iso-8859-8'],
  ['iso-8859-8-e', 'iso-8859-8'],
  ['iso-ir-138', 'iso-8859-8'],
  ['iso8859-8', 'iso-8859-8'],
  ['iso88598', 'iso-8859-8'],
  ['iso_8859-8', 'iso-8859-8'],
  ['iso_8859-8:1988', 'iso-8859-8'],
  ['visual', 'iso-8859-8'],
  ['csiso88598i', 'iso-8859-8-i'],
  ['iso-8859-8-i', 'iso-8859-8-i'],
  ['logical', 'iso-8859-8-i'],
  ['csisolatin6', 'iso-8859-10'],
  ['iso-8859-10', 'iso-8859-10'],
  ['iso-ir-157', 'iso-8859-10'],
  ['iso8859-10', 'iso-8859-10'],
  ['iso885910', 'iso-8859-10'],
  ['l6', 'iso-8859-10'],
  ['latin6', 'iso-8859-10'],
  ['iso-8859-13', 'iso-8859-13'],
  ['iso8859-13', 'iso-8859-13'],
  ['iso885913', 'iso-8859-13'],
  ['iso-8859-14', 'iso-8859-14'],
  ['iso8859-14', 'iso-8859-14'],
  ['iso885914', 'iso-8859-14'],
  ['csisolatin9', 'iso-8859-15'],
  ['iso-8859-15', 'iso-8859-15'],
  ['iso8859-15', 'iso-8859-15'],
  ['iso885915', 'iso-8859-15'],
  ['iso_8859-15', 'iso-8859-15'],
  ['l9', 'iso-8859-15'],
  ['cskoi8r', 'koi8-r'],
  ['koi', 'koi8-r'],
  ['koi8', 'koi8-r'],
  ['koi8-r', 'koi8-r'],
  ['koi8_r', 'koi8-r'],
  ['koi8-ru', 'koi8-u'],
  ['koi8-u', 'koi8-u'],
  ['csmacintosh', 'macintosh'],
  ['mac', 'macintosh'],
  ['macintosh', 'macintosh'],
  ['x-mac-roman', 'macintosh'],
  ['dos-874', 'windows-874'],
  ['iso-8859-11', 'windows-874'],
  ['iso8859-11', 'windows-874'],
  ['iso885911', 'windows-874'],
  ['tis-620', 'windows-874'],
  ['windows-874', 'windows-874'],
  ['cp1250', 'windows-1250'],
  ['windows-1250', 'windows-1250'],
  ['x-cp1250', 'windows-1250'],
  ['cp1251', 'windows-1251'],
  ['windows-1251', 'windows-1251'],
  ['x-cp1251', 'windows-1251'],
  ['ansi_x3.4-1968', 'windows-1252'],
  ['ascii', 'windows-1252'],
  ['cp1252', 'windows-1252'],
  ['cp819', 'windows-1252'],
  ['csisolatin1', 'windows-1252'],
  ['ibm819', 'windows-1252'],
  ['iso-8859-1', 'windows-1252'],
  ['iso-ir-100', 'windows-1252'],
  ['iso8859-1', 'windows-1252'],
  ['iso88591', 'windows-1252'],
  ['iso_8859-1', 'windows-1252'],
  ['iso_8859-1:1987', 'windows-1252'],
  ['l1', 'windows-1252'],
  ['latin1', 'windows-1252'],
  ['us-ascii', 'windows-1252'],
  ['windows-1252', 'windows-1252'],
  ['x-cp1252', 'windows-1252'],
  ['cp1253', 'windows-1253'],
  ['windows-1253', 'windows-1253'],
  ['x-cp1253', 'windows-1253'],
  ['cp1254', 'windows-1254'],
  ['csisolatin5', 'windows-1254'],
  ['iso-8859-9', 'windows-1254'],
  ['iso-ir-148', 'windows-1254'],
  ['iso8859-9', 'windows-1254'],
  ['iso88599', 'windows-1254'],
  ['iso_8859-9', 'windows-1254'],
  ['iso_8859-9:1989', 'windows-1254'],
  ['l5', 'windows-1254'],
  ['latin5', 'windows-1254'],
  ['windows-1254', 'windows-1254'],
  ['x-cp1254', 'windows-1254'],
  ['cp1255', 'windows-1255'],
  ['windows-1255', 'windows-1255'],
  ['x-cp1255', 'windows-1255'],
  ['cp1256', 'windows-1256'],
  ['windows-1256', 'windows-1256'],
  ['x-cp1256', 'windows-1256'],
  ['cp1257', 'windows-1257'],
  ['windows-1257', 'windows-1257'],
  ['x-cp1257', 'windows-1257'],
  ['cp1258', 'windows-1258'],
  ['windows-1258', 'windows-1258'],
  ['x-cp1258', 'windows-1258'],
  ['x-mac-cyrillic', 'x-mac-cyrillic'],
  ['x-mac-ukrainian', 'x-mac-cyrillic'],
  ['chinese', 'gbk'],
  ['csgb2312', 'gbk'],
  ['csiso58gb231280', 'gbk'],
  ['gb2312', 'gbk'],
  ['gb_2312', 'gbk'],
  ['gb_2312-80', 'gbk'],
  ['gbk', 'gbk'],
  ['iso-ir-58', 'gbk'],
  ['x-gbk', 'gbk'],
  ['gb18030', 'gb18030'],
  ['big5', 'big5'],
  ['big5-hkscs', 'big5'],
  ['cn-big5', 'big5'],
  ['csbig5', 'big5'],
  ['x-x-big5', 'big5'],
  ['cseucpkdfmtjapanese', 'euc-jp'],
  ['euc-jp', 'euc-jp'],
  ['x-euc-jp', 'euc-jp'],
  ['csiso2022jp', 'iso-2022-jp'],
  ['iso-2022-jp', 'iso-2022-jp'],
  ['csshiftjis', 'shift_jis'],
  ['ms932', 'shift_jis'],
  ['ms_kanji', 'shift_jis'],
  ['shift-jis', 'shift_jis'],
  ['shift_jis', 'shift_jis'],
  ['sjis', 'shift_jis'],
  ['windows-31j', 'shift_jis'],
  ['x-sjis', 'shift_jis'],
  ['cseuckr', 'euc-kr'],
  ['csksc56011987', 'euc-kr'],
  ['euc-kr', 'euc-kr'],
  ['iso-ir-149', 'euc-kr'],
  ['korean', 'euc-kr'],
  ['ks_c_5601-1987', 'euc-kr'],
  ['ks_c_5601-1989', 'euc-kr'],
  ['ksc5601', 'euc-kr'],
  ['ksc_5601', 'euc-kr'],
  ['windows-949', 'euc-kr'],
  ['utf-16be', 'utf-16be'],
  ['utf-16le', 'utf-16le'],
  ['utf-16', 'utf-16le'],
]);

// Unfortunately, String.prototype.trim also removes non-ascii whitespace,
// so we have to do this manually
/**
 * Strips leading and trailing TAB, LF, FF, CR and SPACE from `label`.
 * @param {string} label
 * @returns {string}
 */
function trimAsciiWhitespace(label) {
  let s = 0;
  let e = label.length;
  while (s < e && (
    label[s] === '\u0009' ||
    label[s] === '\u000a' ||
    label[s] === '\u000c' ||
    label[s] === '\u000d' ||
    label[s] === '\u0020')) {
    s++;
  }
  while (e > s && (
    label[e - 1] === '\u0009' ||
    label[e - 1] === '\u000a' ||
    label[e - 1] === '\u000c' ||
    label[e - 1] === '\u000d' ||
    label[e - 1] === '\u0020')) {
    e--;
  }
  return StringPrototypeSlice(label, s, e);
}

/**
 * Reports whether every UTF-16 code unit of `str` is in the ASCII range.
 * @param {string} str
 * @returns {boolean}
 */
function isAsciiOnly(str) {
  for (let i = 0; i < str.length; i++) {
    // Single-code-unit strings compare by code unit value.
    if (str[i] > '\u007f') return false;
  }
  return true;
}

/**
 * Resolves an encoding label to its canonical encoding name.
 *
 * The label is first looked up verbatim; if that misses, it is lower-cased,
 * stripped of surrounding ASCII whitespace and looked up again.
 * @param {string} label
 * @returns {string | undefined} The canonical name, or `undefined` when the
 *   label is not in the table.
 */
function getEncodingFromLabel(label) {
  const enc = encodings.get(label);
  if (enc !== undefined) return enc;
  // Labels must match ASCII case-insensitively. toLowerCase() is
  // Unicode-aware (U+212A KELVIN SIGN lowercases to 'k'), so a label holding
  // any non-ASCII code unit must be rejected before lowercasing; every entry
  // in the table is pure ASCII, so such a label can never be a valid match.
  if (!isAsciiOnly(label)) return undefined;
  return encodings.get(trimAsciiWhitespace(label.toLowerCase()));
}

// Scratch array handed to the native encodeInto(); slot 0 is reported as
// `read` and slot 1 as `written`.
const encodeIntoResults = new Uint32Array(2);

/**
 * WHATWG TextEncoder. Always encodes to UTF-8.
 */
class TextEncoder {
  constructor() {
    // Brand used by validateEncoder().
    this[kEncoder] = true;
  }

  /**
   * @returns {string} Always `'utf-8'`.
   */
  get encoding() {
    validateEncoder(this);
    return 'utf-8';
  }

  /**
   * Encodes `input` (coerced to a string) via the native encodeUtf8String().
   * @param {any} [input]
   * @returns {Uint8Array}
   */
  encode(input = '') {
    validateEncoder(this);
    return encodeUtf8String(`${input}`);
  }

  /**
   * Encodes `src` into the caller-provided `dest`.
   * @param {string} src
   * @param {Uint8Array} dest
   * @returns {{ read: number, written: number }}
   * @throws {ERR_INVALID_ARG_TYPE} If `src` is not a string or `dest` is not
   *   a Uint8Array.
   */
  encodeInto(src, dest) {
    validateEncoder(this);
    validateString(src, 'src');
    if (!dest || !isUint8Array(dest))
      throw new ERR_INVALID_ARG_TYPE('dest', 'Uint8Array', dest);
    encodeInto(src, dest, encodeIntoResults);
    return { read: encodeIntoResults[0], written: encodeIntoResults[1] };
  }

  // Custom util.inspect() representation: an object exposing `encoding`
  // whose prototype carries the instance's constructor.
  [inspect](depth, opts) {
    validateEncoder(this);
    if (typeof depth === 'number' && depth < 0)
      return this;
    const ctor = getConstructorOf(this);
    const obj = ObjectCreate({
      constructor: ctor === null ? TextEncoder : ctor,
    });
    obj.encoding = this.encoding;
    // Lazy to avoid circular dependency
    return require('internal/util/inspect').inspect(obj, opts);
  }
}

ObjectDefineProperties(
  TextEncoder.prototype, {
    'encode': kEnumerableProperty,
    'encodeInto': kEnumerableProperty,
    'encoding': kEnumerableProperty,
    [SymbolToStringTag]: { __proto__: null, configurable: true, value: 'TextEncoder' },
  });

// Pick the ICU-backed decoder when this build has Intl support, otherwise
// fall back to the StringDecoder-based implementation.
const TextDecoder =
  internalBinding('config').hasIntl ?
    makeTextDecoderICU() :
    makeTextDecoderJS();

/**
 * Builds the TextDecoder class backed by the `icu` internal binding.
 * @returns {Function} The TextDecoder class.
 */
function makeTextDecoderICU() {
  const {
    decode: _decode,
    getConverter,
  } = internalBinding('icu');

  class TextDecoder {
    /**
     * @param {any} [encoding] Encoding label; coerced to a string.
     * @param {object | null} [options] `fatal` and `ignoreBOM` are read.
     * @throws {ERR_ENCODING_NOT_SUPPORTED} If the label is unknown or no
     *   converter can be created for it.
     */
    constructor(encoding = 'utf-8', options = kEmptyObject) {
      encoding = `${encoding}`;
      validateObject(options, 'options', kOptionsValidation);

      const enc = getEncodingFromLabel(encoding);
      if (enc === undefined)
        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

      let flags = 0;
      if (options !== null) {
        flags |= options.fatal ? CONVERTER_FLAGS_FATAL : 0;
        flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
      }

      this[kDecoder] = true;
      this[kFlags] = flags;
      this[kEncoding] = enc;
      this[kIgnoreBOM] = Boolean(options?.ignoreBOM);
      this[kFatal] = Boolean(options?.fatal);
      // Only support fast path for UTF-8.
      this[kUTF8FastPath] = enc === 'utf-8';
      this[kHandle] = undefined;

      // UTF-8 decoders create their converter lazily, the first time the
      // fast path cannot be used.
      if (!this[kUTF8FastPath]) {
        this.#prepareConverter();
      }
    }

    // Creates the native converter once and stores it in this[kHandle].
    #prepareConverter() {
      if (this[kHandle] !== undefined) return;
      const handle = getConverter(this[kEncoding], this[kFlags]);
      if (handle === undefined)
        throw new ERR_ENCODING_NOT_SUPPORTED(this[kEncoding]);
      this[kHandle] = handle;
    }

    /**
     * Decodes `input` and returns the resulting string.
     * @param {ArrayBuffer | ArrayBufferView} [input]
     * @param {object | null} [options] Only `stream` is read.
     * @returns {string}
     */
    decode(input = empty, options = kEmptyObject) {
      validateDecoder(this);

      // The first streaming call switches this decoder to the converter
      // path for the rest of its lifetime.
      this[kUTF8FastPath] &&= !(options?.stream);

      if (this[kUTF8FastPath]) {
        return decodeUTF8(input, this[kIgnoreBOM], this[kFatal]);
      }

      this.#prepareConverter();

      validateObject(options, 'options', kOptionsValidation);

      // A null `options` means "all defaults", i.e. not streaming, so it
      // must flush exactly like an options object without `stream`.
      const flags = options?.stream ? 0 : CONVERTER_FLAGS_FLUSH;

      return _decode(this[kHandle], input, flags, this.encoding);
    }
  }

  return TextDecoder;
}

/**
 * Builds the TextDecoder class used when ICU is unavailable. It is backed by
 * `string_decoder`, supports only utf-8 and utf-16le, and rejects the
 * `fatal` option.
 * @returns {Function} The TextDecoder class.
 */
function makeTextDecoderJS() {
  let StringDecoder;
  function lazyStringDecoder() {
    if (StringDecoder === undefined)
      ({ StringDecoder } = require('string_decoder'));
    return StringDecoder;
  }

  const kBOMSeen = Symbol('BOM seen');

  function hasConverter(encoding) {
    return encoding === 'utf-8' || encoding === 'utf-16le';
  }

  class TextDecoder {
    /**
     * @param {any} [encoding] Encoding label; coerced to a string.
     * @param {object | null} [options] `fatal` and `ignoreBOM` are read.
     * @throws {ERR_ENCODING_NOT_SUPPORTED} If the label is unknown or the
     *   encoding is neither utf-8 nor utf-16le.
     * @throws {ERR_NO_ICU} If `options.fatal` is truthy.
     */
    constructor(encoding = 'utf-8', options = kEmptyObject) {
      encoding = `${encoding}`;
      validateObject(options, 'options', kOptionsValidation);

      const enc = getEncodingFromLabel(encoding);
      if (enc === undefined || !hasConverter(enc))
        throw new ERR_ENCODING_NOT_SUPPORTED(encoding);

      let flags = 0;
      if (options !== null) {
        if (options.fatal) {
          throw new ERR_NO_ICU('"fatal" option');
        }
        flags |= options.ignoreBOM ? CONVERTER_FLAGS_IGNORE_BOM : 0;
      }

      this[kDecoder] = true;
      // StringDecoder will normalize WHATWG encoding to Node.js encoding.
      this[kHandle] = new (lazyStringDecoder())(enc);
      this[kFlags] = flags;
      this[kEncoding] = enc;
      this[kBOMSeen] = false;
    }

    /**
     * Decodes `input` and returns the resulting string.
     * @param {ArrayBuffer | ArrayBufferView} [input]
     * @param {object | null} [options] Only `stream` is read.
     * @returns {string}
     * @throws {ERR_INVALID_ARG_TYPE} If `input` is neither an ArrayBuffer
     *   nor an ArrayBufferView.
     */
    decode(input = empty, options = kEmptyObject) {
      validateDecoder(this);
      // Wrap the input in a Buffer; if wrapping throws, decode nothing
      // instead of propagating the error.
      if (isAnyArrayBuffer(input)) {
        try {
          input = lazyBuffer().from(input);
        } catch {
          input = empty;
        }
      } else if (isArrayBufferView(input)) {
        try {
          input = lazyBuffer().from(input.buffer, input.byteOffset,
                                     input.byteLength);
        } catch {
          input = empty;
        }
      } else {
        throw new ERR_INVALID_ARG_TYPE('input',
                                       ['ArrayBuffer', 'ArrayBufferView'],
                                       input);
      }
      validateObject(options, 'options', kOptionsValidation);

      // The previous call flushed, so this call starts a new stream and a
      // leading BOM may be stripped again.
      if (this[kFlags] & CONVERTER_FLAGS_FLUSH) {
        this[kBOMSeen] = false;
      }

      if (options !== null && options.stream) {
        this[kFlags] &= ~CONVERTER_FLAGS_FLUSH;
      } else {
        this[kFlags] |= CONVERTER_FLAGS_FLUSH;
      }

      let result = this[kFlags] & CONVERTER_FLAGS_FLUSH ?
        this[kHandle].end(input) :
        this[kHandle].write(input);

      if (result.length > 0 &&
          !this[kBOMSeen] &&
          !(this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM)) {
        // If the very first result in the stream is a BOM, and we are not
        // explicitly told to ignore it, then we discard it.
        if (result[0] === '\ufeff') {
          result = StringPrototypeSlice(result, 1);
        }
        this[kBOMSeen] = true;
      }

      return result;
    }
  }

  return TextDecoder;
}

// Mix in some shared properties.
557const sharedProperties = ObjectGetOwnPropertyDescriptors({ 558 get encoding() { 559 validateDecoder(this); 560 return this[kEncoding]; 561 }, 562 563 get fatal() { 564 validateDecoder(this); 565 return (this[kFlags] & CONVERTER_FLAGS_FATAL) === CONVERTER_FLAGS_FATAL; 566 }, 567 568 get ignoreBOM() { 569 validateDecoder(this); 570 return (this[kFlags] & CONVERTER_FLAGS_IGNORE_BOM) === 571 CONVERTER_FLAGS_IGNORE_BOM; 572 }, 573 574 [inspect](depth, opts) { 575 validateDecoder(this); 576 if (typeof depth === 'number' && depth < 0) 577 return this; 578 const constructor = getConstructorOf(this) || TextDecoder; 579 const obj = ObjectCreate({ constructor }); 580 obj.encoding = this.encoding; 581 obj.fatal = this.fatal; 582 obj.ignoreBOM = this.ignoreBOM; 583 if (opts.showHidden) { 584 obj[kFlags] = this[kFlags]; 585 obj[kHandle] = this[kHandle]; 586 } 587 // Lazy to avoid circular dependency 588 const { inspect } = require('internal/util/inspect'); 589 return `${constructor.name} ${inspect(obj)}`; 590 }, 591}); 592const propertiesValues = ObjectValues(sharedProperties); 593for (let i = 0; i < propertiesValues.length; i++) { 594 // We want to use null-prototype objects to not rely on globally mutable 595 // %Object.prototype%. 596 ObjectSetPrototypeOf(propertiesValues[i], null); 597} 598sharedProperties[inspect].enumerable = false; 599 600ObjectDefineProperties(TextDecoder.prototype, { 601 decode: kEnumerableProperty, 602 ...sharedProperties, 603 [SymbolToStringTag]: { 604 __proto__: null, 605 configurable: true, 606 value: 'TextDecoder', 607 }, 608}); 609 610module.exports = { 611 getEncodingFromLabel, 612 TextDecoder, 613 TextEncoder, 614}; 615