11cb0ef41Sopenharmony_ci'use strict'; 21cb0ef41Sopenharmony_ci 31cb0ef41Sopenharmony_civar Buffer = require('safer-buffer').Buffer; 41cb0ef41Sopenharmony_ci 51cb0ef41Sopenharmony_ci// == UTF32-LE/BE codec. ========================================================== 61cb0ef41Sopenharmony_ci 71cb0ef41Sopenharmony_ciexports._utf32 = Utf32Codec; 81cb0ef41Sopenharmony_ci 91cb0ef41Sopenharmony_cifunction Utf32Codec(codecOptions, iconv) { 101cb0ef41Sopenharmony_ci this.iconv = iconv; 111cb0ef41Sopenharmony_ci this.bomAware = true; 121cb0ef41Sopenharmony_ci this.isLE = codecOptions.isLE; 131cb0ef41Sopenharmony_ci} 141cb0ef41Sopenharmony_ci 151cb0ef41Sopenharmony_ciexports.utf32le = { type: '_utf32', isLE: true }; 161cb0ef41Sopenharmony_ciexports.utf32be = { type: '_utf32', isLE: false }; 171cb0ef41Sopenharmony_ci 181cb0ef41Sopenharmony_ci// Aliases 191cb0ef41Sopenharmony_ciexports.ucs4le = 'utf32le'; 201cb0ef41Sopenharmony_ciexports.ucs4be = 'utf32be'; 211cb0ef41Sopenharmony_ci 221cb0ef41Sopenharmony_ciUtf32Codec.prototype.encoder = Utf32Encoder; 231cb0ef41Sopenharmony_ciUtf32Codec.prototype.decoder = Utf32Decoder; 241cb0ef41Sopenharmony_ci 251cb0ef41Sopenharmony_ci// -- Encoding 261cb0ef41Sopenharmony_ci 271cb0ef41Sopenharmony_cifunction Utf32Encoder(options, codec) { 281cb0ef41Sopenharmony_ci this.isLE = codec.isLE; 291cb0ef41Sopenharmony_ci this.highSurrogate = 0; 301cb0ef41Sopenharmony_ci} 311cb0ef41Sopenharmony_ci 321cb0ef41Sopenharmony_ciUtf32Encoder.prototype.write = function(str) { 331cb0ef41Sopenharmony_ci var src = Buffer.from(str, 'ucs2'); 341cb0ef41Sopenharmony_ci var dst = Buffer.alloc(src.length * 2); 351cb0ef41Sopenharmony_ci var write32 = this.isLE ? dst.writeUInt32LE : dst.writeUInt32BE; 361cb0ef41Sopenharmony_ci var offset = 0; 371cb0ef41Sopenharmony_ci 381cb0ef41Sopenharmony_ci for (var i = 0; i < src.length; i += 2) { 391cb0ef41Sopenharmony_ci var code = src.readUInt16LE(i); 401cb0ef41Sopenharmony_ci var isHighSurrogate = (0xD800 <= code && code < 0xDC00); 411cb0ef41Sopenharmony_ci var isLowSurrogate = (0xDC00 <= code && code < 0xE000); 421cb0ef41Sopenharmony_ci 431cb0ef41Sopenharmony_ci if (this.highSurrogate) { 441cb0ef41Sopenharmony_ci if (isHighSurrogate || !isLowSurrogate) { 451cb0ef41Sopenharmony_ci // There shouldn't be two high surrogates in a row, nor a high surrogate which isn't followed by a low 461cb0ef41Sopenharmony_ci // surrogate. If this happens, keep the pending high surrogate as a stand-alone semi-invalid character 471cb0ef41Sopenharmony_ci // (technically wrong, but expected by some applications, like Windows file names). 481cb0ef41Sopenharmony_ci write32.call(dst, this.highSurrogate, offset); 491cb0ef41Sopenharmony_ci offset += 4; 501cb0ef41Sopenharmony_ci } 511cb0ef41Sopenharmony_ci else { 521cb0ef41Sopenharmony_ci // Create 32-bit value from high and low surrogates; 531cb0ef41Sopenharmony_ci var codepoint = (((this.highSurrogate - 0xD800) << 10) | (code - 0xDC00)) + 0x10000; 541cb0ef41Sopenharmony_ci 551cb0ef41Sopenharmony_ci write32.call(dst, codepoint, offset); 561cb0ef41Sopenharmony_ci offset += 4; 571cb0ef41Sopenharmony_ci this.highSurrogate = 0; 581cb0ef41Sopenharmony_ci 591cb0ef41Sopenharmony_ci continue; 601cb0ef41Sopenharmony_ci } 611cb0ef41Sopenharmony_ci } 621cb0ef41Sopenharmony_ci 631cb0ef41Sopenharmony_ci if (isHighSurrogate) 641cb0ef41Sopenharmony_ci this.highSurrogate = code; 651cb0ef41Sopenharmony_ci else { 661cb0ef41Sopenharmony_ci // Even if the current character is a low surrogate, with no previous high surrogate, we'll 671cb0ef41Sopenharmony_ci // encode it as a semi-invalid stand-alone character for the same reasons expressed above for 681cb0ef41Sopenharmony_ci // unpaired high surrogates. 691cb0ef41Sopenharmony_ci write32.call(dst, code, offset); 701cb0ef41Sopenharmony_ci offset += 4; 711cb0ef41Sopenharmony_ci this.highSurrogate = 0; 721cb0ef41Sopenharmony_ci } 731cb0ef41Sopenharmony_ci } 741cb0ef41Sopenharmony_ci 751cb0ef41Sopenharmony_ci if (offset < dst.length) 761cb0ef41Sopenharmony_ci dst = dst.slice(0, offset); 771cb0ef41Sopenharmony_ci 781cb0ef41Sopenharmony_ci return dst; 791cb0ef41Sopenharmony_ci}; 801cb0ef41Sopenharmony_ci 811cb0ef41Sopenharmony_ciUtf32Encoder.prototype.end = function() { 821cb0ef41Sopenharmony_ci // Treat any leftover high surrogate as a semi-valid independent character. 831cb0ef41Sopenharmony_ci if (!this.highSurrogate) 841cb0ef41Sopenharmony_ci return; 851cb0ef41Sopenharmony_ci 861cb0ef41Sopenharmony_ci var buf = Buffer.alloc(4); 871cb0ef41Sopenharmony_ci 881cb0ef41Sopenharmony_ci if (this.isLE) 891cb0ef41Sopenharmony_ci buf.writeUInt32LE(this.highSurrogate, 0); 901cb0ef41Sopenharmony_ci else 911cb0ef41Sopenharmony_ci buf.writeUInt32BE(this.highSurrogate, 0); 921cb0ef41Sopenharmony_ci 931cb0ef41Sopenharmony_ci this.highSurrogate = 0; 941cb0ef41Sopenharmony_ci 951cb0ef41Sopenharmony_ci return buf; 961cb0ef41Sopenharmony_ci}; 971cb0ef41Sopenharmony_ci 981cb0ef41Sopenharmony_ci// -- Decoding 991cb0ef41Sopenharmony_ci 1001cb0ef41Sopenharmony_cifunction Utf32Decoder(options, codec) { 1011cb0ef41Sopenharmony_ci this.isLE = codec.isLE; 1021cb0ef41Sopenharmony_ci this.badChar = codec.iconv.defaultCharUnicode.charCodeAt(0); 1031cb0ef41Sopenharmony_ci this.overflow = []; 1041cb0ef41Sopenharmony_ci} 1051cb0ef41Sopenharmony_ci 1061cb0ef41Sopenharmony_ciUtf32Decoder.prototype.write = function(src) { 1071cb0ef41Sopenharmony_ci if (src.length === 0) 1081cb0ef41Sopenharmony_ci return ''; 1091cb0ef41Sopenharmony_ci 1101cb0ef41Sopenharmony_ci var i = 0; 1111cb0ef41Sopenharmony_ci var codepoint = 0; 1121cb0ef41Sopenharmony_ci var dst = Buffer.alloc(src.length + 4); 1131cb0ef41Sopenharmony_ci var offset = 0; 1141cb0ef41Sopenharmony_ci var isLE = this.isLE; 1151cb0ef41Sopenharmony_ci var overflow = this.overflow; 1161cb0ef41Sopenharmony_ci var badChar = this.badChar; 1171cb0ef41Sopenharmony_ci 1181cb0ef41Sopenharmony_ci if (overflow.length > 0) { 1191cb0ef41Sopenharmony_ci for (; i < src.length && overflow.length < 4; i++) 1201cb0ef41Sopenharmony_ci overflow.push(src[i]); 1211cb0ef41Sopenharmony_ci 1221cb0ef41Sopenharmony_ci if (overflow.length === 4) { 1231cb0ef41Sopenharmony_ci // NOTE: codepoint is a signed int32 and can be negative. 1241cb0ef41Sopenharmony_ci // NOTE: We copied this block from below to help V8 optimize it (it works with array, not buffer). 1251cb0ef41Sopenharmony_ci if (isLE) { 1261cb0ef41Sopenharmony_ci codepoint = overflow[i] | (overflow[i+1] << 8) | (overflow[i+2] << 16) | (overflow[i+3] << 24); 1271cb0ef41Sopenharmony_ci } else { 1281cb0ef41Sopenharmony_ci codepoint = overflow[i+3] | (overflow[i+2] << 8) | (overflow[i+1] << 16) | (overflow[i] << 24); 1291cb0ef41Sopenharmony_ci } 1301cb0ef41Sopenharmony_ci overflow.length = 0; 1311cb0ef41Sopenharmony_ci 1321cb0ef41Sopenharmony_ci offset = _writeCodepoint(dst, offset, codepoint, badChar); 1331cb0ef41Sopenharmony_ci } 1341cb0ef41Sopenharmony_ci } 1351cb0ef41Sopenharmony_ci 1361cb0ef41Sopenharmony_ci // Main loop. Should be as optimized as possible. 1371cb0ef41Sopenharmony_ci for (; i < src.length - 3; i += 4) { 1381cb0ef41Sopenharmony_ci // NOTE: codepoint is a signed int32 and can be negative. 1391cb0ef41Sopenharmony_ci if (isLE) { 1401cb0ef41Sopenharmony_ci codepoint = src[i] | (src[i+1] << 8) | (src[i+2] << 16) | (src[i+3] << 24); 1411cb0ef41Sopenharmony_ci } else { 1421cb0ef41Sopenharmony_ci codepoint = src[i+3] | (src[i+2] << 8) | (src[i+1] << 16) | (src[i] << 24); 1431cb0ef41Sopenharmony_ci } 1441cb0ef41Sopenharmony_ci offset = _writeCodepoint(dst, offset, codepoint, badChar); 1451cb0ef41Sopenharmony_ci } 1461cb0ef41Sopenharmony_ci 1471cb0ef41Sopenharmony_ci // Keep overflowing bytes. 1481cb0ef41Sopenharmony_ci for (; i < src.length; i++) { 1491cb0ef41Sopenharmony_ci overflow.push(src[i]); 1501cb0ef41Sopenharmony_ci } 1511cb0ef41Sopenharmony_ci 1521cb0ef41Sopenharmony_ci return dst.slice(0, offset).toString('ucs2'); 1531cb0ef41Sopenharmony_ci}; 1541cb0ef41Sopenharmony_ci 1551cb0ef41Sopenharmony_cifunction _writeCodepoint(dst, offset, codepoint, badChar) { 1561cb0ef41Sopenharmony_ci // NOTE: codepoint is signed int32 and can be negative. We keep it that way to help V8 with optimizations. 1571cb0ef41Sopenharmony_ci if (codepoint < 0 || codepoint > 0x10FFFF) { 1581cb0ef41Sopenharmony_ci // Not a valid Unicode codepoint 1591cb0ef41Sopenharmony_ci codepoint = badChar; 1601cb0ef41Sopenharmony_ci } 1611cb0ef41Sopenharmony_ci 1621cb0ef41Sopenharmony_ci // Ephemeral Planes: Write high surrogate. 1631cb0ef41Sopenharmony_ci if (codepoint >= 0x10000) { 1641cb0ef41Sopenharmony_ci codepoint -= 0x10000; 1651cb0ef41Sopenharmony_ci 1661cb0ef41Sopenharmony_ci var high = 0xD800 | (codepoint >> 10); 1671cb0ef41Sopenharmony_ci dst[offset++] = high & 0xff; 1681cb0ef41Sopenharmony_ci dst[offset++] = high >> 8; 1691cb0ef41Sopenharmony_ci 1701cb0ef41Sopenharmony_ci // Low surrogate is written below. 1711cb0ef41Sopenharmony_ci var codepoint = 0xDC00 | (codepoint & 0x3FF); 1721cb0ef41Sopenharmony_ci } 1731cb0ef41Sopenharmony_ci 1741cb0ef41Sopenharmony_ci // Write BMP char or low surrogate. 1751cb0ef41Sopenharmony_ci dst[offset++] = codepoint & 0xff; 1761cb0ef41Sopenharmony_ci dst[offset++] = codepoint >> 8; 1771cb0ef41Sopenharmony_ci 1781cb0ef41Sopenharmony_ci return offset; 1791cb0ef41Sopenharmony_ci}; 1801cb0ef41Sopenharmony_ci 1811cb0ef41Sopenharmony_ciUtf32Decoder.prototype.end = function() { 1821cb0ef41Sopenharmony_ci this.overflow.length = 0; 1831cb0ef41Sopenharmony_ci}; 1841cb0ef41Sopenharmony_ci 1851cb0ef41Sopenharmony_ci// == UTF-32 Auto codec ============================================================= 1861cb0ef41Sopenharmony_ci// Decoder chooses automatically from UTF-32LE and UTF-32BE using BOM and space-based heuristic. 1871cb0ef41Sopenharmony_ci// Defaults to UTF-32LE. http://en.wikipedia.org/wiki/UTF-32 1881cb0ef41Sopenharmony_ci// Encoder/decoder default can be changed: iconv.decode(buf, 'utf32', {defaultEncoding: 'utf-32be'}); 1891cb0ef41Sopenharmony_ci 1901cb0ef41Sopenharmony_ci// Encoder prepends BOM (which can be overridden with (addBOM: false}). 1911cb0ef41Sopenharmony_ci 1921cb0ef41Sopenharmony_ciexports.utf32 = Utf32AutoCodec; 1931cb0ef41Sopenharmony_ciexports.ucs4 = 'utf32'; 1941cb0ef41Sopenharmony_ci 1951cb0ef41Sopenharmony_cifunction Utf32AutoCodec(options, iconv) { 1961cb0ef41Sopenharmony_ci this.iconv = iconv; 1971cb0ef41Sopenharmony_ci} 1981cb0ef41Sopenharmony_ci 1991cb0ef41Sopenharmony_ciUtf32AutoCodec.prototype.encoder = Utf32AutoEncoder; 2001cb0ef41Sopenharmony_ciUtf32AutoCodec.prototype.decoder = Utf32AutoDecoder; 2011cb0ef41Sopenharmony_ci 2021cb0ef41Sopenharmony_ci// -- Encoding 2031cb0ef41Sopenharmony_ci 2041cb0ef41Sopenharmony_cifunction Utf32AutoEncoder(options, codec) { 2051cb0ef41Sopenharmony_ci options = options || {}; 2061cb0ef41Sopenharmony_ci 2071cb0ef41Sopenharmony_ci if (options.addBOM === undefined) 2081cb0ef41Sopenharmony_ci options.addBOM = true; 2091cb0ef41Sopenharmony_ci 2101cb0ef41Sopenharmony_ci this.encoder = codec.iconv.getEncoder(options.defaultEncoding || 'utf-32le', options); 2111cb0ef41Sopenharmony_ci} 2121cb0ef41Sopenharmony_ci 2131cb0ef41Sopenharmony_ciUtf32AutoEncoder.prototype.write = function(str) { 2141cb0ef41Sopenharmony_ci return this.encoder.write(str); 2151cb0ef41Sopenharmony_ci}; 2161cb0ef41Sopenharmony_ci 2171cb0ef41Sopenharmony_ciUtf32AutoEncoder.prototype.end = function() { 2181cb0ef41Sopenharmony_ci return this.encoder.end(); 2191cb0ef41Sopenharmony_ci}; 2201cb0ef41Sopenharmony_ci 2211cb0ef41Sopenharmony_ci// -- Decoding 2221cb0ef41Sopenharmony_ci 2231cb0ef41Sopenharmony_cifunction Utf32AutoDecoder(options, codec) { 2241cb0ef41Sopenharmony_ci this.decoder = null; 2251cb0ef41Sopenharmony_ci this.initialBufs = []; 2261cb0ef41Sopenharmony_ci this.initialBufsLen = 0; 2271cb0ef41Sopenharmony_ci this.options = options || {}; 2281cb0ef41Sopenharmony_ci this.iconv = codec.iconv; 2291cb0ef41Sopenharmony_ci} 2301cb0ef41Sopenharmony_ci 2311cb0ef41Sopenharmony_ciUtf32AutoDecoder.prototype.write = function(buf) { 2321cb0ef41Sopenharmony_ci if (!this.decoder) { 2331cb0ef41Sopenharmony_ci // Codec is not chosen yet. Accumulate initial bytes. 2341cb0ef41Sopenharmony_ci this.initialBufs.push(buf); 2351cb0ef41Sopenharmony_ci this.initialBufsLen += buf.length; 2361cb0ef41Sopenharmony_ci 2371cb0ef41Sopenharmony_ci if (this.initialBufsLen < 32) // We need more bytes to use space heuristic (see below) 2381cb0ef41Sopenharmony_ci return ''; 2391cb0ef41Sopenharmony_ci 2401cb0ef41Sopenharmony_ci // We have enough bytes -> detect endianness. 2411cb0ef41Sopenharmony_ci var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding); 2421cb0ef41Sopenharmony_ci this.decoder = this.iconv.getDecoder(encoding, this.options); 2431cb0ef41Sopenharmony_ci 2441cb0ef41Sopenharmony_ci var resStr = ''; 2451cb0ef41Sopenharmony_ci for (var i = 0; i < this.initialBufs.length; i++) 2461cb0ef41Sopenharmony_ci resStr += this.decoder.write(this.initialBufs[i]); 2471cb0ef41Sopenharmony_ci 2481cb0ef41Sopenharmony_ci this.initialBufs.length = this.initialBufsLen = 0; 2491cb0ef41Sopenharmony_ci return resStr; 2501cb0ef41Sopenharmony_ci } 2511cb0ef41Sopenharmony_ci 2521cb0ef41Sopenharmony_ci return this.decoder.write(buf); 2531cb0ef41Sopenharmony_ci}; 2541cb0ef41Sopenharmony_ci 2551cb0ef41Sopenharmony_ciUtf32AutoDecoder.prototype.end = function() { 2561cb0ef41Sopenharmony_ci if (!this.decoder) { 2571cb0ef41Sopenharmony_ci var encoding = detectEncoding(this.initialBufs, this.options.defaultEncoding); 2581cb0ef41Sopenharmony_ci this.decoder = this.iconv.getDecoder(encoding, this.options); 2591cb0ef41Sopenharmony_ci 2601cb0ef41Sopenharmony_ci var resStr = ''; 2611cb0ef41Sopenharmony_ci for (var i = 0; i < this.initialBufs.length; i++) 2621cb0ef41Sopenharmony_ci resStr += this.decoder.write(this.initialBufs[i]); 2631cb0ef41Sopenharmony_ci 2641cb0ef41Sopenharmony_ci var trail = this.decoder.end(); 2651cb0ef41Sopenharmony_ci if (trail) 2661cb0ef41Sopenharmony_ci resStr += trail; 2671cb0ef41Sopenharmony_ci 2681cb0ef41Sopenharmony_ci this.initialBufs.length = this.initialBufsLen = 0; 2691cb0ef41Sopenharmony_ci return resStr; 2701cb0ef41Sopenharmony_ci } 2711cb0ef41Sopenharmony_ci 2721cb0ef41Sopenharmony_ci return this.decoder.end(); 2731cb0ef41Sopenharmony_ci}; 2741cb0ef41Sopenharmony_ci 2751cb0ef41Sopenharmony_cifunction detectEncoding(bufs, defaultEncoding) { 2761cb0ef41Sopenharmony_ci var b = []; 2771cb0ef41Sopenharmony_ci var charsProcessed = 0; 2781cb0ef41Sopenharmony_ci var invalidLE = 0, invalidBE = 0; // Number of invalid chars when decoded as LE or BE. 2791cb0ef41Sopenharmony_ci var bmpCharsLE = 0, bmpCharsBE = 0; // Number of BMP chars when decoded as LE or BE. 2801cb0ef41Sopenharmony_ci 2811cb0ef41Sopenharmony_ci outer_loop: 2821cb0ef41Sopenharmony_ci for (var i = 0; i < bufs.length; i++) { 2831cb0ef41Sopenharmony_ci var buf = bufs[i]; 2841cb0ef41Sopenharmony_ci for (var j = 0; j < buf.length; j++) { 2851cb0ef41Sopenharmony_ci b.push(buf[j]); 2861cb0ef41Sopenharmony_ci if (b.length === 4) { 2871cb0ef41Sopenharmony_ci if (charsProcessed === 0) { 2881cb0ef41Sopenharmony_ci // Check BOM first. 2891cb0ef41Sopenharmony_ci if (b[0] === 0xFF && b[1] === 0xFE && b[2] === 0 && b[3] === 0) { 2901cb0ef41Sopenharmony_ci return 'utf-32le'; 2911cb0ef41Sopenharmony_ci } 2921cb0ef41Sopenharmony_ci if (b[0] === 0 && b[1] === 0 && b[2] === 0xFE && b[3] === 0xFF) { 2931cb0ef41Sopenharmony_ci return 'utf-32be'; 2941cb0ef41Sopenharmony_ci } 2951cb0ef41Sopenharmony_ci } 2961cb0ef41Sopenharmony_ci 2971cb0ef41Sopenharmony_ci if (b[0] !== 0 || b[1] > 0x10) invalidBE++; 2981cb0ef41Sopenharmony_ci if (b[3] !== 0 || b[2] > 0x10) invalidLE++; 2991cb0ef41Sopenharmony_ci 3001cb0ef41Sopenharmony_ci if (b[0] === 0 && b[1] === 0 && (b[2] !== 0 || b[3] !== 0)) bmpCharsBE++; 3011cb0ef41Sopenharmony_ci if ((b[0] !== 0 || b[1] !== 0) && b[2] === 0 && b[3] === 0) bmpCharsLE++; 3021cb0ef41Sopenharmony_ci 3031cb0ef41Sopenharmony_ci b.length = 0; 3041cb0ef41Sopenharmony_ci charsProcessed++; 3051cb0ef41Sopenharmony_ci 3061cb0ef41Sopenharmony_ci if (charsProcessed >= 100) { 3071cb0ef41Sopenharmony_ci break outer_loop; 3081cb0ef41Sopenharmony_ci } 3091cb0ef41Sopenharmony_ci } 3101cb0ef41Sopenharmony_ci } 3111cb0ef41Sopenharmony_ci } 3121cb0ef41Sopenharmony_ci 3131cb0ef41Sopenharmony_ci // Make decisions. 3141cb0ef41Sopenharmony_ci if (bmpCharsBE - invalidBE > bmpCharsLE - invalidLE) return 'utf-32be'; 3151cb0ef41Sopenharmony_ci if (bmpCharsBE - invalidBE < bmpCharsLE - invalidLE) return 'utf-32le'; 3161cb0ef41Sopenharmony_ci 3171cb0ef41Sopenharmony_ci // Couldn't decide (likely all zeros or not enough data). 3181cb0ef41Sopenharmony_ci return defaultEncoding || 'utf-32le'; 3191cb0ef41Sopenharmony_ci} 320