11cb0ef41Sopenharmony_ci'use strict';
21cb0ef41Sopenharmony_ci
31cb0ef41Sopenharmony_civar Buffer = require('safer-buffer').Buffer;
41cb0ef41Sopenharmony_ci
51cb0ef41Sopenharmony_ci// == UTF32-LE/BE codec. ==========================================================
61cb0ef41Sopenharmony_ci
// Codec object for fixed-endianness UTF-32.
//   codecOptions - codec definition; only its `isLE` flag is read (true = little-endian).
//   iconv        - owning iconv instance, stored so decoders can reach `defaultCharUnicode`.
function Utf32Codec(codecOptions, iconv) {
    this.iconv = iconv;
    // NOTE(review): presumably consulted by the library's BOM handling - confirm against the caller.
    this.bomAware = true;
    this.isLE = codecOptions.isLE;
}

// Streaming implementations used by this codec (function declarations, hence hoisted).
Utf32Codec.prototype.encoder = Utf32Encoder;
Utf32Codec.prototype.decoder = Utf32Decoder;

// Internal codec type; the concrete encodings below refer to it through `type`.
exports._utf32 = Utf32Codec;

// Concrete encodings: same codec type, differing only in byte order.
exports.utf32le = { type: '_utf32', isLE: true };
exports.utf32be = { type: '_utf32', isLE: false };

// Aliases
exports.ucs4le = 'utf32le';
exports.ucs4be = 'utf32be';
241cb0ef41Sopenharmony_ci
251cb0ef41Sopenharmony_ci// -- Encoding
261cb0ef41Sopenharmony_ci
// Streaming encoder from JavaScript (UTF-16) strings to UTF-32 bytes.
//   options - encoder options; not read by this encoder.
//   codec   - codec instance; its `isLE` flag selects the output byte order.
function Utf32Encoder(options, codec) {
    this.isLE = codec.isLE;
    // High surrogate waiting for its low half (possibly from a previous write() call), or 0 if none.
    this.highSurrogate = 0;
}

// Encodes `str` and returns a Buffer holding one 4-byte unit per emitted code point.
// A trailing high surrogate is not emitted; it is kept until the next write() or end().
Utf32Encoder.prototype.write = function(str) {
    var src = Buffer.from(str, 'ucs2');
    // Every UTF-16 unit produces at most one 4-byte unit (2 bytes in -> 4 bytes out). A high surrogate
    // carried over from the previous call may additionally be flushed unpaired, which needs 4 more bytes;
    // without that reserve the 32-bit write below would run past the end of the buffer and throw.
    var carriedBytes = this.highSurrogate ? 4 : 0;
    var dst = Buffer.alloc(src.length * 2 + carriedBytes);
    var write32 = this.isLE ? dst.writeUInt32LE : dst.writeUInt32BE;
    var offset = 0;

    for (var i = 0; i < src.length; i += 2) {
        var code = src.readUInt16LE(i);
        var isHighSurrogate = (0xD800 <= code && code < 0xDC00);
        var isLowSurrogate = (0xDC00 <= code && code < 0xE000);

        if (this.highSurrogate) {
            if (isHighSurrogate || !isLowSurrogate) {
                // There shouldn't be two high surrogates in a row, nor a high surrogate which isn't followed by a low
                // surrogate. If this happens, keep the pending high surrogate as a stand-alone semi-invalid character
                // (technically wrong, but expected by some applications, like Windows file names).
                // The current unit is then handled by the code after this block.
                write32.call(dst, this.highSurrogate, offset);
                offset += 4;
            }
            else {
                // Create 32-bit value from high and low surrogates.
                var codepoint = (((this.highSurrogate - 0xD800) << 10) | (code - 0xDC00)) + 0x10000;

                write32.call(dst, codepoint, offset);
                offset += 4;
                this.highSurrogate = 0;

                continue;
            }
        }

        if (isHighSurrogate)
            this.highSurrogate = code;
        else {
            // Even if the current character is a low surrogate, with no previous high surrogate, we'll
            // encode it as a semi-invalid stand-alone character for the same reasons expressed above for
            // unpaired high surrogates.
            write32.call(dst, code, offset);
            offset += 4;
            this.highSurrogate = 0;
        }
    }

    // Surrogate pairs collapse two input units into one output unit, so the buffer may be over-sized.
    if (offset < dst.length)
        dst = dst.slice(0, offset);

    return dst;
};

// Flushes the encoder. Returns a 4-byte Buffer containing a leftover high surrogate encoded as a
// stand-alone character, or undefined when nothing is pending.
Utf32Encoder.prototype.end = function() {
    // Treat any leftover high surrogate as a semi-valid independent character.
    if (!this.highSurrogate)
        return;

    var buf = Buffer.alloc(4);

    if (this.isLE)
        buf.writeUInt32LE(this.highSurrogate, 0);
    else
        buf.writeUInt32BE(this.highSurrogate, 0);

    this.highSurrogate = 0;

    return buf;
};
971cb0ef41Sopenharmony_ci
981cb0ef41Sopenharmony_ci// -- Decoding
991cb0ef41Sopenharmony_ci
// Streaming decoder from UTF-32 bytes to JavaScript strings.
//   options - decoder options; not read by this decoder.
//   codec   - codec instance; supplies `isLE` (byte order) and `iconv.defaultCharUnicode`,
//             whose first UTF-16 unit replaces invalid code points.
function Utf32Decoder(options, codec) {
    this.isLE = codec.isLE;
    this.badChar = codec.iconv.defaultCharUnicode.charCodeAt(0);
    // Bytes of an incomplete 4-byte unit left over from previous write() calls (0 to 3 entries).
    this.overflow = [];
}

// Decodes the chunk `src` (Buffer or byte array) and returns the resulting string.
// Bytes that do not complete a 4-byte unit are kept for the next call.
Utf32Decoder.prototype.write = function(src) {
    if (src.length === 0)
        return '';

    var i = 0;
    var codepoint = 0;
    // Each 4-byte unit yields at most 4 bytes of UTF-16; the extra 4 cover a unit completed from overflow.
    var dst = Buffer.alloc(src.length + 4);
    var offset = 0;
    var isLE = this.isLE;
    var overflow = this.overflow;
    var badChar = this.badChar;

    if (overflow.length > 0) {
        // Top up the partial unit with bytes from the start of this chunk.
        for (; i < src.length && overflow.length < 4; i++)
            overflow.push(src[i]);

        if (overflow.length === 4) {
            // NOTE: codepoint is a signed int32 and can be negative.
            // NOTE: We copied this block from below to help V8 optimize it (it works with array, not buffer).
            // The completed unit occupies overflow[0..3]; `i` counts bytes consumed from `src` and must
            // not be used as an index into `overflow`.
            if (isLE) {
                codepoint = overflow[0] | (overflow[1] << 8) | (overflow[2] << 16) | (overflow[3] << 24);
            } else {
                codepoint = overflow[3] | (overflow[2] << 8) | (overflow[1] << 16) | (overflow[0] << 24);
            }
            overflow.length = 0;

            offset = _writeCodepoint(dst, offset, codepoint, badChar);
        }
    }

    // Main loop. Should be as optimized as possible.
    for (; i < src.length - 3; i += 4) {
        // NOTE: codepoint is a signed int32 and can be negative.
        if (isLE) {
            codepoint = src[i] | (src[i+1] << 8) | (src[i+2] << 16) | (src[i+3] << 24);
        } else {
            codepoint = src[i+3] | (src[i+2] << 8) | (src[i+1] << 16) | (src[i] << 24);
        }
        offset = _writeCodepoint(dst, offset, codepoint, badChar);
    }

    // Keep overflowing bytes.
    for (; i < src.length; i++) {
        overflow.push(src[i]);
    }

    return dst.slice(0, offset).toString('ucs2');
};

// Writes `codepoint` into `dst` at `offset` as little-endian UTF-16 (one unit, or a surrogate pair for
// values >= 0x10000) and returns the offset just past the written bytes. Values outside 0..0x10FFFF are
// replaced by `badChar`.
function _writeCodepoint(dst, offset, codepoint, badChar) {
    // NOTE: codepoint is signed int32 and can be negative. We keep it that way to help V8 with optimizations.
    if (codepoint < 0 || codepoint > 0x10FFFF) {
        // Not a valid Unicode codepoint
        codepoint = badChar;
    }

    // Supplementary planes: write the high surrogate first.
    if (codepoint >= 0x10000) {
        codepoint -= 0x10000;

        var high = 0xD800 | (codepoint >> 10);
        dst[offset++] = high & 0xff;
        dst[offset++] = high >> 8;

        // Low surrogate is written below.
        codepoint = 0xDC00 | (codepoint & 0x3FF);
    }

    // Write BMP char or low surrogate.
    dst[offset++] = codepoint & 0xff;
    dst[offset++] = codepoint >> 8;

    return offset;
}

// Finishes decoding. Any incomplete trailing unit is discarded; nothing is returned.
Utf32Decoder.prototype.end = function() {
    this.overflow.length = 0;
};
1841cb0ef41Sopenharmony_ci
// == UTF-32 Auto codec =============================================================
// Decoder chooses automatically between UTF-32LE and UTF-32BE using the BOM or, when there is no BOM,
// a heuristic that compares how many BMP characters and invalid code points each byte order would yield.
// Defaults to UTF-32LE. http://en.wikipedia.org/wiki/UTF-32
// Encoder/decoder default can be changed: iconv.decode(buf, 'utf32', {defaultEncoding: 'utf-32be'});

// Encoder prepends BOM (which can be overridden with {addBOM: false}).
1911cb0ef41Sopenharmony_ci
// Codec object for UTF-32 with automatic endianness selection.
//   options - codec definition; not read here.
//   iconv   - owning iconv instance, used by the encoder/decoder to obtain the concrete codec.
function Utf32AutoCodec(options, iconv) {
    this.iconv = iconv;
}

// Streaming implementations used by this codec (function declarations, hence hoisted).
Utf32AutoCodec.prototype.encoder = Utf32AutoEncoder;
Utf32AutoCodec.prototype.decoder = Utf32AutoDecoder;

// Public encoding name plus its UCS-4 alias.
exports.utf32 = Utf32AutoCodec;
exports.ucs4 = 'utf32';
2011cb0ef41Sopenharmony_ci
2021cb0ef41Sopenharmony_ci// -- Encoding
2031cb0ef41Sopenharmony_ci
// Encoder for the endianness-agnostic 'utf32' encoding. Delegates to the encoder named by
// `options.defaultEncoding` (UTF-32LE when absent) and asks for a BOM unless `addBOM` is given.
//   options - encoder options (may be omitted); recognised keys: `addBOM`, `defaultEncoding`.
//   codec   - codec instance; its `iconv.getEncoder` creates the underlying encoder.
function Utf32AutoEncoder(options, codec) {
    // Work on a copy: setting the addBOM default on the caller's own object would leak the flag into
    // any later, unrelated call that reuses the same options object.
    var effective = {};
    if (options) {
        for (var key in options)
            effective[key] = options[key];
    }

    if (effective.addBOM === undefined)
        effective.addBOM = true;

    this.encoder = codec.iconv.getEncoder(effective.defaultEncoding || 'utf-32le', effective);
}

// Forwards the string chunk to the underlying encoder and returns its output.
Utf32AutoEncoder.prototype.write = function(str) {
    return this.encoder.write(str);
};

// Flushes the underlying encoder and returns whatever it returns.
Utf32AutoEncoder.prototype.end = function() {
    return this.encoder.end();
};
2201cb0ef41Sopenharmony_ci
2211cb0ef41Sopenharmony_ci// -- Decoding
2221cb0ef41Sopenharmony_ci
// Decoder for the endianness-agnostic 'utf32' encoding. It buffers the first chunks until
// detectEncoding() can pick a byte order, then hands everything to the matching decoder.
//   options - decoder options (may be omitted); `defaultEncoding` is the fallback for detection.
//   codec   - codec instance; its `iconv.getDecoder` creates the concrete decoder.
function Utf32AutoDecoder(options, codec) {
    this.decoder = null;      // concrete decoder, created once the byte order is known
    this.initialBufs = [];    // chunks received before the decision
    this.initialBufsLen = 0;  // total byte length of initialBufs
    this.options = options || {};
    this.iconv = codec.iconv;
}

// Decodes a chunk. Returns '' while still collecting data for detection; afterwards returns
// the decoded text (including, on the deciding call, the text of all buffered chunks).
Utf32AutoDecoder.prototype.write = function(buf) {
    // Byte order already chosen: plain delegation.
    if (this.decoder)
        return this.decoder.write(buf);

    this.initialBufs.push(buf);
    this.initialBufsLen += buf.length;

    // Fewer than 32 bytes so far: wait for more input before running the detection heuristic.
    if (this.initialBufsLen < 32)
        return '';

    var chosen = detectEncoding(this.initialBufs, this.options.defaultEncoding);
    this.decoder = this.iconv.getDecoder(chosen, this.options);

    // Replay the buffered chunks through the freshly created decoder.
    var text = '';
    for (var n = 0; n < this.initialBufs.length; n++)
        text += this.decoder.write(this.initialBufs[n]);

    this.initialBufs.length = this.initialBufsLen = 0;
    return text;
};

// Finishes decoding. If the byte order was never decided, detection runs on whatever was
// buffered, the chunks are decoded, and the decoder's own trailing output (if any) is appended.
Utf32AutoDecoder.prototype.end = function() {
    if (this.decoder)
        return this.decoder.end();

    var chosen = detectEncoding(this.initialBufs, this.options.defaultEncoding);
    this.decoder = this.iconv.getDecoder(chosen, this.options);

    var text = '';
    for (var n = 0; n < this.initialBufs.length; n++)
        text += this.decoder.write(this.initialBufs[n]);

    var rest = this.decoder.end();
    if (rest)
        text += rest;

    this.initialBufs.length = this.initialBufsLen = 0;
    return text;
};
2741cb0ef41Sopenharmony_ci
// Guesses the byte order of UTF-32 data.
//   bufs            - array of byte chunks (Buffers or byte arrays), examined as one stream.
//   defaultEncoding - returned when the data gives no preference; 'utf-32le' if omitted.
// Returns 'utf-32le' or 'utf-32be' (or `defaultEncoding`). A BOM in the first 4 bytes decides
// immediately; otherwise up to 100 four-byte units are scored for each byte order.
function detectEncoding(bufs, defaultEncoding) {
    var quad = [];     // bytes of the unit being assembled; units may straddle chunks
    var unitsSeen = 0;
    // Per byte order: (units that would be non-zero BMP chars) minus (units that would be invalid).
    var scoreLE = 0, scoreBE = 0;

    scan:
    for (var c = 0; c < bufs.length; c++) {
        var chunk = bufs[c];
        for (var k = 0; k < chunk.length; k++) {
            quad.push(chunk[k]);
            if (quad.length < 4)
                continue;

            var b0 = quad[0], b1 = quad[1], b2 = quad[2], b3 = quad[3];
            quad.length = 0;

            // Only the very first unit can be a BOM.
            if (unitsSeen === 0) {
                if (b0 === 0xFF && b1 === 0xFE && b2 === 0 && b3 === 0)
                    return 'utf-32le';
                if (b0 === 0 && b1 === 0 && b2 === 0xFE && b3 === 0xFF)
                    return 'utf-32be';
            }

            // A valid code point has a zero top byte and a next byte of at most 0x10.
            if (b0 !== 0 || b1 > 0x10) scoreBE--;
            if (b3 !== 0 || b2 > 0x10) scoreLE--;

            // Non-zero BMP character: upper two bytes zero, lower two not both zero.
            if (b0 === 0 && b1 === 0 && (b2 !== 0 || b3 !== 0)) scoreBE++;
            if (b2 === 0 && b3 === 0 && (b0 !== 0 || b1 !== 0)) scoreLE++;

            unitsSeen++;
            if (unitsSeen >= 100)
                break scan;
        }
    }

    // Make decisions.
    if (scoreBE > scoreLE) return 'utf-32be';
    if (scoreBE < scoreLE) return 'utf-32le';

    // Couldn't decide (likely all zeros or not enough data).
    return defaultEncoding || 'utf-32le';
}
320