1// META: title=Encoding API: Streaming decode 2// META: global=window,worker 3// META: script=resources/encodings.js 4// META: script=/common/sab.js 5 6var string = '\x00123ABCabc\x80\xFF\u0100\u1000\uFFFD\uD800\uDC00\uDBFF\uDFFF'; 7var octets = { 8 'utf-8': [0x00,0x31,0x32,0x33,0x41,0x42,0x43,0x61,0x62,0x63,0xc2,0x80, 9 0xc3,0xbf,0xc4,0x80,0xe1,0x80,0x80,0xef,0xbf,0xbd,0xf0,0x90, 10 0x80,0x80,0xf4,0x8f,0xbf,0xbf], 11 'utf-16le': [0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42,0x00, 12 0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF,0x00, 13 0x00,0x01,0x00,0x10,0xFD,0xFF,0x00,0xD8,0x00,0xDC,0xFF,0xDB, 14 0xFF,0xDF], 15 'utf-16be': [0x00,0x00,0x00,0x31,0x00,0x32,0x00,0x33,0x00,0x41,0x00,0x42, 16 0x00,0x43,0x00,0x61,0x00,0x62,0x00,0x63,0x00,0x80,0x00,0xFF, 17 0x01,0x00,0x10,0x00,0xFF,0xFD,0xD8,0x00,0xDC,0x00,0xDB,0xFF, 18 0xDF,0xFF] 19}; 20 21["ArrayBuffer", "SharedArrayBuffer"].forEach((arrayBufferOrSharedArrayBuffer) => { 22 Object.keys(octets).forEach(function(encoding) { 23 for (var len = 1; len <= 5; ++len) { 24 test(function() { 25 var encoded = octets[encoding]; 26 27 var out = ''; 28 var decoder = new TextDecoder(encoding); 29 for (var i = 0; i < encoded.length; i += len) { 30 var sub = []; 31 for (var j = i; j < encoded.length && j < i + len; ++j) { 32 sub.push(encoded[j]); 33 } 34 var uintArray = new Uint8Array(createBuffer(arrayBufferOrSharedArrayBuffer, sub.length)); 35 uintArray.set(sub); 36 out += decoder.decode(uintArray, {stream: true}); 37 } 38 out += decoder.decode(); 39 assert_equals(out, string); 40 }, 'Streaming decode: ' + encoding + ', ' + len + ' byte window (' + arrayBufferOrSharedArrayBuffer + ')'); 41 } 42 }); 43 44 test(() => { 45 function bytes(byteArray) { 46 const view = new Uint8Array(createBuffer(arrayBufferOrSharedArrayBuffer, byteArray.length)); 47 view.set(byteArray); 48 return view; 49 } 50 51 const decoder = new TextDecoder(); 52 53 assert_equals(decoder.decode(bytes([0xC1]), {stream: true}), "\uFFFD"); 54 assert_equals(decoder.decode(), ""); 55 56 assert_equals(decoder.decode(bytes([0xF5]), {stream: true}), "\uFFFD"); 57 assert_equals(decoder.decode(), ""); 58 59 assert_equals(decoder.decode(bytes([0xE0, 0x41]), {stream: true}), "\uFFFDA"); 60 assert_equals(decoder.decode(bytes([0x42])), "B"); 61 62 assert_equals(decoder.decode(bytes([0xE0, 0x80]), {stream: true}), "\uFFFD\uFFFD"); 63 assert_equals(decoder.decode(bytes([0x80])), "\uFFFD"); 64 65 assert_equals(decoder.decode(bytes([0xED, 0xA0]), {stream: true}), "\uFFFD\uFFFD"); 66 assert_equals(decoder.decode(bytes([0x80])), "\uFFFD"); 67 68 assert_equals(decoder.decode(bytes([0xF0, 0x41]), {stream: true}), "\uFFFDA"); 69 assert_equals(decoder.decode(bytes([0x42]), {stream: true}), "B"); 70 assert_equals(decoder.decode(bytes([0x43])), "C"); 71 72 assert_equals(decoder.decode(bytes([0xF0, 0x80]), {stream: true}), "\uFFFD\uFFFD"); 73 assert_equals(decoder.decode(bytes([0x80]), {stream: true}), "\uFFFD"); 74 assert_equals(decoder.decode(bytes([0x80])), "\uFFFD"); 75 76 assert_equals(decoder.decode(bytes([0xF4, 0xA0]), {stream: true}), "\uFFFD\uFFFD"); 77 assert_equals(decoder.decode(bytes([0x80]), {stream: true}), "\uFFFD"); 78 assert_equals(decoder.decode(bytes([0x80])), "\uFFFD"); 79 80 assert_equals(decoder.decode(bytes([0xF0, 0x90, 0x41]), {stream: true}), "\uFFFDA"); 81 assert_equals(decoder.decode(bytes([0x42])), "B"); 82 83 // 4-byte UTF-8 sequences always correspond to non-BMP characters. Here 84 // we make sure that, although the first 3 bytes are enough to emit the 85 // lead surrogate, it only gets emitted when the fourth byte is read. 86 assert_equals(decoder.decode(bytes([0xF0, 0x9F, 0x92]), {stream: true}), ""); 87 assert_equals(decoder.decode(bytes([0xA9])), "\u{1F4A9}"); 88 }, `Streaming decode: UTF-8 chunk tests (${arrayBufferOrSharedArrayBuffer})`); 89}) 90