import * as assert from 'node:assert';
import * as fs from 'node:fs';
import type { SAXParserOptions } from '../lib/index.js';
import { SAXParser } from '../lib/index.js';
import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
import {
    finished,
    getStringDiffMsg,
    writeChunkedToStream,
    removeNewLines,
    WritableStreamStub,
} from 'parse5-test-utils/utils/common.js';

/**
 * Normalizes markup so that two serializations can be compared loosely.
 *
 * The string is passed through `removeNewLines`, then every whitespace character is
 * dropped, single quotes are turned into double quotes and the result is lower-cased.
 *
 * @param str - Markup to normalize.
 * @returns The normalized string.
 */
function sanitizeForComparison(str: string): string {
    return removeNewLines(str).replace(/\s/g, '').replace(/'/g, '"').toLowerCase();
}

/**
 * Builds a test body that feeds `html` into a `SAXParser`, re-serializes the emitted
 * events and compares the (sanitized) result with the (sanitized) `expected` markup.
 *
 * @param html - Source markup written to the parser in chunks.
 * @param expected - Markup the re-serialized event stream should match.
 * @param options - Optional options forwarded to the `SAXParser` constructor.
 * @returns An async function suitable for passing to `it(...)`.
 */
function createBasicTest(html: string, expected: string, options?: SAXParserOptions): () => Promise<void> {
    return async function (): Promise<void> {
        //NOTE: the idea of the test is to serialize back given HTML using SAXParser handlers
        let actual = '';
        const parser = new SAXParser(options);

        parser.on('doctype', ({ name, publicId, systemId }) => {
            actual += `<!DOCTYPE ${name}`;

            if (publicId !== null) {
                actual += ` PUBLIC "${publicId}"`;
            } else if (systemId !== null) {
                //NOTE: the SYSTEM keyword is only emitted when there is no public identifier
                actual += ' SYSTEM';
            }

            if (systemId !== null) {
                actual += ` "${systemId}"`;
            }

            actual += '>';
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            actual += `<${tagName}`;

            for (const attr of attrs) {
                actual += ` ${attr.name}="${attr.value}"`;
            }

            actual += selfClosing ? '/>' : '>';
        });

        parser.on('endTag', ({ tagName }) => {
            actual += `</${tagName}>`;
        });

        parser.on('text', ({ text }) => {
            actual += text;
        });

        parser.on('comment', ({ text }) => {
            actual += `<!--${text}-->`;
        });

        writeChunkedToStream(html, parser);

        await finished(parser);

        //NOTE: keep the sanitized values in locals instead of overwriting the `expected` parameter,
        //so the captured argument is never mutated by running the test.
        const sanitizedExpected = sanitizeForComparison(expected);
        const sanitizedActual = sanitizeForComparison(actual);

        //NOTE: use ok assertion, so output will not be polluted by the whole content of the strings
        assert.ok(sanitizedActual === sanitizedExpected, getStringDiffMsg(sanitizedActual, sanitizedExpected));
    };
}

// Location of the large HTML fixture used by the streaming tests below.
const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);

describe('SAX parser', () => {
    //Basic tests
    for (const [idx, data] of loadSAXParserTestData().entries()) {
        it(`${idx + 1}.${data.name}`, createBasicTest(data.src, data.expected));
    }

    it('Piping and .stop()', async () => {
        const parser = new SAXParser();
        const writable = new WritableStreamStub();
        let handlerCallCount = 0;

        // Counts every emitted event and stops the parser on the tenth one.
        function handler(): void {
            handlerCallCount++;

            if (handlerCallCount === 10) {
                parser.stop();
            }
        }

        fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(writable);

        parser.on('startTag', handler);
        parser.on('endTag', handler);
        parser.on('doctype', handler);
        parser.on('comment', handler);
        parser.on('text', handler);

        await finished(writable);

        const expected = fs.readFileSync(hugePage).toString();

        // No handler may fire after `.stop()`, yet the piped output must still be the full file.
        assert.strictEqual(handlerCallCount, 10);
        assert.strictEqual(writable.writtenData, expected);
    });

    it('Parser silently exits on big files (GH-97)', () => {
        const parser = new SAXParser();

        fs.createReadStream(hugePage, 'utf8').pipe(parser);

        //NOTE: This is a smoke test - in case of regression it will fail with timeout.
        return finished(parser);
    });

    it('Last text chunk must be flushed (GH-271)', async () => {
        const parser = new SAXParser();
        let foundText = false;

        parser.on('text', ({ text }) => {
            foundText = true;
            assert.strictEqual(text, 'text');
        });

        parser.write('text');
        parser.end();

        await finished(parser);

        assert.ok(foundText);
    });

    it('Should not accept binary input (GH-269)', () => {
        const stream = new SAXParser();
        const buf = Buffer.from('test');

        assert.throws(() => stream.write(buf), TypeError);
    });

    it('Should treat NULL characters as normal text', async () => {
        const parser = new SAXParser();
        let foundText = false;

        parser.on('text', ({ text }) => {
            foundText = true;
            assert.strictEqual(text, '\0');
        });

        parser.write('\0');
        parser.end();

        await finished(parser);

        assert.strictEqual(foundText, true);
    });
});