193fb6ee3Sopenharmony_ciimport * as assert from 'node:assert';
293fb6ee3Sopenharmony_ciimport * as fs from 'node:fs';
393fb6ee3Sopenharmony_ciimport type { SAXParserOptions } from '../lib/index.js';
493fb6ee3Sopenharmony_ciimport { SAXParser } from '../lib/index.js';
593fb6ee3Sopenharmony_ciimport { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
693fb6ee3Sopenharmony_ciimport {
793fb6ee3Sopenharmony_ci    finished,
893fb6ee3Sopenharmony_ci    getStringDiffMsg,
993fb6ee3Sopenharmony_ci    writeChunkedToStream,
1093fb6ee3Sopenharmony_ci    removeNewLines,
1193fb6ee3Sopenharmony_ci    WritableStreamStub,
1293fb6ee3Sopenharmony_ci} from 'parse5-test-utils/utils/common.js';
1393fb6ee3Sopenharmony_ci
/**
 * Normalizes a markup string so two serializations can be compared loosely.
 *
 * The input is passed through `removeNewLines`, then every whitespace character is dropped,
 * single quotes are turned into double quotes, and the result is lower-cased.
 *
 * @param str Markup to normalize.
 * @returns The normalized string.
 */
function sanitizeForComparison(str: string): string {
    const withoutLineBreaks = removeNewLines(str);
    const withoutWhitespace = withoutLineBreaks.replace(/\s/g, '');
    const withDoubleQuotes = withoutWhitespace.replace(/'/g, '"');

    return withDoubleQuotes.toLowerCase();
}
1793fb6ee3Sopenharmony_ci
/**
 * Builds an async test body for a single SAX parser fixture.
 *
 * The returned function writes `html` to a `SAXParser` in chunks, rebuilds markup from the
 * emitted events, and asserts that it equals `expected` after both strings have gone through
 * `sanitizeForComparison`.
 *
 * @param html Source markup fed to the parser.
 * @param expected Markup the re-serialized events are compared against.
 * @param options Optional options forwarded to the `SAXParser` constructor.
 * @returns An async function suitable as a test callback.
 */
function createBasicTest(html: string, expected: string, options?: SAXParserOptions) {
    return async (): Promise<void> => {
        //NOTE: the test round-trips the given HTML by serializing it back from the SAXParser events
        const parser = new SAXParser(options);
        const pieces: string[] = [];

        parser.on('doctype', ({ name, publicId, systemId }) => {
            let doctype = `<!DOCTYPE ${name}`;

            // A public id takes precedence; the SYSTEM keyword is only emitted when there is none.
            if (publicId !== null) {
                doctype += ` PUBLIC "${publicId}"`;
            } else if (systemId !== null) {
                doctype += ' SYSTEM';
            }

            if (systemId !== null) {
                doctype += ` "${systemId}"`;
            }

            pieces.push(`${doctype}>`);
        });

        parser.on('startTag', ({ tagName, attrs, selfClosing }) => {
            const serializedAttrs = attrs.map((attr) => ` ${attr.name}="${attr.value}"`).join('');
            const closing = selfClosing ? '/>' : '>';

            pieces.push(`<${tagName}${serializedAttrs}${closing}`);
        });

        parser.on('endTag', ({ tagName }) => {
            pieces.push(`</${tagName}>`);
        });

        parser.on('text', ({ text }) => {
            pieces.push(text);
        });

        parser.on('comment', ({ text }) => {
            pieces.push(`<!--${text}-->`);
        });

        writeChunkedToStream(html, parser);

        await finished(parser);

        const sanitizedExpected = sanitizeForComparison(expected);
        const sanitizedActual = sanitizeForComparison(pieces.join(''));

        //NOTE: assert.ok keeps the failure output free of the full string contents
        assert.ok(sanitizedActual === sanitizedExpected, getStringDiffMsg(sanitizedActual, sanitizedExpected));
    };
}
7193fb6ee3Sopenharmony_ci
//NOTE: URL of the huge-page HTML fixture, resolved relative to this module
const hugePage = new URL('../../../test/data/huge-page/huge-page.html', import.meta.url);

describe('SAX parser', () => {
    //Basic tests: one case per fixture entry, titled with its 1-based position and name
    for (const [index, fixture] of loadSAXParserTestData().entries()) {
        const title = `${index + 1}.${fixture.name}`;

        it(title, createBasicTest(fixture.src, fixture.expected));
    }

    it('Piping and .stop()', async () => {
        const stopAfter = 10;
        const parser = new SAXParser();
        const sink = new WritableStreamStub();
        let eventCount = 0;

        // Counts every parser event and stops the parser once the threshold is reached.
        const countAndMaybeStop = (): void => {
            eventCount += 1;

            if (eventCount === stopAfter) {
                parser.stop();
            }
        };

        fs.createReadStream(hugePage, 'utf8').pipe(parser).pipe(sink);

        parser.on('startTag', countAndMaybeStop);
        parser.on('endTag', countAndMaybeStop);
        parser.on('doctype', countAndMaybeStop);
        parser.on('comment', countAndMaybeStop);
        parser.on('text', countAndMaybeStop);

        await finished(sink);

        const expected = fs.readFileSync(hugePage, 'utf8');

        // No handler may fire after .stop(), yet the piped-through data must still be complete.
        assert.strictEqual(eventCount, stopAfter);
        assert.strictEqual(sink.writtenData, expected);
    });

    it('Parser silently exits on big files (GH-97)', async () => {
        const parser = new SAXParser();

        fs.createReadStream(hugePage, 'utf8').pipe(parser);

        //NOTE: This is a smoke test - in case of regression it will fail with timeout.
        await finished(parser);
    });

    it('Last text chunk must be flushed (GH-271)', async () => {
        const parser = new SAXParser();
        let sawText = false;

        parser.on('text', ({ text }) => {
            sawText = true;
            assert.strictEqual(text, 'text');
        });

        parser.write('text');
        parser.end();

        await finished(parser);

        assert.ok(sawText);
    });

    it('Should not accept binary input (GH-269)', () => {
        const parser = new SAXParser();
        const binaryChunk = Buffer.from('test');

        assert.throws(() => {
            parser.write(binaryChunk);
        }, TypeError);
    });

    it('Should treat NULL characters as normal text', async () => {
        const parser = new SAXParser();
        let sawText = false;

        parser.on('text', ({ text }) => {
            sawText = true;
            assert.strictEqual(text, '\0');
        });

        parser.write('\0');
        parser.end();

        await finished(parser);

        assert.strictEqual(sawText, true);
    });
});
158