import { Writable } from 'node:stream';
import { Parser, type ParserOptions, type TreeAdapterTypeMap, type DefaultTreeAdapterMap } from 'parse5';

/* eslint-disable unicorn/consistent-function-scoping -- The rule seems to be broken here. */

/**
 * Streaming HTML parser with scripting support.
 * A [writable stream](https://nodejs.org/api/stream.html#stream_class_stream_writable).
 *
 * @example
 *
 * ```js
 * const ParserStream = require('parse5-parser-stream');
 * const http = require('http');
 * const { finished } = require('node:stream');
 *
 * // Fetch the page content and obtain its <head> node
 * http.get('http://inikulin.github.io/parse5/', res => {
 *     const parser = new ParserStream();
 *
 *     finished(parser, () => {
 *         console.log(parser.document.childNodes[1].childNodes[0].tagName); //> 'head'
 *     });
 *
 *     res.pipe(parser);
 * });
 * ```
 *
 */
export class ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap> extends Writable {
    /**
     * Creates a stream backed by the parser that `Parser.getFragmentParser` returns.
     *
     * @param fragmentContext Context node forwarded to `Parser.getFragmentParser`.
     * @param options Parsing options, forwarded to both the fragment parser and the stream.
     * @returns A stream wrapping the fragment parser.
     */
    static getFragmentStream<T extends TreeAdapterTypeMap>(
        fragmentContext?: T['parentNode'] | null,
        options?: ParserOptions<T>
    ): ParserStream<T> {
        const parser = Parser.getFragmentParser(fragmentContext, options);
        const stream = new ParserStream(options, parser);
        return stream;
    }

    /** Set once `end()` has been called; passed to the tokenizer as the "last chunk" flag. */
    private lastChunkWritten = false;
    /** Callback of the most recent `_write` call; handed to the tokenizer again on `resume`. */
    private writeCallback: undefined | (() => void) = undefined;

    /** HTML queued through `documentWrite` while a `script` listener is running. */
    private pendingHtmlInsertions: string[] = [];

    /** The resulting document node. */
    public get document(): T['document'] {
        return this.parser.document;
    }

    /** Returns the result of the underlying parser's `getFragment()`. */
    public getFragment(): T['documentFragment'] {
        return this.parser.getFragment();
    }

    /**
     * @param options Parsing options.
     * @param parser Parser instance to drive; defaults to a new `Parser` built from `options`.
     */
    constructor(options?: ParserOptions<T>, public parser: Parser<T> = new Parser(options)) {
        super({ decodeStrings: false });

        const resume = (): void => {
            // Each insertion goes in at the current position, so walking the queue
            // backwards leaves the earliest `documentWrite` call first in the input.
            for (let i = this.pendingHtmlInsertions.length - 1; i >= 0; i--) {
                this.parser.tokenizer.insertHtmlAtCurrentPos(this.pendingHtmlInsertions[i]);
            }

            this.pendingHtmlInsertions.length = 0;

            //NOTE: keep parsing if we don't wait for the next input chunk
            this.parser.tokenizer.resume(this.writeCallback);
        };

        const documentWrite = (html: string): void => {
            // Writes are dropped once the parser reports that it has stopped.
            if (!this.parser.stopped) {
                this.pendingHtmlInsertions.push(html);
            }
        };

        const scriptHandler = (scriptElement: T['element']): void => {
            // Only pause when somebody is listening; otherwise nobody would call `resume`.
            if (this.listenerCount('script') > 0) {
                this.parser.tokenizer.pause();
                this.emit('script', scriptElement, documentWrite, resume);
            }
        };

        this.parser.scriptHandler = scriptHandler;
    }

    //WritableStream implementation
    /**
     * Passes a chunk to the tokenizer.
     *
     * @param chunk String chunk to parse.
     * @param _encoding Unused.
     * @param callback Handed to the tokenizer together with the chunk.
     * @throws {TypeError} If `chunk` is not a string.
     */
    override _write(chunk: string, _encoding: string, callback: () => void): void {
        if (typeof chunk !== 'string') {
            throw new TypeError('Parser can work only with string streams.');
        }

        this.writeCallback = callback;
        this.parser.tokenizer.write(chunk, this.lastChunkWritten, this.writeCallback);
    }

    // TODO [engine:node@>=16]: Due to issues with Node < 16, we are overriding `end` instead of `_final`.

    /**
     * Marks the next write as the last one and ends the stream. A chunk is always written
     * (an empty string if none was given) so the tokenizer sees the "last chunk" flag.
     *
     * Accepts the same call forms as `Writable.end`: `end()`, `end(callback)`,
     * `end(chunk)`, `end(chunk, callback)` and `end(chunk, encoding, callback)`.
     *
     * @returns The value returned by `Writable.end`.
     */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    override end(chunk?: any, encoding?: any, callback?: any): any {
        let finalChunk = chunk;
        let finalEncoding = encoding;
        let onFinish = callback;

        // Sort out the optional-argument call forms ourselves. Forwarding a callback in
        // the `chunk` slot would make `Writable.end` skip the write entirely, so the
        // tokenizer would never be told that the input is complete.
        if (typeof chunk === 'function') {
            onFinish = chunk;
            finalChunk = undefined;
            finalEncoding = undefined;
        } else if (typeof encoding === 'function') {
            onFinish = encoding;
            finalEncoding = undefined;
        }

        this.lastChunkWritten = true;
        return super.end(finalChunk || '', finalEncoding, onFinish);
    }
}
// Event typings for the stream's `on` method.
export interface ParserStream<T extends TreeAdapterTypeMap = DefaultTreeAdapterMap> {
    /**
     * Raised when parser encounters a `<script>` element. If this event has listeners, parsing will be suspended once
     * it is emitted. So, if `<script>` has the `src` attribute, you can fetch it, execute and then resume parsing just
     * like browsers do.
     *
     * @example
     *
     * ```js
     * const ParserStream = require('parse5-parser-stream');
     * const http = require('http');
     *
     * const parser = new ParserStream();
     *
     * parser.on('script', (scriptElement, documentWrite, resume) => {
     *     const src = scriptElement.attrs.find(({ name }) => name === 'src').value;
     *
     *     http.get(src, res => {
     *         // Fetch the script content, execute it with DOM built around `parser.document` and
     *         // `document.write` implemented using `documentWrite`.
     *         ...
     *         // Then resume parsing.
     *         resume();
     *     });
     * });
     *
     * parser.end('<script src="example.com/script.js"></script>');
     * ```
     *
     * @param event Name of the event
     * @param handler Listener receiving the script element, a `documentWrite` function and a `resume` function.
     * @returns The stream itself, like the base `on` overload.
     */
    on(
        event: 'script',
        handler: (scriptElement: T['element'], documentWrite: (html: string) => void, resume: () => void) => void
    ): this;
    /**
     * Base event handler.
     *
     * @param event Name of the event
     * @param handler Event handler
     */
    on(event: string, handler: (...args: any[]) => void): this;
}