193fb6ee3Sopenharmony_ciimport { readFile, writeFile } from 'node:fs/promises';
293fb6ee3Sopenharmony_ciimport { basename } from 'node:path';
393fb6ee3Sopenharmony_ciimport { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
493fb6ee3Sopenharmony_ciimport type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
593fb6ee3Sopenharmony_ciimport { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
693fb6ee3Sopenharmony_ciimport { addSlashes } from 'parse5-test-utils/utils/common.js';
793fb6ee3Sopenharmony_ci
// Convert every `.dat` file named on the command line into a parser-feedback
// `.test` file. Files are handled one at a time, in argument order.
for (const datPath of process.argv.slice(2)) {
    // The output keeps the input's base name, minus its `.dat` extension.
    const testName = basename(datPath, '.dat');
    const datSource = await readFile(datPath, 'utf8');

    await writeFile(`test/data/parser-feedback/${testName}.test`, generateParserFeedbackTest(datSource));
}
1593fb6ee3Sopenharmony_ci
/**
 * Parses `html` with a `Parser` subclass that records each token handed to
 * its `on*` handlers, and returns the recorded tokens in `HtmlLibToken` form.
 *
 * Empty character tokens are dropped and consecutive character tokens are
 * merged into a single entry.
 *
 * @param html Markup to parse.
 * @returns The recorded tokens, in the order the handlers completed.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const collected: HtmlLibToken[] = [];

    /** Stores one token, skipping empty text and folding adjacent text runs together. */
    const record = (entry: HtmlLibToken): void => {
        if (entry[0] !== 'Character') {
            collected.push(entry);
            return;
        }

        const text = entry[1];

        if (text == null || text.length === 0) {
            return;
        }

        const previous = collected[collected.length - 1];

        if (previous?.[0] === 'Character') {
            // Extend the previous character entry instead of adding a new one.
            previous[1] += text;
        } else {
            collected.push(entry);
        }
    };

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        private isTopLevel = true;

        /**
         * Runs the base handler and then records its token, but only for the
         * outermost handler call: `isTopLevel` is cleared while `runHandler`
         * executes, so handlers invoked from within it record nothing.
         *
         * The token is built only after `runHandler` has returned.
         */
        private guardTopLevel(runHandler: () => void, buildToken: () => HtmlLibToken): void {
            const wasTopLevel = this.isTopLevel;

            this.isTopLevel = false;
            runHandler();

            if (!wasTopLevel) {
                return;
            }

            this.isTopLevel = true;
            record(buildToken());
        }

        override onComment(token: Token.CommentToken): void {
            const forward = (): void => super.onComment(token);

            this.guardTopLevel(forward, () => ['Comment', token.data]);
        }

        override onDoctype(token: Token.DoctypeToken): void {
            const forward = (): void => super.onDoctype(token);

            this.guardTopLevel(forward, () => [
                'DOCTYPE',
                token.name,
                token.publicId,
                token.systemId,
                // The recorded flag is the negation of `forceQuirks`.
                !token.forceQuirks,
            ]);
        }

        override onStartTag(token: Token.TagToken): void {
            const forward = (): void => super.onStartTag(token);

            this.guardTopLevel(forward, () => {
                // Attributes are recorded as a name -> value object.
                const attrPairs = token.attrs.map((attr) => [attr.name, attr.value]);
                const attrs = Object.fromEntries(attrPairs);

                // The trailing `true` is only present for self-closing tags.
                return token.selfClosing
                    ? ['StartTag', token.tagName, attrs, true]
                    : ['StartTag', token.tagName, attrs];
            });
        }

        override onEndTag(token: Token.TagToken): void {
            const forward = (): void => super.onEndTag(token);

            // NOTE: parser feedback simulator can produce adjusted SVG
            // tag names for end tag tokens so we need to lower case it
            this.guardTopLevel(forward, () => ['EndTag', token.tagName.toLowerCase()]);
        }

        override onCharacter(token: Token.CharacterToken): void {
            const forward = (): void => super.onCharacter(token);

            this.guardTopLevel(forward, () => ['Character', token.chars]);
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            const forward = (): void => super.onNullCharacter(token);

            this.guardTopLevel(forward, () => ['Character', token.chars]);
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Both values are read before delegating to the base handler.
            // NOTE(review): presumably the base handler may change
            // `skipNextNewLine` — confirm against the parse5 Parser source.
            const shouldSkipNewLine = this.skipNextNewLine;
            const originalChars = token.chars;
            const emittedChars =
                shouldSkipNewLine && originalChars.startsWith('\n') ? originalChars.slice(1) : originalChars;

            this.guardTopLevel(
                () => super.onWhitespaceCharacter(token),
                () => ['Character', emittedChars]
            );
        }
    }

    ExtendedParser.parse(html);

    return collected;
}
11593fb6ee3Sopenharmony_ci
/**
 * Builds the JSON text of a parser-feedback test file from the contents of a
 * `.dat` parser test file.
 *
 * Each test case parsed from the `.dat` content becomes an object holding the
 * fragment context tag name (or `null` when there is none), a description
 * derived from the input via `addSlashes`, the raw input, and the tokens
 * collected while parsing that input.
 *
 * @param parserTestFile Contents of the `.dat` file (not a path).
 * @returns The test suite serialized as JSON with 4-space indentation.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const datCases = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const tests = datCases.map((datCase) => {
        const html = datCase.input;
        const contextTag = datCase.fragmentContext?.tagName ?? null;

        return {
            fragmentContext: contextTag,
            description: addSlashes(html),
            input: html,
            output: collectParserTokens(html),
        };
    });

    return JSON.stringify({ tests }, null, 4);
}
130