1import { readFile, writeFile } from 'node:fs/promises';
2import { basename } from 'node:path';
3import { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
4import type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
5import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
6import { addSlashes } from 'parse5-test-utils/utils/common.js';
7
// Convert every `.dat` file named on the command line into a parser-feedback
// test file. Files are handled one at a time, in argument order.
for (const datFile of process.argv.slice(2)) {
    const datSource = await readFile(datFile, 'utf8');
    const serializedTest = generateParserFeedbackTest(datSource);

    // The output keeps the input's base name, swapping `.dat` for `.test`.
    const testName = basename(datFile, '.dat');

    await writeFile(`test/data/parser-feedback/${testName}.test`, serializedTest);
}
15
/**
 * Parses `html` with an instrumented parser and returns the list of token
 * entries observed by the parser's token handlers.
 *
 * Consecutive `Character` entries are merged into one, and empty character
 * entries are dropped.
 *
 * @param html Markup to parse.
 * @returns The recorded token entries, in the order they were handled.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const tokens: HtmlLibToken[] = [];

    /** Builds the `Character` entry shared by the plain and null character handlers. */
    const toCharacterEntry = (token: Token.CharacterToken): HtmlLibToken => ['Character', token.chars];

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        /** `true` while no instrumented handler is currently running. */
        private isTopLevel = true;

        /**
         * Runs `handle` (the base-class handler) and afterwards records the
         * entry produced by `buildEntry`.
         *
         * Each token should be recorded only once, so calls that happen while
         * another instrumented handler is already running are delegated but
         * not recorded; the `isTopLevel` flag tracks that.
         */
        private guardTopLevel(handle: () => void, buildEntry: () => HtmlLibToken): void {
            const wasTopLevel = this.isTopLevel;

            this.isTopLevel = false;
            handle();

            if (!wasTopLevel) {
                return;
            }

            this.isTopLevel = true;

            // The entry is built only after the base handler has run.
            const entry = buildEntry();

            if (entry[0] !== 'Character') {
                tokens.push(entry);
                return;
            }

            // Empty character data is not recorded at all.
            if (entry[1] == null || entry[1].length === 0) {
                return;
            }

            const previous = tokens[tokens.length - 1];

            if (previous?.[0] === 'Character') {
                // Fold adjacent character data into a single entry.
                previous[1] += entry[1];
            } else {
                tokens.push(entry);
            }
        }

        override onComment(token: Token.CommentToken): void {
            const delegate = (): void => super.onComment(token);
            const buildEntry = (): HtmlLibToken => ['Comment', token.data];

            this.guardTopLevel(delegate, buildEntry);
        }

        override onDoctype(token: Token.DoctypeToken): void {
            const delegate = (): void => super.onDoctype(token);
            const buildEntry = (): HtmlLibToken => {
                const { name, publicId, systemId, forceQuirks } = token;

                // The last slot is the negation of the token's `forceQuirks` flag.
                return ['DOCTYPE', name, publicId, systemId, !forceQuirks];
            };

            this.guardTopLevel(delegate, buildEntry);
        }

        override onStartTag(token: Token.TagToken): void {
            const delegate = (): void => super.onStartTag(token);
            const buildEntry = (): HtmlLibToken => {
                // Turn the attribute list into a name -> value object.
                const attrsByName = Object.fromEntries(token.attrs.map((attr) => [attr.name, attr.value]));

                // A trailing `true` is appended only for self-closing tags.
                return token.selfClosing
                    ? ['StartTag', token.tagName, attrsByName, true]
                    : ['StartTag', token.tagName, attrsByName];
            };

            this.guardTopLevel(delegate, buildEntry);
        }

        override onEndTag(token: Token.TagToken): void {
            const delegate = (): void => super.onEndTag(token);
            // NOTE: parser feedback simulator can produce adjusted SVG
            // tag names for end tag tokens, so the name is lower-cased.
            const buildEntry = (): HtmlLibToken => ['EndTag', token.tagName.toLowerCase()];

            this.guardTopLevel(delegate, buildEntry);
        }

        override onCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onCharacter(token);

            this.guardTopLevel(delegate, () => toCharacterEntry(token));
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onNullCharacter(token);

            this.guardTopLevel(delegate, () => toCharacterEntry(token));
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Snapshot both values before delegating, so the recorded entry
            // reflects the state as it was when the token arrived
            // (presumably the base handler may change `skipNextNewLine`).
            const shouldSkipNewLine = this.skipNextNewLine;
            const originalChars = token.chars;

            const delegate = (): void => super.onWhitespaceCharacter(token);
            const buildEntry = (): HtmlLibToken => {
                if (shouldSkipNewLine && originalChars.startsWith('\n')) {
                    return ['Character', originalChars.slice(1)];
                }

                return ['Character', originalChars];
            };

            this.guardTopLevel(delegate, buildEntry);
        }
    }

    ExtendedParser.parse(html);

    return tokens;
}
115
/**
 * Builds the serialized parser-feedback test for the contents of one `.dat`
 * parser test file.
 *
 * Each test parsed from the file yields one record holding the fragment
 * context tag name (or `null` when there is none), a slash-escaped
 * description of the input, the raw input, and the tokens collected while
 * parsing that input.
 *
 * @param parserTestFile Text content of the `.dat` file.
 * @returns JSON text (4-space indented) of the shape `{ tests: [...] }`.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const parsedTests = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const tests = parsedTests.map((parsedTest) => {
        const { input, fragmentContext } = parsedTest;
        const contextTagName = fragmentContext?.tagName ?? null;

        return {
            fragmentContext: contextTagName,
            description: addSlashes(input),
            input,
            output: collectParserTokens(input),
        };
    });

    return JSON.stringify({ tests }, null, 4);
}
130