import { readFile, writeFile } from 'node:fs/promises';
import { basename } from 'node:path';
import { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
import type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';

// Script entry point: every command-line argument is treated as the path of a
// `.dat` parser test file. Each one is converted into a parser-feedback test
// and written to `test/data/parser-feedback/<name>.test`, one file at a time.
const datFiles = process.argv.slice(2);

for (const datFile of datFiles) {
    const datContent = await readFile(datFile, 'utf8');
    const serializedTest = generateParserFeedbackTest(datContent);
    const targetPath = `test/data/parser-feedback/${basename(datFile, '.dat')}.test`;

    await writeFile(targetPath, serializedTest);
}

/**
 * Parses `html` with a `Parser` subclass that records every token passed to
 * its token handlers, and returns the recorded list.
 *
 * Empty `Character` tokens are dropped and consecutive `Character` tokens are
 * merged into a single entry.
 *
 * @param html Markup to run through the parser.
 * @returns The recorded tokens, in the order they were handled.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const tokens: HtmlLibToken[] = [];

    /** Appends `token` to `tokens`, coalescing runs of character data. */
    function recordToken(token: HtmlLibToken): void {
        if (token[0] !== 'Character') {
            tokens.push(token);
            return;
        }

        // Character tokens without any text are not recorded at all.
        if (token[1] == null || token[1].length === 0) {
            return;
        }

        const previous = tokens[tokens.length - 1];

        if (previous?.[0] === 'Character') {
            // Extend the preceding character token instead of adding a new one.
            previous[1] += token[1];
        } else {
            tokens.push(token);
        }
    }

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        /** `true` while no overridden handler of this instance is executing. */
        private isTopLevel = true;

        /**
         * Runs `fn` and afterwards records the token produced by `getToken`,
         * but only for the outermost handler call: a handler that is invoked
         * while another one is still running must not record a second token.
         *
         * `getToken` is evaluated only after `fn` has returned.
         */
        private guardTopLevel(fn: () => void, getToken: () => HtmlLibToken): void {
            const wasTopLevel = this.isTopLevel;

            this.isTopLevel = false;
            fn();

            if (!wasTopLevel) {
                return;
            }

            this.isTopLevel = true;
            recordToken(getToken());
        }

        override onComment(token: Token.CommentToken): void {
            const delegate = (): void => super.onComment(token);

            this.guardTopLevel(delegate, () => ['Comment', token.data]);
        }

        override onDoctype(token: Token.DoctypeToken): void {
            const delegate = (): void => super.onDoctype(token);

            // The last entry is the negation of the token's `forceQuirks` flag.
            this.guardTopLevel(delegate, () => [
                'DOCTYPE',
                token.name,
                token.publicId,
                token.systemId,
                !token.forceQuirks,
            ]);
        }

        override onStartTag(token: Token.TagToken): void {
            const delegate = (): void => super.onStartTag(token);

            this.guardTopLevel(delegate, () => {
                // Turn the `{ name, value }` attribute list into a plain name -> value object.
                const attrsByName = Object.fromEntries(token.attrs.map((attr) => [attr.name, attr.value]));
                const entry: HtmlLibToken = ['StartTag', token.tagName, attrsByName];

                // The trailing `true` is only present for self-closing tags.
                if (token.selfClosing) {
                    entry.push(true);
                }

                return entry;
            });
        }

        override onEndTag(token: Token.TagToken): void {
            const delegate = (): void => super.onEndTag(token);

            // NOTE: the parser feedback simulator can produce adjusted SVG tag
            // names for end tag tokens, so the name is lower-cased here.
            this.guardTopLevel(delegate, () => ['EndTag', token.tagName.toLowerCase()]);
        }

        override onCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onCharacter(token);

            this.guardTopLevel(delegate, () => ['Character', token.chars]);
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onNullCharacter(token);

            this.guardTopLevel(delegate, () => ['Character', token.chars]);
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Both values are read before delegating, so the recorded token is
            // built from the state at entry rather than from whatever the base
            // handler leaves behind.
            // NOTE(review): presumably the base handler may reset
            // `skipNextNewLine` and/or rewrite `token.chars` — confirm.
            const skipLeadingNewLine = this.skipNextNewLine;
            const initialChars = token.chars;
            const delegate = (): void => super.onWhitespaceCharacter(token);

            this.guardTopLevel(delegate, () => {
                const dropFirstChar = skipLeadingNewLine && initialChars.startsWith('\n');

                return ['Character', dropFirstChar ? initialChars.slice(1) : initialChars];
            });
        }
    }

    ExtendedParser.parse(html);

    return tokens;
}

/**
 * Builds the serialized parser-feedback test for the contents of one `.dat`
 * parser test file.
 *
 * @param parserTestFile Text content of the `.dat` file (not a path).
 * @returns JSON text, indented with four spaces, of the form `{ tests: [...] }`
 *   where each entry holds `fragmentContext`, `description`, `input` and `output`.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const parsedTests = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const feedbackTests = parsedTests.map((test) => ({
        // Only the tag name of the fragment context is kept; `null` when there is none.
        fragmentContext: test.fragmentContext?.tagName ?? null,
        description: addSlashes(test.input),
        input: test.input,
        output: collectParserTokens(test.input),
    }));

    return JSON.stringify({ tests: feedbackTests }, null, 4);
}