import { readFile, writeFile } from 'node:fs/promises';
import { basename } from 'node:path';
import { Parser, type DefaultTreeAdapterMap, type TreeAdapterTypeMap, type Token, defaultTreeAdapter } from 'parse5';
import type { HtmlLibToken } from 'parse5-test-utils/utils/generate-tokenization-tests.js';
import { parseDatFile } from 'parse5-test-utils/utils/parse-dat-file.js';
import { addSlashes } from 'parse5-test-utils/utils/common.js';

// Script entry point: every command-line argument is treated as the path of a
// `.dat` parser test file. Each one is converted into a parser-feedback test
// and written to `test/data/parser-feedback/<name>.test`, where `<name>` is
// the input's base name without the `.dat` extension. Files are processed
// one at a time, in argument order.
const datFiles = process.argv.slice(2);

for (const datFile of datFiles) {
    const outputFile = `test/data/parser-feedback/${basename(datFile, '.dat')}.test`;
    const datSource = await readFile(datFile, 'utf8');

    await writeFile(outputFile, generateParserFeedbackTest(datSource));
}

/**
 * Parses `html` with an instrumented `Parser` and returns the tokens its
 * handlers received, expressed as `HtmlLibToken` tuples.
 *
 * Character tokens with no text are dropped, and a character token that
 * directly follows another character token is merged into it.
 *
 * @param html Markup to run through the parser.
 * @returns The recorded tokens, in the order they were handled.
 */
function collectParserTokens(html: string): HtmlLibToken[] {
    const tokens: HtmlLibToken[] = [];

    class ExtendedParser<T extends TreeAdapterTypeMap> extends Parser<T> {
        /** `false` while an outermost handler call is still in progress. */
        private isTopLevel = true;

        /**
         * Runs `fn` (the inherited handler) and then records the token
         * produced by `getToken`, but only for the outermost handler call.
         * Handlers that are re-entered while `fn` is running see
         * `isTopLevel === false` and record nothing, so each token is added
         * exactly once.
         *
         * `getToken` is evaluated after `fn` has returned, so it observes any
         * changes `fn` made to the token.
         */
        private guardTopLevel(fn: () => void, getToken: () => HtmlLibToken): void {
            const wasTopLevel = this.isTopLevel;

            this.isTopLevel = false;
            fn();

            if (!wasTopLevel) {
                return;
            }

            this.isTopLevel = true;

            const token = getToken();

            if (token[0] !== 'Character') {
                tokens.push(token);
                return;
            }

            const text = token[1];

            // Character tokens without any text are not recorded.
            if (text == null || text.length === 0) {
                return;
            }

            const previous = tokens[tokens.length - 1];

            if (previous?.[0] === 'Character') {
                // Fold consecutive character tokens into a single entry.
                previous[1] += text;
            } else {
                tokens.push(token);
            }
        }

        override onComment(token: Token.CommentToken): void {
            const delegate = (): void => super.onComment(token);
            const describe = (): HtmlLibToken => ['Comment', token.data];

            this.guardTopLevel(delegate, describe);
        }

        override onDoctype(token: Token.DoctypeToken): void {
            const delegate = (): void => super.onDoctype(token);
            // The final entry is the negation of the token's `forceQuirks` flag.
            const describe = (): HtmlLibToken => [
                'DOCTYPE',
                token.name,
                token.publicId,
                token.systemId,
                !token.forceQuirks,
            ];

            this.guardTopLevel(delegate, describe);
        }

        override onStartTag(token: Token.TagToken): void {
            const delegate = (): void => super.onStartTag(token);
            const describe = (): HtmlLibToken => {
                // Attributes are recorded as a plain name -> value object.
                const attrsByName = Object.fromEntries(token.attrs.map((attr) => [attr.name, attr.value]));

                // A trailing `true` is present only for self-closing tags.
                return token.selfClosing
                    ? ['StartTag', token.tagName, attrsByName, true]
                    : ['StartTag', token.tagName, attrsByName];
            };

            this.guardTopLevel(delegate, describe);
        }

        override onEndTag(token: Token.TagToken): void {
            const delegate = (): void => super.onEndTag(token);
            // NOTE: the parser feedback simulator can produce adjusted SVG
            // tag names for end tag tokens, so the name is lower-cased here.
            const describe = (): HtmlLibToken => ['EndTag', token.tagName.toLowerCase()];

            this.guardTopLevel(delegate, describe);
        }

        override onCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onCharacter(token);
            const describe = (): HtmlLibToken => ['Character', token.chars];

            this.guardTopLevel(delegate, describe);
        }

        override onNullCharacter(token: Token.CharacterToken): void {
            const delegate = (): void => super.onNullCharacter(token);
            const describe = (): HtmlLibToken => ['Character', token.chars];

            this.guardTopLevel(delegate, describe);
        }

        override onWhitespaceCharacter(token: Token.CharacterToken): void {
            // Both values are captured before delegating to the inherited
            // handler. When `skipNextNewLine` is set and the text starts with
            // a line feed, that line feed is left out of the recorded token
            // (presumably because the parser discards it as well; confirm
            // against the parse5 implementation).
            const dropLeadingNewLine = this.skipNextNewLine && token.chars.startsWith('\n');
            const recordedChars = dropLeadingNewLine ? token.chars.slice(1) : token.chars;

            const delegate = (): void => super.onWhitespaceCharacter(token);
            const describe = (): HtmlLibToken => ['Character', recordedChars];

            this.guardTopLevel(delegate, describe);
        }
    }

    ExtendedParser.parse(html);

    return tokens;
}

/**
 * Converts the text of a `.dat` parser test file into the JSON text of a
 * parser-feedback test.
 *
 * Each parsed test yields one entry holding the fragment context tag name
 * (`null` when there is none), a description built from the input via
 * `addSlashes`, the raw input, and the tokens recorded while parsing it.
 *
 * @param parserTestFile Contents of the `.dat` file.
 * @returns JSON text of the form `{ "tests": [...] }`, indented with four spaces.
 */
function generateParserFeedbackTest(parserTestFile: string): string {
    const datTests = parseDatFile<DefaultTreeAdapterMap>(parserTestFile, defaultTreeAdapter);

    const tests = datTests.map((datTest) => {
        const { input, fragmentContext } = datTest;

        return {
            fragmentContext: fragmentContext?.tagName ?? null,
            description: addSlashes(input),
            input,
            output: collectParserTokens(input),
        };
    });

    return JSON.stringify({ tests }, null, 4);
}