1import {
2    Tokenizer,
3    type TokenizerOptions,
4    TokenizerMode,
5    type TokenHandler,
6    Token,
7    foreignContent,
8    html,
9} from 'parse5';
10
// Short alias for the tag ID enum; keeps the `case` labels in the class below compact.
const { TAG_ID: $ } = html;

// U+FFFD REPLACEMENT CHARACTER, emitted instead of a NULL character inside foreign content.
const REPLACEMENT_CHARACTER = '\uFFFD';
// Code point of U+000A LINE FEED ("\n").
const LINE_FEED_CODE_POINT = 10;
15
/**
 * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
 *
 * The simulator creates a `Tokenizer`, registers itself as its token handler and forwards every token to the
 * wrapped `handler` after applying the feedback a tree builder would normally provide:
 *
 *  - tracking of the current namespace (HTML / SVG / MathML), mirrored into `tokenizer.inForeignNode`;
 *  - switching the tokenizer state after start tags such as `<script>`, `<style>` or `<textarea>`;
 *  - dropping the line feed that immediately follows `<pre>`, `<textarea>` and `<listing>` start tags;
 *  - replacing NULL characters with U+FFFD inside foreign content;
 *  - adjusting tag names and attributes of tokens that belong to foreign content.
 */
export class ParserFeedbackSimulator implements TokenHandler {
    /** Stack of entered namespaces; index 0 is the current one. */
    private namespaceStack: html.NS[] = [];
    /** `true` while the current namespace is not HTML. */
    public inForeignContent = false;
    /** `true` right after a `<pre>`, `<textarea>` or `<listing>` start tag seen in HTML content. */
    public skipNextNewLine = false;
    /** Tokenizer driven by this simulator; the simulator itself is its token handler. */
    public tokenizer: Tokenizer;

    /**
     * @param options Options passed through to the underlying `Tokenizer`.
     * @param handler Receiver of the (possibly adjusted) tokens.
     */
    constructor(options: TokenizerOptions, private readonly handler: TokenHandler) {
        this.tokenizer = new Tokenizer(options, this);
        this._enterNamespace(html.NS.HTML);
    }

    /**
     * Forwards a NULL character token. Inside foreign content it is replaced by a
     * regular character token carrying U+FFFD at the same location.
     *
     * @internal
     */
    onNullCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;

        if (this.inForeignContent) {
            this.handler.onCharacter({
                type: Token.TokenType.CHARACTER,
                chars: REPLACEMENT_CHARACTER,
                location: token.location,
            });
        } else {
            this.handler.onNullCharacter(token);
        }
    }

    /**
     * Forwards a whitespace token, dropping a leading line feed when it directly
     * follows a `<pre>`, `<textarea>` or `<listing>` start tag. A token that consisted
     * only of that line feed is swallowed entirely.
     *
     * @internal
     */
    onWhitespaceCharacter(token: Token.CharacterToken): void {
        if (this.skipNextNewLine) {
            // Only the token immediately following the start tag is eligible, so the flag
            // is cleared by any whitespace token, whether or not it starts with a line feed.
            this.skipNextNewLine = false;

            if (token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
                if (token.chars.length === 1) {
                    return;
                }

                token.chars = token.chars.slice(1);
            }
        }

        this.handler.onWhitespaceCharacter(token);
    }

    /** @internal */
    onCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;
        this.handler.onCharacter(token);
    }

    /** @internal */
    onComment(token: Token.CommentToken): void {
        this.skipNextNewLine = false;
        this.handler.onComment(token);
    }

    /** @internal */
    onDoctype(token: Token.DoctypeToken): void {
        this.skipNextNewLine = false;
        this.handler.onDoctype(token);
    }

    /** @internal */
    onEof(token: Token.EOFToken): void {
        this.skipNextNewLine = false;
        this.handler.onEof(token);
    }

    //Namespace stack mutations

    /** Makes `namespace` the current namespace and syncs the foreign-content flags. */
    private _enterNamespace(namespace: html.NS): void {
        this.namespaceStack.unshift(namespace);
        this.inForeignContent = namespace !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    /** Drops the current namespace and syncs the foreign-content flags with the one below it. */
    private _leaveCurrentNamespace(): void {
        this.namespaceStack.shift();
        this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    //Token handlers

    /**
     * Switches the tokenizer state for start tags whose content is not tokenized as
     * regular data (RCDATA, PLAINTEXT, script data and RAWTEXT elements).
     * Other tags leave the state untouched.
     */
    private _ensureTokenizerMode(tn: html.TAG_ID): void {
        switch (tn) {
            case $.TEXTAREA:
            case $.TITLE: {
                this.tokenizer.state = TokenizerMode.RCDATA;
                break;
            }
            case $.PLAINTEXT: {
                this.tokenizer.state = TokenizerMode.PLAINTEXT;
                break;
            }
            case $.SCRIPT: {
                this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
                break;
            }
            case $.STYLE:
            case $.IFRAME:
            case $.XMP:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.NOSCRIPT: {
                this.tokenizer.state = TokenizerMode.RAWTEXT;
                break;
            }
            default:
            // Do nothing
        }
    }

    /**
     * Handles a start tag: updates the namespace stack, applies foreign-content
     * adjustments or HTML-content feedback to the token and forwards it.
     *
     * @internal
     */
    onStartTag(token: Token.TagToken): void {
        let tn = token.tagID;

        switch (tn) {
            case $.SVG: {
                this._enterNamespace(html.NS.SVG);
                break;
            }
            case $.MATH: {
                this._enterNamespace(html.NS.MATHML);
                break;
            }
            default:
            // Do nothing
        }

        if (this.inForeignContent) {
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
            } else {
                const currentNs = this.namespaceStack[0];

                // Apply the same token adjustments the tree builder performs in foreign content.
                if (currentNs === html.NS.MATHML) {
                    foreignContent.adjustTokenMathMLAttrs(token);
                } else if (currentNs === html.NS.SVG) {
                    foreignContent.adjustTokenSVGTagName(token);
                    foreignContent.adjustTokenSVGAttrs(token);
                }

                foreignContent.adjustTokenXMLAttrs(token);

                // The SVG tag name adjustment may have changed the tag ID (e.g. `foreignObject`),
                // so it has to be re-read before the integration point check.
                tn = token.tagID;

                if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                    this._enterNamespace(html.NS.HTML);
                }
            }
        } else {
            switch (tn) {
                case $.PRE:
                case $.TEXTAREA:
                case $.LISTING: {
                    this.skipNextNewLine = true;
                    break;
                }
                case $.IMAGE: {
                    // `<image>` is treated as `<img>`.
                    token.tagName = html.TAG_NAMES.IMG;
                    token.tagID = $.IMG;
                    break;
                }
                default:
                // Do nothing
            }

            // NOTE: `tn` still holds the original tag ID here, even if the token was rewritten to `<img>`.
            this._ensureTokenizerMode(tn);
        }

        this.handler.onStartTag(token);
    }

    /**
     * Handles an end tag: leaves an integration point or a foreign root element
     * when the tag closes it, adjusts SVG tag names and forwards the token.
     *
     * @internal
     */
    onEndTag(token: Token.TagToken): void {
        let tn = token.tagID;

        if (!this.inForeignContent) {
            const previousNs = this.namespaceStack[1];

            if (previousNs === html.NS.SVG) {
                // The element was opened in SVG, so compare against its adjusted (case-corrected) name.
                const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                if (adjustedTagName) {
                    tn = html.getTagID(adjustedTagName);
                }
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
            (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        //NOTE: adjust the end tag name as well, so that it stays consistent with the adjusted start tag
        if (this.namespaceStack[0] === html.NS.SVG) {
            foreignContent.adjustTokenSVGTagName(token);
        }

        this.handler.onEndTag(token);
    }
}
207