import {
    Tokenizer,
    type TokenizerOptions,
    TokenizerMode,
    type TokenHandler,
    Token,
    foreignContent,
    html,
} from 'parse5';

const $ = html.TAG_ID;

const REPLACEMENT_CHARACTER = '\uFFFD';
const LINE_FEED_CODE_POINT = 0x0a;

/**
 * Simulates adjustments of the Tokenizer which are performed by the standard parser during tree construction.
 *
 * The simulator sits between a {@link Tokenizer} and a downstream {@link TokenHandler}:
 * it receives every token first, updates the tokenizer (`state`, `inForeignNode`) and its
 * own bookkeeping (`namespaceStack`, `skipNextNewLine`), and then forwards the token to
 * the wrapped handler.
 */
export class ParserFeedbackSimulator implements TokenHandler {
    /** Stack of namespaces currently entered; index 0 is the innermost (current) one. */
    private namespaceStack: html.NS[] = [];
    /** `true` while the current namespace is anything other than HTML. */
    public inForeignContent = false;
    /** `true` when a line feed at the start of the very next token has to be dropped. */
    public skipNextNewLine = false;
    public tokenizer: Tokenizer;

    /**
     * @param options Options passed through to the underlying tokenizer.
     * @param handler Handler that receives the tokens after the simulator has processed them.
     */
    constructor(options: TokenizerOptions, private handler: TokenHandler) {
        this.tokenizer = new Tokenizer(options, this);
        this._enterNamespace(html.NS.HTML);
    }

    /**
     * Forwards a null character; in foreign content it is emitted as a regular character
     * token carrying U+FFFD instead.
     *
     * @internal
     */
    onNullCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;

        if (this.inForeignContent) {
            this.handler.onCharacter({
                type: Token.TokenType.CHARACTER,
                chars: REPLACEMENT_CHARACTER,
                location: token.location,
            });
        } else {
            this.handler.onNullCharacter(token);
        }
    }

    /**
     * Forwards a whitespace token, dropping a single leading line feed when the previous
     * token requested it via `skipNextNewLine`.
     *
     * @internal
     */
    onWhitespaceCharacter(token: Token.CharacterToken): void {
        if (this.skipNextNewLine) {
            // The request only covers the token immediately following the start tag, so it is
            // consumed here whether or not this token actually begins with a line feed.
            this.skipNextNewLine = false;

            if (token.chars.charCodeAt(0) === LINE_FEED_CODE_POINT) {
                if (token.chars.length === 1) {
                    // The token consisted solely of the skipped line feed: nothing to forward.
                    return;
                }

                token.chars = token.chars.slice(1);
            }
        }

        this.handler.onWhitespaceCharacter(token);
    }

    /** @internal */
    onCharacter(token: Token.CharacterToken): void {
        this.skipNextNewLine = false;
        this.handler.onCharacter(token);
    }

    /** @internal */
    onComment(token: Token.CommentToken): void {
        this.skipNextNewLine = false;
        this.handler.onComment(token);
    }

    /** @internal */
    onDoctype(token: Token.DoctypeToken): void {
        this.skipNextNewLine = false;
        this.handler.onDoctype(token);
    }

    /** @internal */
    onEof(token: Token.EOFToken): void {
        this.skipNextNewLine = false;
        this.handler.onEof(token);
    }

    //Namespace stack mutations

    /** Pushes `namespace` as the current namespace and syncs the foreign-content flags. */
    private _enterNamespace(namespace: html.NS): void {
        this.namespaceStack.unshift(namespace);
        this.inForeignContent = namespace !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    /** Pops the current namespace and syncs the foreign-content flags with the new top. */
    private _leaveCurrentNamespace(): void {
        this.namespaceStack.shift();
        this.inForeignContent = this.namespaceStack[0] !== html.NS.HTML;
        this.tokenizer.inForeignNode = this.inForeignContent;
    }

    //Token handlers

    /** Switches the tokenizer state for HTML elements whose content is not tokenized as regular data. */
    private _ensureTokenizerMode(tn: html.TAG_ID): void {
        switch (tn) {
            case $.TEXTAREA:
            case $.TITLE: {
                this.tokenizer.state = TokenizerMode.RCDATA;
                break;
            }
            case $.PLAINTEXT: {
                this.tokenizer.state = TokenizerMode.PLAINTEXT;
                break;
            }
            case $.SCRIPT: {
                this.tokenizer.state = TokenizerMode.SCRIPT_DATA;
                break;
            }
            case $.STYLE:
            case $.IFRAME:
            case $.XMP:
            case $.NOEMBED:
            case $.NOFRAMES:
            case $.NOSCRIPT: {
                this.tokenizer.state = TokenizerMode.RAWTEXT;
                break;
            }
            default:
            // Do nothing
        }
    }

    /**
     * Tracks namespace changes and tokenizer mode switches caused by a start tag, then
     * forwards the token.
     *
     * @internal
     */
    onStartTag(token: Token.TagToken): void {
        // Like every other token, a start tag ends a pending newline skip. This has to happen
        // before the switch below, which may request a new skip for this very tag.
        this.skipNextNewLine = false;

        let tn = token.tagID;

        // NOTE(review): a self-closing `<svg/>` / `<math/>` also enters the namespace here and
        // nothing in this file leaves it again - confirm whether that is intended.
        switch (tn) {
            case $.SVG: {
                this._enterNamespace(html.NS.SVG);
                break;
            }
            case $.MATH: {
                this._enterNamespace(html.NS.MATHML);
                break;
            }
            default:
            // Do nothing
        }

        if (this.inForeignContent) {
            if (foreignContent.causesExit(token)) {
                this._leaveCurrentNamespace();
            } else {
                const currentNs = this.namespaceStack[0];

                if (currentNs === html.NS.SVG) {
                    // Resolve the tag ID through the SVG tag name adjustment map, exactly as
                    // `onEndTag` does, so that entering and leaving an integration point are
                    // decided on the same tag ID. The token itself is left untouched.
                    const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                    if (adjustedTagName) {
                        tn = html.getTagID(adjustedTagName);
                    }
                }

                if (!token.selfClosing && foreignContent.isIntegrationPoint(tn, currentNs, token.attrs)) {
                    this._enterNamespace(html.NS.HTML);
                }
            }
        } else {
            switch (tn) {
                case $.PRE:
                case $.TEXTAREA:
                case $.LISTING: {
                    this.skipNextNewLine = true;
                    break;
                }
                case $.IMAGE: {
                    // `<image>` is rewritten to `<img>` before it reaches the handler.
                    token.tagName = html.TAG_NAMES.IMG;
                    token.tagID = $.IMG;
                    break;
                }
                default:
                // Do nothing
            }

            // Uses the tag ID read before the `image` rewrite; `img` has no special mode anyway.
            this._ensureTokenizerMode(tn);
        }

        this.handler.onStartTag(token);
    }

    /**
     * Tracks namespace exits caused by an end tag, then forwards the token.
     *
     * @internal
     */
    onEndTag(token: Token.TagToken): void {
        // An end tag is a token too, so it cancels a pending newline skip.
        this.skipNextNewLine = false;

        let tn = token.tagID;

        if (!this.inForeignContent) {
            const previousNs = this.namespaceStack[1];

            if (previousNs === html.NS.SVG) {
                const adjustedTagName = foreignContent.SVG_TAG_NAMES_ADJUSTMENT_MAP.get(token.tagName);

                if (adjustedTagName) {
                    tn = html.getTagID(adjustedTagName);
                }
            }

            //NOTE: check for exit from integration point
            if (foreignContent.isIntegrationPoint(tn, previousNs, token.attrs)) {
                this._leaveCurrentNamespace();
            }
        } else if (
            (tn === $.SVG && this.namespaceStack[0] === html.NS.SVG) ||
            (tn === $.MATH && this.namespaceStack[0] === html.NS.MATHML)
        ) {
            this._leaveCurrentNamespace();
        }

        this.handler.onEndTag(token);
    }
}