1import * as assert from 'node:assert'; 2import { outdent } from 'outdent'; 3import { RewritingStream } from '../lib/index.js'; 4import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js'; 5import { 6 finished, 7 getStringDiffMsg, 8 writeChunkedToStream, 9 WritableStreamStub, 10} from 'parse5-test-utils/utils/common.js'; 11 12const srcHtml = outdent` 13 <!DOCTYPE html ""> 14 <html> 15 <!-- comment1 --> 16 <head /// 123> 17 </head> 18 <!-- comment2 --> 19 <body =123> 20 <div>Hey ya</div> 21 </body> 22 </html> 23`; 24 25const LONG_TEXT = 'a'.repeat((1 << 16) + 1); 26const LONG_TEXT_WITH_COMMENT = `${'a'.repeat((1 << 16) - 5)}<!-- comment -->`; 27 28function createRewriterTest({ 29 src, 30 expected, 31 assignTokenHandlers = (): void => { 32 /* Ignore */ 33 }, 34}: { 35 src: string; 36 expected: string; 37 assignTokenHandlers?: (rewriter: RewritingStream) => void; 38}) { 39 return async (): Promise<void> => { 40 const rewriter = new RewritingStream(); 41 const writable = new WritableStreamStub(); 42 43 rewriter.pipe(writable); 44 45 assignTokenHandlers(rewriter); 46 writeChunkedToStream(src, rewriter); 47 48 await finished(writable); 49 50 assert.ok(writable.writtenData === expected, getStringDiffMsg(writable.writtenData, expected)); 51 }; 52} 53 54describe('RewritingStream', () => { 55 // Raw data tests 56 for (const [idx, data] of loadSAXParserTestData().entries()) { 57 // NOTE: if we don't have any event handlers assigned, stream should use raw 58 // data for the serialization, so serialized content should identical to the original. 59 it( 60 `Raw token serialization - ${idx + 1}.${data.name}`, 61 createRewriterTest({ 62 src: data.src, 63 expected: data.src, 64 }) 65 ); 66 } 67 68 it( 69 'rewrite start tags', 70 createRewriterTest({ 71 src: srcHtml, 72 expected: outdent` 73 <!DOCTYPE html ""> 74 <html> 75 <!-- comment1 --> 76 <body 123=""> 77 </head> 78 <!-- comment2 --> 79 <head =123=""> 80 <div>Hey ya</div> 81 </body> 82 </html> 83 `, 84 assignTokenHandlers: (rewriter) => { 85 rewriter.on('startTag', (token) => { 86 if (token.tagName === 'head') { 87 token.tagName = 'body'; 88 } else if (token.tagName === 'body') { 89 token.tagName = 'head'; 90 } 91 92 rewriter.emitStartTag(token); 93 }); 94 }, 95 }) 96 ); 97 98 it( 99 'rewrite end tags', 100 createRewriterTest({ 101 src: srcHtml, 102 expected: outdent` 103 <!DOCTYPE html ""> 104 <html> 105 <!-- comment1 --> 106 <head /// 123> 107 </rewritten> 108 <!-- comment2 --> 109 <body =123> 110 <div>Hey ya</rewritten> 111 </rewritten> 112 </rewritten> 113 `, 114 assignTokenHandlers: (rewriter) => { 115 rewriter.on('endTag', (token) => { 116 token.tagName = 'rewritten'; 117 118 rewriter.emitEndTag(token); 119 }); 120 }, 121 }) 122 ); 123 124 it( 125 'rewrite text', 126 createRewriterTest({ 127 src: srcHtml, 128 expected: outdent` 129 <!DOCTYPE html ""> 130 <html> 131 <!-- comment1 --> 132 <head /// 123> 133 </head> 134 <!-- comment2 --> 135 <body =123> 136 <div>42</div> 137 </body> 138 </html> 139 `, 140 assignTokenHandlers: (rewriter) => { 141 rewriter.on('text', (token) => { 142 if (token.text.trim().length > 0) { 143 token.text = '42'; 144 } 145 146 rewriter.emitText(token); 147 }); 148 }, 149 }) 150 ); 151 152 it( 153 'rewrite comment', 154 createRewriterTest({ 155 src: srcHtml, 156 expected: outdent` 157 <!DOCTYPE html ""> 158 <html> 159 <!--42--> 160 <head /// 123> 161 </head> 162 <!--42--> 163 <body =123> 164 <div>Hey ya</div> 165 </body> 166 </html> 167 `, 168 assignTokenHandlers: (rewriter) => { 169 rewriter.on('comment', (token) => { 170 token.text = '42'; 171 172 rewriter.emitComment(token); 173 }); 174 }, 175 }) 176 ); 177 178 it( 179 'rewrite doctype', 180 createRewriterTest({ 181 src: srcHtml, 182 expected: outdent` 183 <!DOCTYPE html PUBLIC "42" "hey"> 184 <html> 185 <!-- comment1 --> 186 <head /// 123> 187 </head> 188 <!-- comment2 --> 189 <body =123> 190 <div>Hey ya</div> 191 </body> 192 </html> 193 `, 194 assignTokenHandlers: (rewriter) => { 195 rewriter.on('doctype', (token) => { 196 token.publicId = '42'; 197 token.systemId = 'hey'; 198 199 rewriter.emitDoctype(token); 200 }); 201 }, 202 }) 203 ); 204 205 it( 206 'rewrite doctype (no public id)', 207 createRewriterTest({ 208 src: srcHtml, 209 expected: outdent` 210 <!DOCTYPE html SYSTEM "hey"> 211 <html> 212 <!-- comment1 --> 213 <head /// 123> 214 </head> 215 <!-- comment2 --> 216 <body =123> 217 <div>Hey ya</div> 218 </body> 219 </html> 220 `, 221 assignTokenHandlers: (rewriter) => { 222 rewriter.on('doctype', (token) => { 223 token.publicId = null; 224 token.systemId = 'hey'; 225 226 rewriter.emitDoctype(token); 227 }); 228 }, 229 }) 230 ); 231 232 it( 233 'emit multiple', 234 createRewriterTest({ 235 src: srcHtml, 236 expected: outdent` 237 <!DOCTYPE html ""> 238 <wrap><html></wrap> 239 <!-- comment1 --> 240 <wrap><head 123=""/></wrap> 241 </head> 242 <!-- comment2 --> 243 <wrap><body =123=""></wrap> 244 <wrap><div></wrap>Hey ya</div> 245 </body> 246 </html> 247 `, 248 assignTokenHandlers: (rewriter) => { 249 rewriter.on('startTag', (token) => { 250 rewriter.emitRaw('<wrap>'); 251 252 if (token.tagName === 'head') { 253 token.selfClosing = true; 254 } 255 256 rewriter.emitStartTag(token); 257 rewriter.emitRaw('</wrap>'); 258 }); 259 }, 260 }) 261 ); 262 263 it( 264 'rewrite raw', 265 createRewriterTest({ 266 src: srcHtml, 267 expected: outdent` 268 <!DOCTYPE html "">42 269 <html>42 270 <!-- comment1 -->42 271 <head /// 123>42 272 </head>42 273 <!-- comment2 -->42 274 <body =123>42 275 <div>42Hey ya</div>42 276 </body>42 277 </html>42 278 `, 279 assignTokenHandlers: (rewriter) => { 280 const rewriteRaw = (_: unknown, raw: string): void => { 281 rewriter.emitRaw(`${raw}42`); 282 }; 283 284 rewriter 285 .on('doctype', rewriteRaw) 286 .on('startTag', rewriteRaw) 287 .on('endTag', rewriteRaw) 288 .on('comment', rewriteRaw); 289 }, 290 }) 291 ); 292 293 it( 294 'Should escape entities in attributes and text', 295 createRewriterTest({ 296 src: outdent` 297 <!DOCTYPE html ""> 298 <html> 299 <head foo='bar"baz"'> 300 </head> 301 <body> 302 <div>foo&bar</div> 303 </body> 304 </html> 305 `, 306 expected: outdent` 307 <!DOCTYPE html ""> 308 <html> 309 <head foo="bar"baz""> 310 </head> 311 <body> 312 <div>foo&bar</div> 313 </body> 314 </html> 315 `, 316 assignTokenHandlers: (rewriter) => { 317 rewriter.on('startTag', (token) => rewriter.emitStartTag(token)); 318 rewriter.on('text', (token) => rewriter.emitText(token)); 319 }, 320 }) 321 ); 322 323 it('Last text chunk must be flushed (GH-271)', async () => { 324 const parser = new RewritingStream(); 325 let foundText = false; 326 327 parser.on('text', ({ text }) => { 328 foundText = true; 329 assert.strictEqual(text, 'text'); 330 }); 331 332 parser.write('text'); 333 parser.end(); 334 335 await finished(parser); 336 337 assert.ok(foundText); 338 }); 339 340 it('Should not accept binary input (GH-269)', () => { 341 const stream = new RewritingStream(); 342 const buf = Buffer.from('test'); 343 344 assert.throws(() => stream.write(buf), TypeError); 345 }); 346 347 it( 348 'Should pass long text correctly (GH-292)', 349 createRewriterTest({ 350 src: LONG_TEXT, 351 expected: LONG_TEXT, 352 }) 353 ); 354 355 it( 356 'Should emit comment after text correctly', 357 createRewriterTest({ 358 src: LONG_TEXT_WITH_COMMENT, 359 expected: LONG_TEXT_WITH_COMMENT, 360 }) 361 ); 362 363 it( 364 'Should emit text in script without escaping (GH-339)', 365 createRewriterTest({ 366 src: '<script></script>', 367 expected: '<script>foo && bar</script>', 368 assignTokenHandlers: (rewriter) => { 369 // On a script tag, emit the text without escaping 370 rewriter.on('startTag', (token) => { 371 rewriter.emitStartTag(token); 372 if (token.tagName === 'script') { 373 rewriter.emitText({ text: 'foo && bar' }); 374 } 375 }); 376 }, 377 }) 378 ); 379}); 380