1import * as assert from 'node:assert';
2import { outdent } from 'outdent';
3import { RewritingStream } from '../lib/index.js';
4import { loadSAXParserTestData } from 'parse5-test-utils/utils/load-sax-parser-test-data.js';
5import {
6    finished,
7    getStringDiffMsg,
8    writeChunkedToStream,
9    WritableStreamStub,
10} from 'parse5-test-utils/utils/common.js';
11
// Shared HTML fixture used by most rewriting tests below. It intentionally
// contains non-canonical markup (`<!DOCTYPE html "">`, `<head /// 123>`,
// `<body =123>`) so the tests can tell raw pass-through apart from tokens that
// were re-serialized by the rewriter.
const srcHtml = [
    '<!DOCTYPE html "">',
    '<html>',
    '    <!-- comment1 -->',
    '    <head /// 123>',
    '    </head>',
    '    <!-- comment2 -->',
    '    <body =123>',
    '        <div>Hey ya</div>',
    '    </body>',
    '</html>',
].join('\n');
24
// 65 536 (2^16) characters; the two fixtures below are sized relative to it.
const SIXTY_FOUR_KI = 2 ** 16;

// Plain text that is exactly one character longer than 2^16 (see the GH-292 test).
const LONG_TEXT = 'a'.repeat(SIXTY_FOUR_KI + 1);

// 2^16 - 5 characters of text followed by a 16-character comment, so the
// comment starts before offset 2^16 and ends after it.
const LONG_TEXT_WITH_COMMENT = 'a'.repeat(SIXTY_FOUR_KI - 5) + '<!-- comment -->';
27
/**
 * Builds an async test body that runs `src` through a fresh `RewritingStream`
 * and checks the serialized output.
 *
 * The returned function pipes the rewriter into a `WritableStreamStub`, lets the
 * caller attach token handlers, feeds `src` through `writeChunkedToStream`, waits
 * for the stub to finish and then asserts that everything written to the stub
 * equals `expected`. On mismatch the assertion message comes from
 * `getStringDiffMsg`.
 *
 * @param src - HTML source written into the rewriter.
 * @param expected - Exact output expected on the writable side.
 * @param assignTokenHandlers - Optional hook that registers token handlers on
 *   the rewriter; when omitted no handlers are attached.
 * @returns A zero-argument async function suitable as an `it` callback.
 */
function createRewriterTest({
    src,
    expected,
    assignTokenHandlers,
}: {
    src: string;
    expected: string;
    assignTokenHandlers?: (rewriter: RewritingStream) => void;
}) {
    return async (): Promise<void> => {
        const sink = new WritableStreamStub();
        const stream = new RewritingStream();

        stream.pipe(sink);

        // Handlers are attached after piping but before any input is written.
        assignTokenHandlers?.(stream);
        writeChunkedToStream(src, stream);

        await finished(sink);

        const actual = sink.writtenData;

        assert.ok(actual === expected, getStringDiffMsg(actual, expected));
    };
}
53
54describe('RewritingStream', () => {
55    // Raw data tests
56    for (const [idx, data] of loadSAXParserTestData().entries()) {
57        // NOTE: if we don't have any event handlers assigned, stream should use raw
58        // data for the serialization, so serialized content should identical to the original.
59        it(
60            `Raw token serialization - ${idx + 1}.${data.name}`,
61            createRewriterTest({
62                src: data.src,
63                expected: data.src,
64            })
65        );
66    }
67
68    it(
69        'rewrite start tags',
70        createRewriterTest({
71            src: srcHtml,
72            expected: outdent`
73              <!DOCTYPE html "">
74              <html>
75                  <!-- comment1 -->
76                  <body 123="">
77                  </head>
78                  <!-- comment2 -->
79                  <head =123="">
80                      <div>Hey ya</div>
81                  </body>
82              </html>
83            `,
84            assignTokenHandlers: (rewriter) => {
85                rewriter.on('startTag', (token) => {
86                    if (token.tagName === 'head') {
87                        token.tagName = 'body';
88                    } else if (token.tagName === 'body') {
89                        token.tagName = 'head';
90                    }
91
92                    rewriter.emitStartTag(token);
93                });
94            },
95        })
96    );
97
98    it(
99        'rewrite end tags',
100        createRewriterTest({
101            src: srcHtml,
102            expected: outdent`
103              <!DOCTYPE html "">
104              <html>
105                  <!-- comment1 -->
106                  <head /// 123>
107                  </rewritten>
108                  <!-- comment2 -->
109                  <body =123>
110                      <div>Hey ya</rewritten>
111                  </rewritten>
112              </rewritten>
113            `,
114            assignTokenHandlers: (rewriter) => {
115                rewriter.on('endTag', (token) => {
116                    token.tagName = 'rewritten';
117
118                    rewriter.emitEndTag(token);
119                });
120            },
121        })
122    );
123
124    it(
125        'rewrite text',
126        createRewriterTest({
127            src: srcHtml,
128            expected: outdent`
129              <!DOCTYPE html "">
130              <html>
131                  <!-- comment1 -->
132                  <head /// 123>
133                  </head>
134                  <!-- comment2 -->
135                  <body =123>
136                      <div>42</div>
137                  </body>
138              </html>
139            `,
140            assignTokenHandlers: (rewriter) => {
141                rewriter.on('text', (token) => {
142                    if (token.text.trim().length > 0) {
143                        token.text = '42';
144                    }
145
146                    rewriter.emitText(token);
147                });
148            },
149        })
150    );
151
152    it(
153        'rewrite comment',
154        createRewriterTest({
155            src: srcHtml,
156            expected: outdent`
157              <!DOCTYPE html "">
158              <html>
159                  <!--42-->
160                  <head /// 123>
161                  </head>
162                  <!--42-->
163                  <body =123>
164                      <div>Hey ya</div>
165                  </body>
166              </html>
167            `,
168            assignTokenHandlers: (rewriter) => {
169                rewriter.on('comment', (token) => {
170                    token.text = '42';
171
172                    rewriter.emitComment(token);
173                });
174            },
175        })
176    );
177
178    it(
179        'rewrite doctype',
180        createRewriterTest({
181            src: srcHtml,
182            expected: outdent`
183              <!DOCTYPE html PUBLIC "42" "hey">
184              <html>
185                  <!-- comment1 -->
186                  <head /// 123>
187                  </head>
188                  <!-- comment2 -->
189                  <body =123>
190                      <div>Hey ya</div>
191                  </body>
192              </html>
193            `,
194            assignTokenHandlers: (rewriter) => {
195                rewriter.on('doctype', (token) => {
196                    token.publicId = '42';
197                    token.systemId = 'hey';
198
199                    rewriter.emitDoctype(token);
200                });
201            },
202        })
203    );
204
205    it(
206        'rewrite doctype (no public id)',
207        createRewriterTest({
208            src: srcHtml,
209            expected: outdent`
210              <!DOCTYPE html SYSTEM "hey">
211              <html>
212                  <!-- comment1 -->
213                  <head /// 123>
214                  </head>
215                  <!-- comment2 -->
216                  <body =123>
217                      <div>Hey ya</div>
218                  </body>
219              </html>
220            `,
221            assignTokenHandlers: (rewriter) => {
222                rewriter.on('doctype', (token) => {
223                    token.publicId = null;
224                    token.systemId = 'hey';
225
226                    rewriter.emitDoctype(token);
227                });
228            },
229        })
230    );
231
232    it(
233        'emit multiple',
234        createRewriterTest({
235            src: srcHtml,
236            expected: outdent`
237              <!DOCTYPE html "">
238              <wrap><html></wrap>
239                  <!-- comment1 -->
240                  <wrap><head 123=""/></wrap>
241                  </head>
242                  <!-- comment2 -->
243                  <wrap><body =123=""></wrap>
244                      <wrap><div></wrap>Hey ya</div>
245                  </body>
246              </html>
247            `,
248            assignTokenHandlers: (rewriter) => {
249                rewriter.on('startTag', (token) => {
250                    rewriter.emitRaw('<wrap>');
251
252                    if (token.tagName === 'head') {
253                        token.selfClosing = true;
254                    }
255
256                    rewriter.emitStartTag(token);
257                    rewriter.emitRaw('</wrap>');
258                });
259            },
260        })
261    );
262
263    it(
264        'rewrite raw',
265        createRewriterTest({
266            src: srcHtml,
267            expected: outdent`
268              <!DOCTYPE html "">42
269              <html>42
270                  <!-- comment1 -->42
271                  <head /// 123>42
272                  </head>42
273                  <!-- comment2 -->42
274                  <body =123>42
275                      <div>42Hey ya</div>42
276                  </body>42
277              </html>42
278            `,
279            assignTokenHandlers: (rewriter) => {
280                const rewriteRaw = (_: unknown, raw: string): void => {
281                    rewriter.emitRaw(`${raw}42`);
282                };
283
284                rewriter
285                    .on('doctype', rewriteRaw)
286                    .on('startTag', rewriteRaw)
287                    .on('endTag', rewriteRaw)
288                    .on('comment', rewriteRaw);
289            },
290        })
291    );
292
293    it(
294        'Should escape entities in attributes and text',
295        createRewriterTest({
296            src: outdent`
297              <!DOCTYPE html "">
298              <html>
299                  <head foo='bar"baz"'>
300                  </head>
301                  <body>
302                      <div>foo&amp;bar</div>
303                  </body>
304              </html>
305            `,
306            expected: outdent`
307              <!DOCTYPE html "">
308              <html>
309                  <head foo="bar&quot;baz&quot;">
310                  </head>
311                  <body>
312                      <div>foo&amp;bar</div>
313                  </body>
314              </html>
315            `,
316            assignTokenHandlers: (rewriter) => {
317                rewriter.on('startTag', (token) => rewriter.emitStartTag(token));
318                rewriter.on('text', (token) => rewriter.emitText(token));
319            },
320        })
321    );
322
323    it('Last text chunk must be flushed (GH-271)', async () => {
324        const parser = new RewritingStream();
325        let foundText = false;
326
327        parser.on('text', ({ text }) => {
328            foundText = true;
329            assert.strictEqual(text, 'text');
330        });
331
332        parser.write('text');
333        parser.end();
334
335        await finished(parser);
336
337        assert.ok(foundText);
338    });
339
340    it('Should not accept binary input (GH-269)', () => {
341        const stream = new RewritingStream();
342        const buf = Buffer.from('test');
343
344        assert.throws(() => stream.write(buf), TypeError);
345    });
346
347    it(
348        'Should pass long text correctly (GH-292)',
349        createRewriterTest({
350            src: LONG_TEXT,
351            expected: LONG_TEXT,
352        })
353    );
354
355    it(
356        'Should emit comment after text correctly',
357        createRewriterTest({
358            src: LONG_TEXT_WITH_COMMENT,
359            expected: LONG_TEXT_WITH_COMMENT,
360        })
361    );
362
363    it(
364        'Should emit text in script without escaping (GH-339)',
365        createRewriterTest({
366            src: '<script></script>',
367            expected: '<script>foo && bar</script>',
368            assignTokenHandlers: (rewriter) => {
369                // On a script tag, emit the text without escaping
370                rewriter.on('startTag', (token) => {
371                    rewriter.emitStartTag(token);
372                    if (token.tagName === 'script') {
373                        rewriter.emitText({ text: 'foo && bar' });
374                    }
375                });
376            },
377        })
378    );
379});
380