tag
* rewriter.on('text', (_, raw) => {
* // Use the raw representation of text without HTML entities decoding
* rewriter.emitRaw(`<i>${raw}</i>`);
* });
*
* http.get('http://google.com', res => {
* // Assumes response is UTF-8.
* res.setEncoding('utf8');
* // `RewritingStream` is a `Transform` stream, which means you can pipe
* // through it.
* res.pipe(rewriter).pipe(file);
* });
* ```
*/
export class RewritingStream extends SAXParser {
    /** Note: `sourceCodeLocationInfo` is always enabled. */
    constructor() {
        super({ sourceCodeLocationInfo: true });
    }

    /**
     * Feeds `chunk` to the underlying parser and always returns an empty string.
     *
     * @param chunk Next piece of the input document.
     * @returns Always `''`; output is produced via `push()` from the `emit*` methods instead.
     */
    override _transformChunk(chunk: string): string {
        // NOTE: ignore upstream return values as we want to push to
        // the `Writable` part of the `Transform` stream ourselves.
        super._transformChunk(chunk);
        return '';
    }

    /**
     * Returns the slice of the buffered input that `location` points at.
     *
     * @param location Source location whose `startOffset`/`endOffset` delimit the slice.
     */
    private _getRawHtml(location: Token.Location): string {
        const { droppedBufferSize, html } = this.tokenizer.preprocessor;
        // Offsets in `location` are shifted by `droppedBufferSize` to index into
        // the preprocessor's current `html` buffer.
        const start = location.startOffset - droppedBufferSize;
        const end = location.endOffset - droppedBufferSize;

        return html.slice(start, end);
    }

    // Events

    /**
     * Dispatches `token` through the base implementation; when that reports no
     * listener, the token's raw source text is written to the output unchanged.
     *
     * @param eventName Name of the event to dispatch.
     * @param token Token being dispatched.
     * @returns Always `true`.
     */
    protected override emitIfListenerExists(eventName: string, token: SaxToken): boolean {
        if (!super.emitIfListenerExists(eventName, token)) {
            this.emitRaw(this._getRawHtml(token.sourceCodeLocation!));
        }

        // NOTE: don't skip new lines after `<pre>` and other tags,
        // otherwise we'll have incorrect raw data.
        this.parserFeedbackSimulator.skipNextNewLine = false;
        return true;
    }

    // Emitter API

    /**
     * Emits `eventName` with the token and its raw source text as listener arguments.
     *
     * @param eventName Name of the event to emit.
     * @param token Token passed to listeners as the first argument.
     */
    protected override _emitToken(eventName: string, token: SaxToken): void {
        this.emit(eventName, token, this._getRawHtml(token.sourceCodeLocation!));
    }

    /** Emits a serialized document type token into the output stream. */
    public emitDoctype(token: Doctype): void {
        // NOTE(review): assumes `Doctype` exposes `name`, `publicId` and `systemId`,
        // with `null` meaning "absent" — confirm against the SAX parser's token types.
        let res = `<!DOCTYPE ${token.name}`;

        if (token.publicId != null) {
            res += ` PUBLIC "${token.publicId}"`;
        } else if (token.systemId != null) {
            // A system identifier without a public one needs the `SYSTEM` keyword.
            res += ' SYSTEM';
        }

        if (token.systemId != null) {
            res += ` "${token.systemId}"`;
        }

        res += '>';
        this.push(res);
    }

    /** Emits a serialized start tag token into the output stream. */
    public emitStartTag(token: StartTag): void {
        let res = `<${token.tagName}`;

        for (const attr of token.attrs) {
            res += ` ${attr.name}="${escapeAttribute(attr.value)}"`;
        }

        res += token.selfClosing ? '/>' : '>';
        this.push(res);
    }

    /** Emits a serialized end tag token into the output stream. */
    public emitEndTag(token: EndTag): void {
        this.push(`</${token.tagName}>`);
    }

    /** Emits a serialized text token into the output stream. */
    public emitText({ text }: Text): void {
        // Text is written verbatim only outside foreign content and when
        // `html.hasUnescapedText` says so for the last start tag; otherwise it is escaped.
        this.push(
            !this.parserFeedbackSimulator.inForeignContent &&
                html.hasUnescapedText(this.tokenizer.lastStartTagName, true)
                ? text
                : escapeText(text)
        );
    }

    /** Emits a serialized comment token into the output stream. */
    public emitComment(token: Comment): void {
        // NOTE(review): assumes the comment body is carried in `token.text` — confirm
        // against the SAX parser's `Comment` token type.
        this.push(`<!--${token.text}-->`);
    }

    /** Emits a raw HTML string into the output stream. */
    public emitRaw(html: string): void {
        this.push(html);
    }
}
/**
 * Typed `on` overloads for the events of `RewritingStream`.
 * Each event-specific listener receives the token followed by a `rawHtml` string.
 */
export interface RewritingStream {
/** Raised when the rewriter encounters a start tag. */
on(event: 'startTag', listener: (startTag: StartTag, rawHtml: string) => void): this;
/** Raised when the rewriter encounters an end tag. */
on(event: 'endTag', listener: (endTag: EndTag, rawHtml: string) => void): this;
/** Raised when the rewriter encounters a comment. */
on(event: 'comment', listener: (comment: Comment, rawHtml: string) => void): this;
/** Raised when the rewriter encounters text content. */
on(event: 'text', listener: (text: Text, rawHtml: string) => void): this;
/** Raised when the rewriter encounters a [document type declaration](https://en.wikipedia.org/wiki/Document_type_declaration). */
on(event: 'doctype', listener: (doctype: Doctype, rawHtml: string) => void): this;
/**
* Base event handler; accepts any event name not matched by the overloads above.
*
* @param event Name of the event
* @param handler Event handler
*/
on(event: string, handler: (...args: any[]) => void): this;
}