'use strict'; const assert = require('assert'); const Stream = require('stream'); /* * This filter consumes a stream of characters and emits one string per line. */ class LineSplitter extends Stream { constructor() { super(); this.buffer = ''; this.writable = true; } write(data) { const lines = (this.buffer + data).split(/\r\n|\n\r|\n|\r/); for (let i = 0; i < lines.length - 1; i++) { this.emit('data', lines[i]); } this.buffer = lines[lines.length - 1]; return true; } end(data) { this.write(data || ''); if (this.buffer) { this.emit('data', this.buffer); } this.emit('end'); } } /* * This filter consumes lines and emits paragraph objects. */ class ParagraphParser extends Stream { constructor() { super(); this.blockIsLicenseBlock = false; this.writable = true; this.resetBlock(false); } write(data) { this.parseLine(data + ''); return true; } end(data) { if (data) this.parseLine(data + ''); this.flushParagraph(); this.emit('end'); } resetParagraph() { this.paragraphLineIndent = -1; this.paragraph = { li: '', inLicenseBlock: this.blockIsLicenseBlock, lines: [] }; } resetBlock(isLicenseBlock) { this.blockIsLicenseBlock = isLicenseBlock; this.blockHasCStyleComment = false; this.resetParagraph(); } flushParagraph() { if (this.paragraph.lines.length || this.paragraph.li) { this.emit('data', this.paragraph); } this.resetParagraph(); } parseLine(line) { // Strip trailing whitespace line = line.trimRight(); // Detect block separator if (/^\s*(=|"){3,}\s*$/.test(line)) { this.flushParagraph(); this.resetBlock(!this.blockIsLicenseBlock); return; } // Strip comments around block if (this.blockIsLicenseBlock) { if (!this.blockHasCStyleComment) this.blockHasCStyleComment = /^\s*(\/\*)/.test(line); if (this.blockHasCStyleComment) { const prev = line; line = line.replace(/^(\s*?)(?:\s?\*\/|\/\*\s|\s\*\s?)/, '$1'); if (prev === line) line = line.replace(/^\s{2}/, ''); if (/\*\//.test(prev)) this.blockHasCStyleComment = false; } else { // Strip C++ and perl style comments. line = line.replace(/^(\s*)(?:\/\/\s?|#\s?)/, '$1'); } } // Detect blank line (paragraph separator) if (!/\S/.test(line)) { this.flushParagraph(); return; } // Detect separator "lines" within a block. These mark a paragraph break // and are stripped from the output. if (/^\s*[=*-]{5,}\s*$/.test(line)) { this.flushParagraph(); return; } // Find out indentation level and the start of a lied or numbered list; const result = /^(\s*)(\d+\.|\*|-)?\s*/.exec(line); assert.ok(result); // The number of characters that will be stripped from the beginning of // the line. const lineStripLength = result[0].length; // The indentation size that will be used to detect indentation jumps. // Fudge by 1 space. const lineIndent = Math.floor(lineStripLength / 2) * 2; // The indentation level that will be exported const level = Math.floor(result[1].length / 2); // The list indicator that precedes the actual content, if any. const lineLi = result[2]; // Flush the paragraph when there is a li or an indentation jump if (lineLi || (lineIndent !== this.paragraphLineIndent && this.paragraphLineIndent !== -1)) { this.flushParagraph(); this.paragraph.li = lineLi; } // Set the paragraph indent that we use to detect indentation jumps. When // we just detected a list indicator, wait // for the next line to arrive before setting this. if (!lineLi && this.paragraphLineIndent !== -1) { this.paragraphLineIndent = lineIndent; } // Set the output indent level if it has not been set yet. if (this.paragraph.level === undefined) this.paragraph.level = level; // Strip leading whitespace and li. line = line.slice(lineStripLength); if (line) this.paragraph.lines.push(line); } } /* * This filter consumes paragraph objects and emits modified paragraph objects. * The lines within the paragraph are unwrapped where appropriate. It also * replaces multiple consecutive whitespace characters by a single one. */ class Unwrapper extends Stream { constructor() { super(); this.writable = true; } write(paragraph) { const lines = paragraph.lines; const breakAfter = []; let i; for (i = 0; i < lines.length - 1; i++) { const line = lines[i]; // When a line is really short, the line was probably kept separate for a // reason. if (line.length < 50) { // If the first word on the next line really didn't fit after the line, // it probably was just ordinary wrapping after all. const nextFirstWordLength = lines[i + 1].replace(/\s.*$/, '').length; if (line.length + nextFirstWordLength < 60) { breakAfter[i] = true; } } } for (i = 0; i < lines.length - 1;) { if (!breakAfter[i]) { lines[i] += ` ${lines.splice(i + 1, 1)[0]}`; } else { i++; } } for (i = 0; i < lines.length; i++) { // Replace multiple whitespace characters by a single one, and strip // trailing whitespace. lines[i] = lines[i].replace(/\s+/g, ' ').replace(/\s+$/, ''); } this.emit('data', paragraph); } end(data) { if (data) this.write(data); this.emit('end'); } } function rtfEscape(string) { function toHex(number, length) { return (~~number).toString(16).padStart(length, '0'); } return string .replace(/[\\{}]/g, (m) => `\\${m}`) .replace(/\t/g, () => '\\tab ') // eslint-disable-next-line no-control-regex .replace(/[\x00-\x1f\x7f-\xff]/g, (m) => `\\'${toHex(m.charCodeAt(0), 2)}`) .replace(/\ufeff/g, '') .replace(/[\u0100-\uffff]/g, (m) => `\\u${toHex(m.charCodeAt(0), 4)}?`); } /* * This filter generates an rtf document from a stream of paragraph objects. */ class RtfGenerator extends Stream { constructor() { super(); this.didWriteAnything = false; this.writable = true; } write({ li, level, lines, inLicenseBlock: lic }) { if (!this.didWriteAnything) { this.emitHeader(); this.didWriteAnything = true; } if (li) level++; let rtf = '\\pard\\sa150\\sl300\\slmult1'; if (level > 0) rtf += `\\li${level * 240}`; if (li) rtf += `\\tx${level * 240}\\fi-240`; if (lic) rtf += '\\ri240'; if (!lic) rtf += '\\b'; if (li) rtf += ` ${li}\\tab`; rtf += ` ${lines.map(rtfEscape).join('\\line ')}`; if (!lic) rtf += '\\b0'; rtf += '\\par\n'; this.emit('data', rtf); } end(data) { if (data) this.write(data); if (this.didWriteAnything) this.emitFooter(); this.emit('end'); } emitHeader() { this.emit('data', '{\\rtf1\\ansi\\ansicpg1252\\uc1\\deff0\\deflang1033' + '{\\fonttbl{\\f0\\fswiss\\fcharset0 Tahoma;}}\\fs20\n' + '{\\*\\generator txt2rtf 0.0.1;}\n'); } emitFooter() { this.emit('data', '}'); } } const stdin = process.stdin; const stdout = process.stdout; const lineSplitter = new LineSplitter(); const paragraphParser = new ParagraphParser(); const unwrapper = new Unwrapper(); const rtfGenerator = new RtfGenerator(); stdin.setEncoding('utf-8'); stdin.resume(); stdin.pipe(lineSplitter); lineSplitter.pipe(paragraphParser); paragraphParser.pipe(unwrapper); unwrapper.pipe(rtfGenerator); rtfGenerator.pipe(stdout);