/* * Copyright (C) 2024 Puter Technologies Inc. * * This file is part of Puter. * * Puter is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published * by the Free Software Foundation, either version 3 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Affero General Public License for more details. * * You should have received a copy of the GNU Affero General Public License * along with this program. If not, see . */ const lib = {}; lib.dedent_lines = lines => { // If any lines are just spaces, remove the spaces for ( let i=0 ; i < lines.length ; i++ ) { if ( /^\s+$/.test(lines[i]) ) lines[i] = ''; } // Remove leading and trailing blanks while ( lines[0] === '' ) lines.shift(); while ( lines[lines.length-1] === '' ) lines.pop(); let min_indent = Number.MAX_SAFE_INTEGER; for ( let i=0 ; i < lines.length ; i++ ) { if ( lines[i] === '' ) continue; let n_spaces = 0; for ( let j=0 ; j < lines[i].length ; j++ ) { if ( lines[i][j] === ' ' ) n_spaces++; else break; } if ( n_spaces < min_indent ) min_indent = n_spaces; } for ( let i=0 ; i < lines.length ; i++ ) { if ( lines[i] === '' ) continue; lines[i] = lines[i].slice(min_indent); } }; const StringStream = (str, { state_ } = {}) => { const state = state_ ?? { pos: 0 }; return { skip_whitespace () { while ( /^\s/.test(str[state.pos]) ) state.pos++; }, // INCOMPLETE: only handles single chars skip_matching (items) { while ( items.some(item => { return str[state.pos] === item; }) ) state.pos++; }, fwd (amount) { state.pos += amount ?? 1; }, fork () { return StringStream(str, { state_: { pos: state.pos } }); }, async get_pos () { return state.pos; }, async get_char () { return str[state.pos]; }, async matches (re_or_lit) { if ( re_or_lit instanceof RegExp ) { const re = re_or_lit; return re.test(str.slice(state.pos)); } const lit = re_or_lit; return lit === str.slice(state.pos, state.pos + lit.length); }, async get_until (re_or_lit) { let index; if ( re_or_lit instanceof RegExp ) { const re = re_or_lit; const result = re.exec(str.slice(state.pos)); if ( ! result ) return; index = state.pos + result.index; } else { const lit = re_or_lit; const ind = str.slice(state.pos).indexOf(lit); // TODO: parser warnings? if ( ind === -1 ) return; index = state.pos + ind; } const start_pos = state.pos; state.pos = index; return str.slice(start_pos, index); }, async debug () { const l1 = str.length; const l2 = str.length - state.pos; const clean = s => s.replace(/\n/, '{LF}'); return `[stream : "${ clean(str.slice(0, Math.min(6, l1))) }"... |${state.pos}| ..."${ clean(str.slice(state.pos, state.pos + Math.min(6, l2))) }"]` } }; }; const LinesCommentParser = ({ prefix }) => { return { parse: async (stream) => { stream.skip_whitespace(); const lines = []; while ( await stream.matches(prefix) ) { const line = await stream.get_until('\n'); if ( ! line ) return; lines.push(line); stream.fwd(); stream.skip_matching([' ', '\t']); if ( await stream.get_char() === '\n' ){ stream.fwd(); break; } stream.skip_whitespace(); } if ( lines.length === 0 ) return; for ( let i=0 ; i < lines.length ; i++ ) { lines[i] = lines[i].slice(prefix.length); } lib.dedent_lines(lines); return { lines, }; } }; }; const BlockCommentParser = ({ start, end, ignore_line_prefix, }) => { return { parse: async (stream) => { stream.skip_whitespace(); if ( ! await stream.matches(start) ) return; stream.fwd(start.length); const contents = await stream.get_until(end); if ( ! contents ) return; stream.fwd(end.length); // console.log('ending at', await stream.debug()) const lines = contents.split('\n'); // === Formatting Time! === // // Special case: remove the last '*' after '/**' if ( lines[0].trim() === ignore_line_prefix ) { lines.shift(); } // First dedent pass lib.dedent_lines(lines); // If all the lines start with asterisks, remove let allofem = true; for ( let i=0 ; i < lines.length ; i++ ) { if ( lines[i] === '' ) continue; if ( ! lines[i].startsWith(ignore_line_prefix) ) { allofem = false; break } } if ( allofem ) { for ( let i=0 ; i < lines.length ; i++ ) { if ( lines[i] === '' ) continue; lines[i] = lines[i].slice(ignore_line_prefix.length); } // Second dedent pass lib.dedent_lines(lines); } return { lines }; } }; }; const LinesCommentWriter = ({ prefix }) => { return { write: (lines) => { lib.dedent_lines(lines); for ( let i=0 ; i < lines.length ; i++ ) { lines[i] = prefix + lines[i]; } return lines.join('\n') + '\n'; } }; }; const BlockCommentWriter = ({ start, end, prefix }) => { return { write: (lines) => { lib.dedent_lines(lines); for ( let i=0 ; i < lines.length ; i++ ) { lines[i] = prefix + lines[i]; } let s = start + '\n'; s += lines.join('\n') + '\n'; s += end + '\n'; return s; } }; }; const CommentParser = () => { const registry_ = { object: { parsers: { lines: LinesCommentParser, block: BlockCommentParser, }, writers: { lines: LinesCommentWriter, block: BlockCommentWriter, }, }, data: { extensions: { js: 'javascript', cjs: 'javascript', mjs: 'javascript', }, languages: { javascript: { parsers: [ ['lines', { prefix: '//', }], ['block', { start: '/*', end: '*/', ignore_line_prefix: '*', }], ], writers: { lines: ['lines', { prefix: '// ' }], block: ['block', { start: '/*', end: ' */', prefix: ' * ', }] }, } }, } }; const get_language_by_filename = ({ filename }) => { const { language } = (({ filename }) => { const { language_id } = (({ filename }) => { const { extension } = (({ filename }) => { const components = ('' + filename).split('.'); const extension = components[components.length - 1]; return { extension }; })({ filename }); const language_id = registry_.data.extensions[extension]; if ( ! language_id ) { throw new Error(`unrecognized language id: ` + language_id); } return { language_id }; })({ filename }); const language = registry_.data.languages[language_id]; return { language }; })({ filename }); if ( ! language ) { // TODO: use strutil quot here throw new Error(`unrecognized language: ${language}`) } return { language }; } const supports = ({ filename }) => { try { get_language_by_filename({ filename }); } catch (e) { return false; } return true; }; const extract_top_comments = async ({ filename, source }) => { const { language } = get_language_by_filename({ filename }); // TODO: registry has `data` and `object`... // ... maybe add `virt` (virtual), which will // behave in the way the above code is written. const inst_ = spec => registry_.object.parsers[spec[0]](spec[1]); let ss = StringStream(source); const results = []; for (;;) { let comment; for ( let parser of language.parsers ) { const parser_name = parser[0]; parser = inst_(parser); const ss_ = ss.fork(); const start_pos = await ss_.get_pos(); comment = await parser.parse(ss_); const end_pos = await ss_.get_pos(); if ( comment ) { ss = ss_; comment.type = parser_name; comment.range = [start_pos, end_pos]; break; } } // console.log('comment?', comment); if ( ! comment ) break; results.push(comment); } return results; } const output_comment = ({ filename, style, text }) => { const { language } = get_language_by_filename({ filename }); const inst_ = spec => registry_.object.writers[spec[0]](spec[1]); let writer = language.writers[style]; writer = inst_(writer); const lines = text.split('\n'); const s = writer.write(lines); return s; } return { supports, extract_top_comments, output_comment, }; }; module.exports = { StringStream, LinesCommentParser, BlockCommentParser, CommentParser, };