puter/tools/comment-parser/main.js
2024-07-10 00:41:08 -04:00

346 lines
10 KiB
JavaScript

const lib = {};

/**
 * Removes the shared leading-space indentation from `lines`, in place.
 *
 * Whitespace-only lines are blanked first, then blank lines at the start and
 * end of the array are dropped. The smallest run of leading space characters
 * found among the remaining non-empty lines is then sliced off each of them.
 * Only the space character counts as indentation; a line starting with a tab
 * contributes an indent of zero.
 *
 * @param {string[]} lines - Lines to normalize; this array is mutated.
 * @returns {undefined} Nothing; the result is left in `lines`.
 */
lib.dedent_lines = lines => {
    // Whitespace-only lines are treated as blank lines.
    for ( const [idx, text] of lines.entries() ) {
        if ( /^\s+$/.test(text) ) lines[idx] = '';
    }

    // Trim blank lines off both ends of the array.
    while ( lines[0] === '' ) lines.shift();
    while ( lines[lines.length-1] === '' ) lines.pop();

    // Find the shallowest indent among the non-empty lines.
    let min_indent = Number.MAX_SAFE_INTEGER;
    for ( const text of lines ) {
        if ( text === '' ) continue;
        const n_spaces = /^ */.exec(text)[0].length;
        if ( n_spaces < min_indent ) min_indent = n_spaces;
    }

    // Strip that indent from every non-empty line.
    for ( const [idx, text] of lines.entries() ) {
        if ( text === '' ) continue;
        lines[idx] = text.slice(min_indent);
    }
};
/**
 * Creates a cursor over `str` with helpers for scanning forward.
 *
 * The position lives in a small state object so that `fork()` can hand out an
 * independent cursor over the same string.
 *
 * @param {string} str - The text being scanned.
 * @param {object} [options]
 * @param {{pos: number}} [options.state_] - Existing cursor state to use
 *     instead of starting at position 0.
 * @returns {object} The stream object (see the methods below).
 */
const StringStream = (str, { state_ } = {}) => {
    const state = state_ ?? { pos: 0 };
    return {
        /** Advances past any run of whitespace at the current position. */
        skip_whitespace () {
            while ( /^\s/.test(str[state.pos]) ) state.pos++;
        },
        // INCOMPLETE: only handles single chars
        /**
         * Advances while the current character equals one of `items`.
         * @param {string[]} items - Single-character strings to skip.
         */
        skip_matching (items) {
            while ( items.some(item => {
                return str[state.pos] === item;
            }) ) state.pos++;
        },
        /**
         * Moves the cursor forward.
         * @param {number} [amount] - Characters to advance; defaults to 1.
         */
        fwd (amount) {
            state.pos += amount ?? 1;
        },
        /**
         * @returns {object} A new stream over the same string, starting at
         *     the current position, whose movement does not affect this one.
         */
        fork () {
            return StringStream(str, { state_: { pos: state.pos } });
        },
        /** @returns {Promise<number>} The current position. */
        async get_pos () {
            return state.pos;
        },
        /**
         * @returns {Promise<string|undefined>} The character at the current
         *     position, or undefined past the end of the string.
         */
        async get_char () {
            return str[state.pos];
        },
        /**
         * Tests the text from the current position onward.
         *
         * A literal must appear exactly at the current position. A RegExp is
         * tested against the remaining text and may match anywhere in it
         * unless the pattern itself is anchored.
         *
         * @param {RegExp|string} re_or_lit
         * @returns {Promise<boolean>}
         */
        async matches (re_or_lit) {
            if ( re_or_lit instanceof RegExp ) {
                const re = re_or_lit;
                return re.test(str.slice(state.pos));
            }
            const lit = re_or_lit;
            return lit === str.slice(state.pos, state.pos + lit.length);
        },
        /**
         * Consumes text up to (not including) the next match of `re_or_lit`.
         *
         * @param {RegExp|string} re_or_lit
         * @returns {Promise<string|undefined>} The consumed text, or undefined
         *     (with the position left unchanged) when there is no match.
         */
        async get_until (re_or_lit) {
            let index;
            if ( re_or_lit instanceof RegExp ) {
                const re = re_or_lit;
                const result = re.exec(str.slice(state.pos));
                if ( ! result ) return;
                index = state.pos + result.index;
            } else {
                const lit = re_or_lit;
                const ind = str.slice(state.pos).indexOf(lit);
                // TODO: parser warnings?
                if ( ind === -1 ) return;
                index = state.pos + ind;
            }
            const start_pos = state.pos;
            state.pos = index;
            return str.slice(start_pos, index);
        },
        /**
         * @returns {Promise<string>} A one-line summary showing up to six
         *     characters from the start of the string, the position, and up
         *     to six characters at the position.
         */
        async debug () {
            const l1 = str.length;
            const l2 = str.length - state.pos;
            // The `g` flag matters: a preview can contain several newlines
            // and all of them must be escaped to keep the summary on one line.
            const clean = s => s.replace(/\n/g, '{LF}');
            return `[stream : "${
                clean(str.slice(0, Math.min(6, l1)))
            }"... |${state.pos}| ..."${
                clean(str.slice(state.pos, state.pos + Math.min(6, l2)))
            }"]`;
        }
    };
};
/**
 * Builds a parser for comments made of consecutive prefixed lines
 * (for example lines that each begin with `// `).
 *
 * @param {object} options
 * @param {string} options.prefix - Text every comment line must start with.
 * @returns {{parse: function}} An object whose async `parse(stream)` returns
 *     `{ lines }` (prefix removed, common indentation stripped) or undefined
 *     when the stream does not start with such a comment.
 */
const LinesCommentParser = ({
    prefix
}) => {
    return {
        parse: async (stream) => {
            stream.skip_whitespace();
            const lines = [];
            while ( await stream.matches(prefix) ) {
                let line = await stream.get_until('\n');
                let at_end_of_input = false;
                if ( line === undefined ) {
                    // No newline remains, so this comment line is the last
                    // line of the input. `/$/` only matches at the very end,
                    // which makes get_until hand back the remaining text
                    // instead of discarding the lines collected so far.
                    line = await stream.get_until(/$/);
                    at_end_of_input = true;
                }
                if ( ! line ) return;
                lines.push(line);
                if ( at_end_of_input ) break;

                // Step over the newline that ended this line.
                stream.fwd();

                // A blank line (optionally holding spaces/tabs) ends the
                // comment; consume its newline and stop.
                stream.skip_matching([' ', '\t']);
                if ( await stream.get_char() === '\n' ) {
                    stream.fwd();
                    break;
                }
                stream.skip_whitespace();
            }
            if ( lines.length === 0 ) return;
            for ( let i=0 ; i < lines.length ; i++ ) {
                lines[i] = lines[i].slice(prefix.length);
            }
            lib.dedent_lines(lines);
            return {
                lines,
            };
        }
    };
};
/**
 * Builds a parser for delimited block comments (for example those wrapped in
 * a slash-star / star-slash pair).
 *
 * @param {object} options
 * @param {string} options.start - Opening delimiter.
 * @param {string} options.end - Closing delimiter.
 * @param {string} options.ignore_line_prefix - Decoration that is stripped
 *     from the start of every line when all non-empty lines carry it.
 * @returns {{parse: function}} An object whose async `parse(stream)` returns
 *     `{ lines }` or undefined when the stream does not start with a complete
 *     block comment.
 */
const BlockCommentParser = ({
    start,
    end,
    ignore_line_prefix,
}) => {
    return {
        parse: async (stream) => {
            stream.skip_whitespace();

            // `matches` is async: without `await` the returned promise is
            // always truthy and this guard would never reject anything.
            if ( ! (await stream.matches(start)) ) return;
            stream.fwd(start.length);

            // Only a missing terminator is a failure; an empty string means
            // an empty comment body and is still a valid comment.
            const contents = await stream.get_until(end);
            if ( contents == null ) return;
            stream.fwd(end.length);

            const lines = contents.split('\n');

            // === Formatting Time! === //

            // Special case: when the opening delimiter is immediately
            // followed by the line prefix alone on the first line, drop it.
            if ( lines[0].trim() === ignore_line_prefix ) {
                lines.shift();
            }

            // First dedent pass
            lib.dedent_lines(lines);

            // If every non-empty line starts with the prefix, remove it
            const all_prefixed = lines.every(line => {
                return line === '' || line.startsWith(ignore_line_prefix);
            });

            if ( all_prefixed ) {
                for ( let i=0 ; i < lines.length ; i++ ) {
                    if ( lines[i] === '' ) continue;
                    lines[i] = lines[i].slice(ignore_line_prefix.length);
                }

                // Second dedent pass
                lib.dedent_lines(lines);
            }

            return { lines };
        }
    };
};
/**
 * Builds a writer that renders text as a run of prefixed comment lines.
 *
 * @param {object} options
 * @param {string} options.prefix - Text placed in front of every line.
 * @returns {{write: function}} An object whose `write(lines)` dedents `lines`
 *     in place, prefixes each entry in place, and returns the entries joined
 *     by newlines with one trailing newline.
 */
const LinesCommentWriter = ({ prefix }) => {
    const write = (lines) => {
        lib.dedent_lines(lines);

        // The caller's array is rewritten entry by entry.
        for ( const [idx, text] of lines.entries() ) {
            lines[idx] = prefix + text;
        }

        const body = lines.join('\n');
        return body + '\n';
    };

    return { write };
};
/**
 * Builds a writer that renders text as a delimited block comment.
 *
 * @param {object} options
 * @param {string} options.start - Opening delimiter, written on its own line.
 * @param {string} options.end - Closing delimiter, written on its own line.
 * @param {string} options.prefix - Text placed in front of every body line.
 * @returns {{write: function}} An object whose `write(lines)` dedents `lines`
 *     in place, prefixes each entry in place, and returns the opening line,
 *     the body and the closing line, each terminated by a newline.
 */
const BlockCommentWriter = ({ start, end, prefix }) => {
    const write = (lines) => {
        lib.dedent_lines(lines);

        // The caller's array is rewritten entry by entry.
        for ( const [idx, text] of lines.entries() ) {
            lines[idx] = prefix + text;
        }

        const sections = [start, lines.join('\n'), end];
        return sections.map(section => section + '\n').join('');
    };

    return { write };
};
/**
 * Creates the comment-handling facade: it maps file names to a language
 * definition and uses that definition's parsers and writers.
 *
 * @returns {{supports: function, extract_top_comments: function,
 *     output_comment: function}}
 */
const CommentParser = () => {
    // `object` holds the parser/writer factories; `data` holds per-language
    // configuration that names a factory and supplies its options.
    const registry_ = {
        object: {
            parsers: {
                lines: LinesCommentParser,
                block: BlockCommentParser,
            },
            writers: {
                lines: LinesCommentWriter,
                block: BlockCommentWriter,
            },
        },
        data: {
            extensions: {
                js: 'javascript',
                cjs: 'javascript',
                mjs: 'javascript',
            },
            languages: {
                javascript: {
                    parsers: [
                        ['lines', {
                            prefix: '// ',
                        }],
                        ['block', {
                            start: '/*',
                            end: '*/',
                            ignore_line_prefix: '*',
                        }],
                    ],
                    writers: {
                        lines: ['lines', {
                            prefix: '//'
                        }],
                        block: ['block', {
                            start: '/*',
                            end: '*/',
                            prefix: ' * ',
                        }]
                    },
                }
            },
        }
    };

    // Looks up `key` among the table's own properties only, so names
    // inherited from Object.prototype (e.g. "toString") never match.
    const own_ = (table, key) => {
        return Object.prototype.hasOwnProperty.call(table, key)
            ? table[key]
            : undefined;
    };

    /**
     * Resolves the language definition for a file name from the text after
     * its last `.`.
     *
     * @param {object} args
     * @param {string} args.filename
     * @returns {{language: object}}
     * @throws {Error} When the extension or its language is not registered.
     */
    const get_language_by_filename = ({ filename }) => {
        const components = ('' + filename).split('.');
        const extension = components[components.length - 1];

        const language_id = own_(registry_.data.extensions, extension);
        if ( ! language_id ) {
            // Report the extension: the language id is necessarily
            // undefined on this path and says nothing useful.
            throw new Error(`unrecognized file extension: ${extension}`);
        }

        const language = own_(registry_.data.languages, language_id);
        if ( ! language ) {
            // TODO: use strutil quot here
            throw new Error(`unrecognized language: ${language_id}`);
        }

        return { language };
    };

    /**
     * @param {object} args
     * @param {string} args.filename
     * @returns {boolean} Whether a language is registered for this file name.
     */
    const supports = ({ filename }) => {
        try {
            get_language_by_filename({ filename });
        } catch (e) {
            // Failing the lookup is the expected "not supported" signal.
            return false;
        }
        return true;
    };

    /**
     * Collects the consecutive comments at the beginning of `source`.
     *
     * At each position every parser of the language is tried in order on a
     * fork of the stream; the first one that yields a comment wins and
     * scanning continues from where it stopped. Scanning ends at the first
     * position where no parser yields a comment.
     *
     * @param {object} args
     * @param {string} args.filename - Used to pick the language.
     * @param {string} args.source - Text to scan.
     * @returns {Promise<object[]>} The parsed comments, each extended with
     *     `type` (the parser name) and `range` (`[start_pos, end_pos]`).
     */
    const extract_top_comments = async ({ filename, source }) => {
        const { language } = get_language_by_filename({ filename });

        // TODO: registry has `data` and `object`...
        //       ... maybe add `virt` (virtual), which will
        //       behave in the way the above code is written.
        // Instantiate each parser once rather than on every scan iteration.
        const parsers = language.parsers.map(([parser_name, options]) => ({
            parser_name,
            parser: registry_.object.parsers[parser_name](options),
        }));

        let ss = StringStream(source);
        const results = [];
        for (;;) {
            let comment;
            for ( const { parser_name, parser } of parsers ) {
                // Parse on a fork so a failed attempt leaves `ss` untouched.
                const ss_ = ss.fork();
                const start_pos = await ss_.get_pos();
                comment = await parser.parse(ss_);
                if ( ! comment ) continue;

                const end_pos = await ss_.get_pos();
                ss = ss_;
                comment.type = parser_name;
                comment.range = [start_pos, end_pos];
                break;
            }
            if ( ! comment ) break;
            results.push(comment);
        }
        return results;
    };

    /**
     * Renders `text` as a comment for the language of `filename`.
     *
     * @param {object} args
     * @param {string} args.filename - Used to pick the language.
     * @param {string} args.style - Key of the language's `writers` table.
     * @param {string} args.text - Comment text; split on newlines.
     * @returns {string} The rendered comment.
     * @throws {Error} When the language or the style is not registered.
     */
    const output_comment = ({ filename, style, text }) => {
        const { language } = get_language_by_filename({ filename });

        const spec = own_(language.writers, style);
        if ( ! spec ) {
            throw new Error(`unsupported comment style: ${style}`);
        }

        const [writer_name, options] = spec;
        const writer = registry_.object.writers[writer_name](options);
        return writer.write(text.split('\n'));
    };

    return {
        supports,
        extract_top_comments,
        output_comment,
    };
};
// Public API of this module: the stream helper, the two comment-parser
// factories, and the CommentParser facade. In the code shown here the writer
// factories and `lib` are not exported; the writers are used through
// CommentParser's registry.
module.exports = {
StringStream,
LinesCommentParser,
BlockCommentParser,
CommentParser,
};