diff --git a/packages/phoenix/packages/newparser/parsers/terminals.js b/packages/phoenix/packages/newparser/parsers/terminals.js deleted file mode 100644 index 4a82cd14..00000000 --- a/packages/phoenix/packages/newparser/parsers/terminals.js +++ /dev/null @@ -1,46 +0,0 @@ -import { Parser, UNRECOGNIZED, VALUE } from '../lib.js'; - -export class Literal extends Parser { - _create (value) { - this.value = value; - } - - _parse (stream) { - const subStream = stream.fork(); - for ( let i=0 ; i < this.value.length ; i++ ) { - let { done, value } = subStream.next(); - if ( done ) return UNRECOGNIZED; - if ( this.value[i] !== value ) return UNRECOGNIZED; - } - - stream.join(subStream); - return { status: VALUE, $: 'literal', value: this.value }; - } -} - -export class StringOf extends Parser { - _create (values) { - this.values = values; - } - - _parse (stream) { - const subStream = stream.fork(); - let text = ''; - - while (true) { - let { done, value } = subStream.look(); - if ( done ) break; - if ( ! this.values.includes(value) ) break; - - subStream.next(); - text += value; - } - - if (text.length === 0) { - return UNRECOGNIZED; - } - - stream.join(subStream); - return { status: VALUE, $: 'stringOf', value: text }; - } -} \ No newline at end of file diff --git a/packages/phoenix/packages/newparser/exports.js b/packages/phoenix/packages/parsely/exports.js similarity index 62% rename from packages/phoenix/packages/newparser/exports.js rename to packages/phoenix/packages/parsely/exports.js index dfab2185..b729a589 100644 --- a/packages/phoenix/packages/newparser/exports.js +++ b/packages/phoenix/packages/parsely/exports.js @@ -1,31 +1,6 @@ -import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from './lib.js'; -import { Discard, FirstMatch, None, Optional, Repeat, Sequence } from './parsers/combinators.js'; -import { Literal, StringOf } from './parsers/terminals.js'; - -class Symbol extends Parser { - _create(symbolName) { - this.symbolName = symbolName; - } - - _parse (stream) { - const parser = this.symbol_registry[this.symbolName]; - if ( ! parser ) { - throw new Error(`No symbol defined named '${this.symbolName}'`); - } - const subStream = stream.fork(); - const result = parser.parse(subStream); - console.log(`Result of parsing symbol('${this.symbolName}'):`, result); - if ( result.status === UNRECOGNIZED ) { - return UNRECOGNIZED; - } - if ( result.status === INVALID ) { - return { status: INVALID, value: result }; - } - stream.join(subStream); - result.$ = this.symbolName; - return result; - } -} +import { adapt_parser, VALUE } from './parser.js'; +import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js'; +import { Literal, None, StringOf, Symbol } from './parsers/terminals.js'; class ParserWithAction { #parser; @@ -55,6 +30,12 @@ export class GrammarContext { return new GrammarContext({...this.parsers, ...more_parsers}); } + /** + * Construct a parsing function for the given grammar. + * @param grammar An object of symbol-names to a DSL for parsing that symbol. + * @param actions An object of symbol-names to a function run to process the symbol after it has been parsed. + * @returns {function(*, *, {must_consume_all_input?: boolean}=): *} A function to run the parser. Throws if parsing fails. + */ define_parser (grammar, actions) { const symbol_registry = {}; const api = {}; @@ -76,12 +57,23 @@ export class GrammarContext { } } - return (stream, entry_symbol) => { + return (stream, entry_symbol, { must_consume_all_input = true } = {}) => { const entry_parser = symbol_registry[entry_symbol]; if (!entry_parser) { throw new Error(`Entry symbol '${entry_symbol}' not found in grammar.`); } - return entry_parser.parse(stream); + const result = entry_parser.parse(stream); + + if (result.status !== VALUE) { + throw new Error('Failed to parse input against grammar.'); + } + + // Ensure the entire stream is consumed. + if (must_consume_all_input && !stream.is_eof()) { + throw new Error('Parsing did not consume all input.'); + } + + return result; }; } } diff --git a/packages/phoenix/packages/newparser/lib.js b/packages/phoenix/packages/parsely/parser.js similarity index 75% rename from packages/phoenix/packages/newparser/lib.js rename to packages/phoenix/packages/parsely/parser.js index 89f185e0..c588ccf1 100644 --- a/packages/phoenix/packages/newparser/lib.js +++ b/packages/phoenix/packages/parsely/parser.js @@ -4,6 +4,12 @@ export const UNRECOGNIZED = Symbol('unrecognized'); export const INVALID = Symbol('invalid'); export const VALUE = Symbol('value'); +/** + * Base class for parsers. + * To implement your own, subclass it and define these methods: + * - _create(): Acts as the constructor + * - _parse(stream): Performs the parsing on the stream, and returns either UNRECOGNIZED, INVALID, or a result object. + */ export class Parser { result (o) { if (o.value && o.value.$discard) { diff --git a/packages/phoenix/packages/newparser/parsers/combinators.js b/packages/phoenix/packages/parsely/parsers/combinators.js similarity index 53% rename from packages/phoenix/packages/newparser/parsers/combinators.js rename to packages/phoenix/packages/parsely/parsers/combinators.js index 54b23c05..b597e857 100644 --- a/packages/phoenix/packages/newparser/parsers/combinators.js +++ b/packages/phoenix/packages/parsely/parsers/combinators.js @@ -1,5 +1,9 @@ -import { INVALID, UNRECOGNIZED, VALUE, adapt_parser, Parser } from '../lib.js'; +import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js'; +/** + * Runs its child parser, and discards its result. + * @param parser Child parser + */ export class Discard extends Parser { _create (parser) { this.parser = adapt_parser(parser); @@ -19,6 +23,10 @@ export class Discard extends Parser { } } +/** + * Runs its child parsers in order, and returns the first successful result. + * @param parsers Child parsers + */ export class FirstMatch extends Parser { _create (...parsers) { this.parsers = parsers.map(adapt_parser); @@ -42,14 +50,10 @@ export class FirstMatch extends Parser { } } -export class None extends Parser { - _create () {} - - _parse (stream) { - return { status: VALUE, $: 'none', $discard: true }; - } -} - +/** + * Runs its child parser, and then returns its result, or nothing. + * @param parser Child parser + */ export class Optional extends Parser { _create (parser) { this.parser = adapt_parser(parser); @@ -66,6 +70,12 @@ export class Optional extends Parser { } } +/** + * Parses a repeated sequence of values with separators between them. + * @param value_parser Parser for the value + * @param separator_parser Parser for the separator, optional + * @param trailing Whether to allow a trailing separator + */ export class Repeat extends Parser { _create (value_parser, separator_parser, { trailing = false } = {}) { this.value_parser = adapt_parser(value_parser); @@ -75,45 +85,58 @@ export class Repeat extends Parser { _parse (stream) { const results = []; - for ( ;; ) { - const subStream = stream.fork(); + const subStream = stream.fork(); - // Value - const result = this.value_parser.parse(subStream); - if ( result.status === UNRECOGNIZED ) { - break; - } - if ( result.status === INVALID ) { - return { status: INVALID, value: result }; - } + // Parse first value + const result = this.value_parser.parse(subStream); + if ( result.status === INVALID ) + return { status: INVALID, value: result }; + + if ( result.status === VALUE ) { stream.join(subStream); - if ( ! result.$discard ) results.push(result); + if (!result.$discard) results.push(result); - // Separator - if ( ! this.separator_parser ) { - continue; - } - const separatorResult = this.separator_parser.parse(subStream); - if ( separatorResult.status === UNRECOGNIZED ) { - break; - } - if ( separatorResult.status === INVALID ) { - return { status: INVALID, value: separatorResult }; - } - stream.join(subStream); - if ( ! result.$discard ) results.push(separatorResult); + // Repeatedly parse + for (;;) { + // Separator + if (!this.separator_parser) + continue; - // TODO: Detect trailing separator and reject it if trailing==false + const separatorResult = this.separator_parser.parse(subStream); + if (separatorResult.status === UNRECOGNIZED) + break; + if (separatorResult.status === INVALID) + return { status: INVALID, value: separatorResult }; + stream.join(subStream); + if (!separatorResult.$discard) results.push(separatorResult); + + // Value + const result = this.value_parser.parse(subStream); + if (result.status === UNRECOGNIZED) { + // If we failed to parse a value, we have a trailing separator + if (this.trailing === false) + return { status: INVALID, value: result }; + break; + } + if (result.status === INVALID) + return { status: INVALID, value: result }; + + stream.join(subStream); + if (!result.$discard) results.push(result); + } } - if ( results.length === 0 ) { + if ( results.length === 0 ) return UNRECOGNIZED; - } return { status: VALUE, value: results }; } } +/** + * Runs a sequence of child parsers, and returns their result as an array if they all succeed. + * @param parsers Child parsers + */ export class Sequence extends Parser { _create (...parsers) { this.parsers = parsers.map(adapt_parser); diff --git a/packages/phoenix/packages/parsely/parsers/terminals.js b/packages/phoenix/packages/parsely/parsers/terminals.js new file mode 100644 index 00000000..10936540 --- /dev/null +++ b/packages/phoenix/packages/parsely/parsers/terminals.js @@ -0,0 +1,93 @@ +import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js'; + +/** + * Parses a literal value. + * @param value The value to parse + */ +export class Literal extends Parser { + _create (value) { + this.value = value; + } + + _parse (stream) { + const subStream = stream.fork(); + for ( let i=0 ; i < this.value.length ; i++ ) { + let { done, value } = subStream.next(); + if ( done ) return UNRECOGNIZED; + if ( this.value[i] !== value ) return UNRECOGNIZED; + } + + stream.join(subStream); + return { status: VALUE, $: 'literal', value: this.value }; + } +} + +/** + * Parses matching characters as a string. + * @param test Function that takes a character, and returns whether to include it. + */ +export class StringOf extends Parser { + _create (test) { + this.test = test; + } + + _parse (stream) { + const subStream = stream.fork(); + let text = ''; + + while (true) { + let { done, value } = subStream.look(); + if ( done ) break; + if ( ! this.test(value) ) break; + + subStream.next(); + text += value; + } + + if (text.length === 0) { + return UNRECOGNIZED; + } + + stream.join(subStream); + return { status: VALUE, $: 'stringOf', value: text }; + } +} + +/** + * Parses an object defined by the symbol registry. + * @param symbolName The name of the symbol to parse. + */ +export class Symbol extends Parser { + _create(symbolName) { + this.symbolName = symbolName; + } + + _parse (stream) { + const parser = this.symbol_registry[this.symbolName]; + if ( ! parser ) { + throw new Error(`No symbol defined named '${this.symbolName}'`); + } + const subStream = stream.fork(); + const result = parser.parse(subStream); + if ( result.status === UNRECOGNIZED ) { + return UNRECOGNIZED; + } + if ( result.status === INVALID ) { + return { status: INVALID, value: result }; + } + stream.join(subStream); + result.$ = this.symbolName; + return result; + } +} + +/** + * Does no parsing and returns a discarded result. + */ +export class None extends Parser { + _create () {} + + _parse (stream) { + return { status: VALUE, $: 'none', $discard: true }; + } +} diff --git a/packages/phoenix/packages/parsely/streams.js b/packages/phoenix/packages/parsely/streams.js new file mode 100644 index 00000000..23254b62 --- /dev/null +++ b/packages/phoenix/packages/parsely/streams.js @@ -0,0 +1,52 @@ +/** + * Base class for input streams. + * Defines which methods are expected for any stream implementations. + */ +export class ParserStream { + value_at (index) { throw new Error(`${this.constructor.name}.value_at() not implemented`); } + look () { throw new Error(`${this.constructor.name}.look() not implemented`); } + next () { throw new Error(`${this.constructor.name}.next() not implemented`); } + fork () { throw new Error(`${this.constructor.name}.fork() not implemented`); } + join () { throw new Error(`${this.constructor.name}.join() not implemented`); } + + is_eof () { + return this.look().done; + } +} + +/** + * ParserStream that takes a string, and processes it character by character. + */ +export class StringStream extends ParserStream { + constructor (str, startIndex = 0) { + super(); + this.str = str; + this.i = startIndex; + } + + value_at (index) { + if ( index >= this.str.length ) { + return { done: true, value: undefined }; + } + + return { done: false, value: this.str[index] }; + } + + look () { + return this.value_at(this.i); + } + + next () { + const result = this.value_at(this.i); + this.i++; + return result; + } + + fork () { + return new StringStream(this.str, this.i); + } + + join (forked) { + this.i = forked.i; + } +} diff --git a/packages/phoenix/src/puter-shell/coreutils/concept-parser.js b/packages/phoenix/src/puter-shell/coreutils/concept-parser.js index 31888220..968c3928 100644 --- a/packages/phoenix/src/puter-shell/coreutils/concept-parser.js +++ b/packages/phoenix/src/puter-shell/coreutils/concept-parser.js @@ -1,5 +1,6 @@ -import { GrammarContext, standard_parsers } from '../../../packages/newparser/exports.js'; -import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/newparser/lib.js'; +import { GrammarContext, standard_parsers } from '../../../packages/parsely/exports.js'; +import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/parsely/parser.js'; +import { StringStream } from '../../../packages/parsely/streams.js'; class NumberParser extends Parser { static data = { @@ -163,39 +164,6 @@ class StringParser extends Parser { } } -class StringStream { - constructor (str, startIndex = 0) { - this.str = str; - this.i = startIndex; - } - - value_at (index) { - if ( index >= this.str.length ) { - return { done: true, value: undefined }; - } - - return { done: false, value: this.str[index] }; - } - - look () { - return this.value_at(this.i); - } - - next () { - const result = this.value_at(this.i); - this.i++; - return result; - } - - fork () { - return new StringStream(this.str, this.i); - } - - join (forked) { - this.i = forked.i; - } -} - export default { name: 'concept-parser', args: { @@ -204,15 +172,13 @@ export default { }, execute: async ctx => { const { in_, out, err } = ctx.externs; - await out.write("STARTING CONCEPT PARSER\n"); const grammar_context = new GrammarContext(standard_parsers()); - await out.write("Constructed a grammar context\n"); const parser = grammar_context.define_parser({ element: a => a.sequence( - a.symbol('whitespace'), + a.optional(a.symbol('whitespace')), a.symbol('value'), - a.symbol('whitespace'), + a.optional(a.symbol('whitespace')), ), value: a => a.firstMatch( a.symbol('object'), @@ -225,37 +191,33 @@ export default { ), array: a => a.sequence( a.literal('['), - a.symbol('whitespace'), - a.optional( + a.firstMatch( a.repeat( a.symbol('element'), a.literal(','), - { trailing: true }, + { trailing: false }, ), + a.optional(a.symbol('whitespace')), ), - a.symbol('whitespace'), a.literal(']'), ), member: a => a.sequence( - a.symbol('whitespace'), + a.optional(a.symbol('whitespace')), a.symbol('string'), - a.symbol('whitespace'), + a.optional(a.symbol('whitespace')), a.literal(':'), - a.symbol('whitespace'), - a.symbol('value'), - a.symbol('whitespace'), + a.symbol('element'), ), object: a => a.sequence( a.literal('{'), - a.symbol('whitespace'), - a.optional( + a.firstMatch( a.repeat( a.symbol('member'), a.literal(','), - { trailing: true }, + { trailing: false }, ), + a.optional(a.symbol('whitespace')), ), - a.symbol('whitespace'), a.literal('}'), ), true: a => a.literal('true'), @@ -263,37 +225,31 @@ export default { null: a => a.literal('null'), number: a => new NumberParser(), string: a => new StringParser(), - whitespace: a => a.optional( - a.stringOf(' \r\n\t'.split('')), - ), + whitespace: a => a.stringOf(c => ' \r\n\t'.includes(c)), }, { - element: it => it[0].value, + element: it => it.filter(it => it.$ === 'value')[0].value, value: it => it, array: it => { // A parsed array contains 3 values: `[`, the entries array, and `]`, so we only care about index 1. // If it's less than 3, there were no entries. if (it.length < 3) return []; return (it[1].value || []) - .filter(it => it.$ !== 'literal') + .filter(it => it.$ === 'element') .map(it => it.value); }, member: it => { - // A parsed member contains 3 values: a name, `:`, and a value. - const [ name_part, colon, value_part ] = it; + const [ name_part, value_part ] = it.filter(it => it.$ === 'string' || it.$ === 'element'); return { name: name_part.value, value: value_part.value }; }, object: it => { - console.log('OBJECT!!!!'); - console.log(it[1]); // A parsed object contains 3 values: `{`, the members array, and `}`, so we only care about index 1. // If it's less than 3, there were no members. if (it.length < 3) return {}; const result = {}; - // FIXME: This is all wrong!!! (it[1].value || []) .filter(it => it.$ === 'member') .forEach(it => { - result[it.name] = it.value; + result[it.value.name] = it.value.value; }); return result; }, @@ -305,7 +261,6 @@ export default { whitespace: _ => {}, }); - // TODO: What do we want our streams to be like? const input = ctx.locals.positionals.shift(); const stream = new StringStream(input); try { @@ -317,4 +272,4 @@ export default { await err.write(e.stack + '\n'); } } -} \ No newline at end of file +}