Merge pull request #423 from AtkinsSJ/parsely

Tidy up "newparser" so it can be used properly
This commit is contained in:
Eric Dubé 2024-05-30 12:42:03 -04:00 committed by GitHub
commit 895358e6e5
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
7 changed files with 252 additions and 177 deletions

View File

@ -1,46 +0,0 @@
import { Parser, UNRECOGNIZED, VALUE } from '../lib.js';
/**
 * Parser that matches one fixed sequence of stream values.
 *
 * `_create(value)` stores the expected sequence (anything with a `length`
 * that can be indexed, e.g. a string).
 * `_parse(stream)` reads `value.length` items from a fork of the stream and
 * compares each one against the element at the same index.
 */
export class Literal extends Parser {
    _create (value) {
        this.value = value;
    }

    /**
     * @param stream Stream to read from; only advanced when the whole literal matches.
     * @returns UNRECOGNIZED if the stream ends early or any item differs,
     *          otherwise a VALUE result tagged 'literal' carrying the literal itself.
     */
    _parse (stream) {
        const expected = this.value;
        // Work on a fork so a failed match leaves the caller's stream untouched.
        const attempt = stream.fork();

        let index = 0;
        while ( index < expected.length ) {
            const item = attempt.next();
            if ( item.done || expected[index] !== item.value ) {
                return UNRECOGNIZED;
            }
            index++;
        }

        // Full match: commit the fork's position back to the caller's stream.
        stream.join(attempt);
        return { status: VALUE, $: 'literal', value: expected };
    }
}
/**
 * Parser that collects a run of consecutive stream values which are all
 * members of a given collection, and concatenates them into a string.
 *
 * `_create(values)` stores the collection; membership is checked with
 * `values.includes(item)`.
 */
export class StringOf extends Parser {
    _create (values) {
        this.values = values;
    }

    /**
     * @param stream Stream to read from; only advanced when at least one item matched.
     * @returns UNRECOGNIZED if nothing matched, otherwise a VALUE result
     *          tagged 'stringOf' carrying the collected text.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let text = '';

        for ( ;; ) {
            // Peek first so the first non-matching item is left unconsumed.
            const peeked = attempt.look();
            if ( peeked.done || ! this.values.includes(peeked.value) ) break;
            attempt.next();
            text += peeked.value;
        }

        if ( text === '' ) return UNRECOGNIZED;

        stream.join(attempt);
        return { status: VALUE, $: 'stringOf', value: text };
    }
}

View File

@ -1,31 +1,6 @@
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from './lib.js';
import { Discard, FirstMatch, None, Optional, Repeat, Sequence } from './parsers/combinators.js';
import { Literal, StringOf } from './parsers/terminals.js';
/**
 * Parser that delegates to another parser looked up by name.
 *
 * `_create(symbolName)` stores the name. At parse time the name is resolved
 * through `this.symbol_registry`, which is not assigned anywhere in this
 * class — presumably it is attached by whoever constructs the parser
 * (NOTE(review): confirm against the grammar setup code).
 */
class Symbol extends Parser {
    _create (symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * @param stream Stream to read from; only advanced when the referenced parser succeeds.
     * @returns UNRECOGNIZED if the referenced parser did not recognize the input,
     *          an INVALID result wrapping the inner result if it was invalid,
     *          otherwise the inner result with its `$` set to the symbol name.
     * @throws {Error} If no parser is registered under the symbol name.
     */
    _parse (stream) {
        const parser = this.symbol_registry[this.symbolName];
        if ( ! parser ) {
            throw new Error(`No symbol defined named '${this.symbolName}'`);
        }

        // Parse on a fork so the caller's stream is untouched on failure.
        const subStream = stream.fork();
        const result = parser.parse(subStream);
        // No per-parse debug logging here: this runs for every symbol
        // reference and would flood the host program's console output.

        if ( result.status === UNRECOGNIZED ) {
            return UNRECOGNIZED;
        }
        if ( result.status === INVALID ) {
            return { status: INVALID, value: result };
        }

        stream.join(subStream);
        // Tag the result so consumers can tell which symbol produced it.
        result.$ = this.symbolName;
        return result;
    }
}
import { adapt_parser, VALUE } from './parser.js';
import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js';
import { Literal, None, StringOf, Symbol } from './parsers/terminals.js';
class ParserWithAction {
#parser;
@ -55,6 +30,12 @@ export class GrammarContext {
return new GrammarContext({...this.parsers, ...more_parsers});
}
/**
* Construct a parsing function for the given grammar.
* @param grammar An object of symbol-names to a DSL for parsing that symbol.
* @param actions An object of symbol-names to a function run to process the symbol after it has been parsed.
* @returns {function(*, *, {must_consume_all_input?: boolean}=): *} A function to run the parser. Throws if parsing fails.
*/
define_parser (grammar, actions) {
const symbol_registry = {};
const api = {};
@ -76,12 +57,23 @@ export class GrammarContext {
}
}
return (stream, entry_symbol) => {
return (stream, entry_symbol, { must_consume_all_input = true } = {}) => {
const entry_parser = symbol_registry[entry_symbol];
if (!entry_parser) {
throw new Error(`Entry symbol '${entry_symbol}' not found in grammar.`);
}
return entry_parser.parse(stream);
const result = entry_parser.parse(stream);
if (result.status !== VALUE) {
throw new Error('Failed to parse input against grammar.');
}
// Ensure the entire stream is consumed.
if (must_consume_all_input && !stream.is_eof()) {
throw new Error('Parsing did not consume all input.');
}
return result;
};
}
}

View File

@ -4,6 +4,12 @@ export const UNRECOGNIZED = Symbol('unrecognized');
export const INVALID = Symbol('invalid');
export const VALUE = Symbol('value');
/**
* Base class for parsers.
* To implement your own, subclass it and define these methods:
* - _create(): Acts as the constructor
* - _parse(stream): Performs the parsing on the stream, and returns either UNRECOGNIZED, INVALID, or a result object.
*/
export class Parser {
result (o) {
if (o.value && o.value.$discard) {

View File

@ -1,5 +1,9 @@
import { INVALID, UNRECOGNIZED, VALUE, adapt_parser, Parser } from '../lib.js';
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
/**
* Runs its child parser, and discards its result.
* @param parser Child parser
*/
export class Discard extends Parser {
_create (parser) {
this.parser = adapt_parser(parser);
@ -19,6 +23,10 @@ export class Discard extends Parser {
}
}
/**
* Runs its child parsers in order, and returns the first successful result.
* @param parsers Child parsers
*/
export class FirstMatch extends Parser {
_create (...parsers) {
this.parsers = parsers.map(adapt_parser);
@ -42,14 +50,10 @@ export class FirstMatch extends Parser {
}
}
/**
 * Parser that reads nothing from the stream and always succeeds with a
 * result flagged `$discard`.
 */
export class None extends Parser {
    _create () {
        // Nothing to configure.
    }

    /**
     * @param stream Unused; the stream is never read or advanced.
     * @returns A VALUE result tagged 'none' with `$discard: true`.
     */
    _parse (stream) {
        const discarded = { status: VALUE, $: 'none', $discard: true };
        return discarded;
    }
}
/**
* Runs its child parser, and then returns its result, or nothing.
* @param parser Child parser
*/
export class Optional extends Parser {
_create (parser) {
this.parser = adapt_parser(parser);
@ -66,6 +70,12 @@ export class Optional extends Parser {
}
}
/**
* Parses a repeated sequence of values with separators between them.
* @param value_parser Parser for the value
* @param separator_parser Parser for the separator, optional
* @param trailing Whether to allow a trailing separator
*/
export class Repeat extends Parser {
_create (value_parser, separator_parser, { trailing = false } = {}) {
this.value_parser = adapt_parser(value_parser);
@ -75,45 +85,58 @@ export class Repeat extends Parser {
_parse (stream) {
const results = [];
for ( ;; ) {
const subStream = stream.fork();
const subStream = stream.fork();
// Value
const result = this.value_parser.parse(subStream);
if ( result.status === UNRECOGNIZED ) {
break;
}
if ( result.status === INVALID ) {
return { status: INVALID, value: result };
}
// Parse first value
const result = this.value_parser.parse(subStream);
if ( result.status === INVALID )
return { status: INVALID, value: result };
if ( result.status === VALUE ) {
stream.join(subStream);
if ( ! result.$discard ) results.push(result);
if (!result.$discard) results.push(result);
// Separator
if ( ! this.separator_parser ) {
continue;
}
const separatorResult = this.separator_parser.parse(subStream);
if ( separatorResult.status === UNRECOGNIZED ) {
break;
}
if ( separatorResult.status === INVALID ) {
return { status: INVALID, value: separatorResult };
}
stream.join(subStream);
if ( ! result.$discard ) results.push(separatorResult);
// Repeatedly parse <separator> <value>
for (;;) {
// Separator
if (!this.separator_parser)
continue;
// TODO: Detect trailing separator and reject it if trailing==false
const separatorResult = this.separator_parser.parse(subStream);
if (separatorResult.status === UNRECOGNIZED)
break;
if (separatorResult.status === INVALID)
return { status: INVALID, value: separatorResult };
stream.join(subStream);
if (!separatorResult.$discard) results.push(separatorResult);
// Value
const result = this.value_parser.parse(subStream);
if (result.status === UNRECOGNIZED) {
// If we failed to parse a value, we have a trailing separator
if (this.trailing === false)
return { status: INVALID, value: result };
break;
}
if (result.status === INVALID)
return { status: INVALID, value: result };
stream.join(subStream);
if (!result.$discard) results.push(result);
}
}
if ( results.length === 0 ) {
if ( results.length === 0 )
return UNRECOGNIZED;
}
return { status: VALUE, value: results };
}
}
/**
* Runs a sequence of child parsers, and returns their result as an array if they all succeed.
* @param parsers Child parsers
*/
export class Sequence extends Parser {
_create (...parsers) {
this.parsers = parsers.map(adapt_parser);

View File

@ -0,0 +1,93 @@
import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
/**
 * Parses a literal value.
 * Reads `value.length` items from the stream and succeeds only if each one
 * is strictly equal to the element of `value` at the same index.
 * @param value The value to parse
 */
export class Literal extends Parser {
    _create (value) {
        this.value = value;
    }

    /**
     * @param stream Stream to read from; only advanced when the whole literal matches.
     * @returns UNRECOGNIZED if the stream ends early or any item differs,
     *          otherwise a VALUE result tagged 'literal' carrying the literal itself.
     */
    _parse (stream) {
        const expected = this.value;
        // Work on a fork so a failed match leaves the caller's stream untouched.
        const attempt = stream.fork();

        let index = 0;
        while ( index < expected.length ) {
            const item = attempt.next();
            if ( item.done || expected[index] !== item.value ) {
                return UNRECOGNIZED;
            }
            index++;
        }

        // Full match: commit the fork's position back to the caller's stream.
        stream.join(attempt);
        return { status: VALUE, $: 'literal', value: expected };
    }
}
/**
 * Parses matching characters as a string.
 * Consecutive stream values are collected for as long as `test` returns a
 * truthy result for them; the run must be at least one value long.
 * @param test Function that takes a character, and returns whether to include it.
 */
export class StringOf extends Parser {
    _create (test) {
        this.test = test;
    }

    /**
     * @param stream Stream to read from; only advanced when at least one item matched.
     * @returns UNRECOGNIZED if nothing matched, otherwise a VALUE result
     *          tagged 'stringOf' carrying the collected text.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let text = '';

        for ( ;; ) {
            // Peek first so the first rejected item is left unconsumed.
            const peeked = attempt.look();
            if ( peeked.done || ! this.test(peeked.value) ) break;
            attempt.next();
            text += peeked.value;
        }

        if ( text === '' ) return UNRECOGNIZED;

        stream.join(attempt);
        return { status: VALUE, $: 'stringOf', value: text };
    }
}
/**
 * Parses an object defined by the symbol registry.
 * The parser registered under `symbolName` in `this.symbol_registry` is run
 * on a fork of the stream. `symbol_registry` is not assigned in this class —
 * presumably it is attached by the code that builds the grammar
 * (NOTE(review): confirm against the grammar setup code).
 * @param symbolName The name of the symbol to parse.
 */
export class Symbol extends Parser {
    _create (symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * @param stream Stream to read from; only advanced when the referenced parser succeeds.
     * @returns UNRECOGNIZED if the referenced parser did not recognize the input,
     *          an INVALID result wrapping the inner result if it was invalid,
     *          otherwise the inner result with its `$` set to the symbol name.
     * @throws {Error} If no parser is registered under the symbol name.
     */
    _parse (stream) {
        const name = this.symbolName;
        const target = this.symbol_registry[name];
        if ( ! target ) {
            throw new Error(`No symbol defined named '${name}'`);
        }

        // Parse on a fork so the caller's stream is untouched on failure.
        const attempt = stream.fork();
        const outcome = target.parse(attempt);

        switch ( outcome.status ) {
            case UNRECOGNIZED:
                return UNRECOGNIZED;
            case INVALID:
                return { status: INVALID, value: outcome };
        }

        stream.join(attempt);
        // Tag the result so consumers can tell which symbol produced it.
        outcome.$ = this.symbolName;
        return outcome;
    }
}
/**
 * Does no parsing and returns a discarded result.
 * The stream is never read or advanced.
 */
export class None extends Parser {
    _create () {
        // Nothing to configure.
    }

    /**
     * @param stream Unused.
     * @returns A VALUE result tagged 'none' with `$discard: true`.
     */
    _parse (stream) {
        const discarded = { status: VALUE, $: 'none', $discard: true };
        return discarded;
    }
}

View File

@ -0,0 +1,52 @@
/**
 * Base class for input streams.
 * Defines which methods are expected for any stream implementations.
 *
 * Every method except is_eof() throws an Error naming the concrete class
 * and the missing method, so subclasses must override them.
 */
export class ParserStream {
    value_at (index) {
        throw new Error(`${this.constructor.name}.value_at() not implemented`);
    }

    look () {
        throw new Error(`${this.constructor.name}.look() not implemented`);
    }

    next () {
        throw new Error(`${this.constructor.name}.next() not implemented`);
    }

    fork () {
        throw new Error(`${this.constructor.name}.fork() not implemented`);
    }

    join () {
        throw new Error(`${this.constructor.name}.join() not implemented`);
    }

    /**
     * @returns The `done` field of whatever look() reports for the current position.
     */
    is_eof () {
        const { done } = this.look();
        return done;
    }
}
/**
 * ParserStream that takes a string, and processes it character by character.
 *
 * State is the source string `str` plus a cursor `i`; values are produced
 * as `{ done, value }` records.
 */
export class StringStream extends ParserStream {
    /**
     * @param str Source string
     * @param startIndex Initial cursor position, defaults to 0
     */
    constructor (str, startIndex = 0) {
        super();
        this.str = str;
        this.i = startIndex;
    }

    /**
     * @param index Position to read
     * @returns `{ done: true, value: undefined }` at or past the end of the
     *          string, otherwise `{ done: false, value }` with the item at `index`.
     */
    value_at (index) {
        return index >= this.str.length
            ? { done: true, value: undefined }
            : { done: false, value: this.str[index] };
    }

    /** Reads the value at the cursor without moving it. */
    look () {
        return this.value_at(this.i);
    }

    /** Reads the value at the cursor, then advances the cursor by one (even at the end). */
    next () {
        const current = this.value_at(this.i);
        this.i++;
        return current;
    }

    /** Creates an independent stream over the same string, starting at the current cursor. */
    fork () {
        return new StringStream(this.str, this.i);
    }

    /**
     * Adopts the cursor position of a forked stream.
     * @param forked Stream whose `i` is copied into this one
     */
    join (forked) {
        const { i: position } = forked;
        this.i = position;
    }
}

View File

@ -1,5 +1,6 @@
import { GrammarContext, standard_parsers } from '../../../packages/newparser/exports.js';
import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/newparser/lib.js';
import { GrammarContext, standard_parsers } from '../../../packages/parsely/exports.js';
import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/parsely/parser.js';
import { StringStream } from '../../../packages/parsely/streams.js';
class NumberParser extends Parser {
static data = {
@ -163,39 +164,6 @@ class StringParser extends Parser {
}
}
/**
 * Stream over a string that yields one item per index as `{ done, value }`
 * records. State is the source string `str` plus a cursor `i`.
 */
class StringStream {
    /**
     * @param str Source string
     * @param startIndex Initial cursor position, defaults to 0
     */
    constructor (str, startIndex = 0) {
        this.str = str;
        this.i = startIndex;
    }

    /**
     * @param index Position to read
     * @returns `{ done: true, value: undefined }` at or past the end of the
     *          string, otherwise `{ done: false, value }` with the item at `index`.
     */
    value_at (index) {
        return index >= this.str.length
            ? { done: true, value: undefined }
            : { done: false, value: this.str[index] };
    }

    /** Reads the value at the cursor without moving it. */
    look () {
        return this.value_at(this.i);
    }

    /** Reads the value at the cursor, then advances the cursor by one (even at the end). */
    next () {
        const current = this.value_at(this.i);
        this.i++;
        return current;
    }

    /** Creates an independent stream over the same string, starting at the current cursor. */
    fork () {
        return new StringStream(this.str, this.i);
    }

    /**
     * Adopts the cursor position of a forked stream.
     * @param forked Stream whose `i` is copied into this one
     */
    join (forked) {
        const { i: position } = forked;
        this.i = position;
    }
}
export default {
name: 'concept-parser',
args: {
@ -204,15 +172,13 @@ export default {
},
execute: async ctx => {
const { in_, out, err } = ctx.externs;
await out.write("STARTING CONCEPT PARSER\n");
const grammar_context = new GrammarContext(standard_parsers());
await out.write("Constructed a grammar context\n");
const parser = grammar_context.define_parser({
element: a => a.sequence(
a.symbol('whitespace'),
a.optional(a.symbol('whitespace')),
a.symbol('value'),
a.symbol('whitespace'),
a.optional(a.symbol('whitespace')),
),
value: a => a.firstMatch(
a.symbol('object'),
@ -225,37 +191,33 @@ export default {
),
array: a => a.sequence(
a.literal('['),
a.symbol('whitespace'),
a.optional(
a.firstMatch(
a.repeat(
a.symbol('element'),
a.literal(','),
{ trailing: true },
{ trailing: false },
),
a.optional(a.symbol('whitespace')),
),
a.symbol('whitespace'),
a.literal(']'),
),
member: a => a.sequence(
a.symbol('whitespace'),
a.optional(a.symbol('whitespace')),
a.symbol('string'),
a.symbol('whitespace'),
a.optional(a.symbol('whitespace')),
a.literal(':'),
a.symbol('whitespace'),
a.symbol('value'),
a.symbol('whitespace'),
a.symbol('element'),
),
object: a => a.sequence(
a.literal('{'),
a.symbol('whitespace'),
a.optional(
a.firstMatch(
a.repeat(
a.symbol('member'),
a.literal(','),
{ trailing: true },
{ trailing: false },
),
a.optional(a.symbol('whitespace')),
),
a.symbol('whitespace'),
a.literal('}'),
),
true: a => a.literal('true'),
@ -263,37 +225,31 @@ export default {
null: a => a.literal('null'),
number: a => new NumberParser(),
string: a => new StringParser(),
whitespace: a => a.optional(
a.stringOf(' \r\n\t'.split('')),
),
whitespace: a => a.stringOf(c => ' \r\n\t'.includes(c)),
}, {
element: it => it[0].value,
element: it => it.filter(it => it.$ === 'value')[0].value,
value: it => it,
array: it => {
// A parsed array contains 3 values: `[`, the entries array, and `]`, so we only care about index 1.
// If it's less than 3, there were no entries.
if (it.length < 3) return [];
return (it[1].value || [])
.filter(it => it.$ !== 'literal')
.filter(it => it.$ === 'element')
.map(it => it.value);
},
member: it => {
// A parsed member contains 3 values: a name, `:`, and a value.
const [ name_part, colon, value_part ] = it;
const [ name_part, value_part ] = it.filter(it => it.$ === 'string' || it.$ === 'element');
return { name: name_part.value, value: value_part.value };
},
object: it => {
console.log('OBJECT!!!!');
console.log(it[1]);
// A parsed object contains 3 values: `{`, the members array, and `}`, so we only care about index 1.
// If it's less than 3, there were no members.
if (it.length < 3) return {};
const result = {};
// FIXME: This is all wrong!!!
(it[1].value || [])
.filter(it => it.$ === 'member')
.forEach(it => {
result[it.name] = it.value;
result[it.value.name] = it.value.value;
});
return result;
},
@ -305,7 +261,6 @@ export default {
whitespace: _ => {},
});
// TODO: What do we want our streams to be like?
const input = ctx.locals.positionals.shift();
const stream = new StringStream(input);
try {
@ -317,4 +272,4 @@ export default {
await err.write(e.stack + '\n');
}
}
}
}