mirror of
https://github.com/HeyPuter/puter
synced 2024-11-14 22:06:00 +00:00
Merge pull request #423 from AtkinsSJ/parsely
Tidy up "newparser" so it can be used properly
This commit is contained in:
commit
895358e6e5
@ -1,46 +0,0 @@
|
||||
import { Parser, UNRECOGNIZED, VALUE } from '../lib.js';
|
||||
|
||||
/**
 * Terminal parser that matches one fixed sequence of stream items.
 * @param value Indexable sequence (read through `length` and `[i]`) to match.
 */
export class Literal extends Parser {
    _create (value) {
        this.value = value;
    }

    /**
     * Reads `this.value.length` items from a fork of `stream`, comparing each
     * one with the expected item at the same position.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED when the fork ends early or an item differs;
     *          otherwise a `literal` result carrying `this.value`.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let index = 0;
        while ( index < this.value.length ) {
            const item = attempt.next();
            const matches = !item.done && item.value === this.value[index];
            if ( !matches ) return UNRECOGNIZED;
            index += 1;
        }

        // The fork is joined back only after every item matched.
        stream.join(attempt);
        return { status: VALUE, $: 'literal', value: this.value };
    }
}
|
||||
|
||||
/**
 * Terminal parser that collects a run of consecutive items which are all
 * members of a given collection.
 * @param values Collection queried with `values.includes(item)`.
 */
export class StringOf extends Parser {
    _create (values) {
        this.values = values;
    }

    /**
     * Peeks at a fork of `stream` and consumes items for as long as they are
     * contained in `this.values`, concatenating them into one string.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED when the collected text is empty; otherwise a
     *          `stringOf` result holding the collected text.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let text = '';

        for (;;) {
            const peeked = attempt.look();
            const accepted = !peeked.done && this.values.includes(peeked.value);
            if ( !accepted ) break;

            // Consume the item that was just peeked at.
            attempt.next();
            text += peeked.value;
        }

        if ( text.length > 0 ) {
            stream.join(attempt);
            return { status: VALUE, $: 'stringOf', value: text };
        }

        return UNRECOGNIZED;
    }
}
|
@ -1,31 +1,6 @@
|
||||
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from './lib.js';
|
||||
import { Discard, FirstMatch, None, Optional, Repeat, Sequence } from './parsers/combinators.js';
|
||||
import { Literal, StringOf } from './parsers/terminals.js';
|
||||
|
||||
/**
 * Parser that defers to whichever parser is registered under a symbol name.
 * The lookup goes through `this.symbol_registry`, which this class never
 * assigns itself; it is expected to be attached to the instance elsewhere.
 * @param symbolName Name of the symbol to look up when parsing.
 */
class Symbol extends Parser {
    _create(symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * Runs the registered parser on a fork of `stream`.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED if the child did not recognise the input, an
     *          INVALID wrapper around the child's result if it was invalid,
     *          otherwise the child's result tagged with `$ = symbolName`.
     * @throws {Error} If no parser is registered under `symbolName`.
     */
    _parse (stream) {
        const parser = this.symbol_registry[this.symbolName];
        if ( ! parser ) {
            throw new Error(`No symbol defined named '${this.symbolName}'`);
        }

        const subStream = stream.fork();
        const result = parser.parse(subStream);
        // No debug logging here: this runs for every symbol reference and
        // would flood the console with intermediate parse results.
        if ( result.status === UNRECOGNIZED ) {
            return UNRECOGNIZED;
        }
        if ( result.status === INVALID ) {
            return { status: INVALID, value: result };
        }

        // Success: commit the fork and label the result with the symbol name.
        stream.join(subStream);
        result.$ = this.symbolName;
        return result;
    }
}
|
||||
import { adapt_parser, VALUE } from './parser.js';
|
||||
import { Discard, FirstMatch, Optional, Repeat, Sequence } from './parsers/combinators.js';
|
||||
import { Literal, None, StringOf, Symbol } from './parsers/terminals.js';
|
||||
|
||||
class ParserWithAction {
|
||||
#parser;
|
||||
@ -55,6 +30,12 @@ export class GrammarContext {
|
||||
return new GrammarContext({...this.parsers, ...more_parsers});
|
||||
}
|
||||
|
||||
/**
|
||||
* Construct a parsing function for the given grammar.
|
||||
* @param grammar An object of symbol-names to a DSL for parsing that symbol.
|
||||
* @param actions An object of symbol-names to a function run to process the symbol after it has been parsed.
|
||||
* @returns {function(*, *, {must_consume_all_input?: boolean}=): *} A function to run the parser. Throws if parsing fails.
|
||||
*/
|
||||
define_parser (grammar, actions) {
|
||||
const symbol_registry = {};
|
||||
const api = {};
|
||||
@ -76,12 +57,23 @@ export class GrammarContext {
|
||||
}
|
||||
}
|
||||
|
||||
return (stream, entry_symbol) => {
|
||||
return (stream, entry_symbol, { must_consume_all_input = true } = {}) => {
|
||||
const entry_parser = symbol_registry[entry_symbol];
|
||||
if (!entry_parser) {
|
||||
throw new Error(`Entry symbol '${entry_symbol}' not found in grammar.`);
|
||||
}
|
||||
return entry_parser.parse(stream);
|
||||
const result = entry_parser.parse(stream);
|
||||
|
||||
if (result.status !== VALUE) {
|
||||
throw new Error('Failed to parse input against grammar.');
|
||||
}
|
||||
|
||||
// Ensure the entire stream is consumed.
|
||||
if (must_consume_all_input && !stream.is_eof()) {
|
||||
throw new Error('Parsing did not consume all input.');
|
||||
}
|
||||
|
||||
return result;
|
||||
};
|
||||
}
|
||||
}
|
@ -4,6 +4,12 @@ export const UNRECOGNIZED = Symbol('unrecognized');
|
||||
export const INVALID = Symbol('invalid');
|
||||
export const VALUE = Symbol('value');
|
||||
|
||||
/**
|
||||
* Base class for parsers.
|
||||
* To implement your own, subclass it and define these methods:
|
||||
* - _create(): Acts as the constructor
|
||||
* - _parse(stream): Performs the parsing on the stream, and returns either UNRECOGNIZED, INVALID, or a result object.
|
||||
*/
|
||||
export class Parser {
|
||||
result (o) {
|
||||
if (o.value && o.value.$discard) {
|
@ -1,5 +1,9 @@
|
||||
import { INVALID, UNRECOGNIZED, VALUE, adapt_parser, Parser } from '../lib.js';
|
||||
import { adapt_parser, INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
|
||||
|
||||
/**
|
||||
* Runs its child parser, and discards its result.
|
||||
* @param parser Child parser
|
||||
*/
|
||||
export class Discard extends Parser {
|
||||
_create (parser) {
|
||||
this.parser = adapt_parser(parser);
|
||||
@ -19,6 +23,10 @@ export class Discard extends Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs its child parsers in order, and returns the first successful result.
|
||||
* @param parsers Child parsers
|
||||
*/
|
||||
export class FirstMatch extends Parser {
|
||||
_create (...parsers) {
|
||||
this.parsers = parsers.map(adapt_parser);
|
||||
@ -42,14 +50,10 @@ export class FirstMatch extends Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Parser that never touches the stream and always yields a discarded value.
 */
export class None extends Parser {
    _create () {
        // Nothing to configure.
    }

    /**
     * @param stream Unused.
     * @returns A VALUE result tagged `none` and flagged with `$discard`.
     */
    _parse (stream) {
        const outcome = { status: VALUE, $: 'none' };
        outcome.$discard = true;
        return outcome;
    }
}
|
||||
|
||||
/**
|
||||
* Runs its child parser, and then returns its result, or nothing.
|
||||
* @param parser Child parser
|
||||
*/
|
||||
export class Optional extends Parser {
|
||||
_create (parser) {
|
||||
this.parser = adapt_parser(parser);
|
||||
@ -66,6 +70,12 @@ export class Optional extends Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Parses a repeated sequence of values with separators between them.
|
||||
* @param value_parser Parser for the value
|
||||
* @param separator_parser Parser for the separator, optional
|
||||
* @param trailing Whether to allow a trailing separator
|
||||
*/
|
||||
export class Repeat extends Parser {
|
||||
_create (value_parser, separator_parser, { trailing = false } = {}) {
|
||||
this.value_parser = adapt_parser(value_parser);
|
||||
@ -75,45 +85,58 @@ export class Repeat extends Parser {
|
||||
|
||||
_parse (stream) {
|
||||
const results = [];
|
||||
for ( ;; ) {
|
||||
const subStream = stream.fork();
|
||||
const subStream = stream.fork();
|
||||
|
||||
// Value
|
||||
const result = this.value_parser.parse(subStream);
|
||||
if ( result.status === UNRECOGNIZED ) {
|
||||
break;
|
||||
}
|
||||
if ( result.status === INVALID ) {
|
||||
return { status: INVALID, value: result };
|
||||
}
|
||||
// Parse first value
|
||||
const result = this.value_parser.parse(subStream);
|
||||
if ( result.status === INVALID )
|
||||
return { status: INVALID, value: result };
|
||||
|
||||
if ( result.status === VALUE ) {
|
||||
stream.join(subStream);
|
||||
if ( ! result.$discard ) results.push(result);
|
||||
if (!result.$discard) results.push(result);
|
||||
|
||||
// Separator
|
||||
if ( ! this.separator_parser ) {
|
||||
continue;
|
||||
}
|
||||
const separatorResult = this.separator_parser.parse(subStream);
|
||||
if ( separatorResult.status === UNRECOGNIZED ) {
|
||||
break;
|
||||
}
|
||||
if ( separatorResult.status === INVALID ) {
|
||||
return { status: INVALID, value: separatorResult };
|
||||
}
|
||||
stream.join(subStream);
|
||||
if ( ! result.$discard ) results.push(separatorResult);
|
||||
// Repeatedly parse <separator> <value>
|
||||
for (;;) {
|
||||
// Separator
|
||||
if (!this.separator_parser)
|
||||
continue;
|
||||
|
||||
// TODO: Detect trailing separator and reject it if trailing==false
|
||||
const separatorResult = this.separator_parser.parse(subStream);
|
||||
if (separatorResult.status === UNRECOGNIZED)
|
||||
break;
|
||||
if (separatorResult.status === INVALID)
|
||||
return { status: INVALID, value: separatorResult };
|
||||
stream.join(subStream);
|
||||
if (!separatorResult.$discard) results.push(separatorResult);
|
||||
|
||||
// Value
|
||||
const result = this.value_parser.parse(subStream);
|
||||
if (result.status === UNRECOGNIZED) {
|
||||
// If we failed to parse a value, we have a trailing separator
|
||||
if (this.trailing === false)
|
||||
return { status: INVALID, value: result };
|
||||
break;
|
||||
}
|
||||
if (result.status === INVALID)
|
||||
return { status: INVALID, value: result };
|
||||
|
||||
stream.join(subStream);
|
||||
if (!result.$discard) results.push(result);
|
||||
}
|
||||
}
|
||||
|
||||
if ( results.length === 0 ) {
|
||||
if ( results.length === 0 )
|
||||
return UNRECOGNIZED;
|
||||
}
|
||||
|
||||
return { status: VALUE, value: results };
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Runs a sequence of child parsers, and returns their result as an array if they all succeed.
|
||||
* @param parsers Child parsers
|
||||
*/
|
||||
export class Sequence extends Parser {
|
||||
_create (...parsers) {
|
||||
this.parsers = parsers.map(adapt_parser);
|
93
packages/phoenix/packages/parsely/parsers/terminals.js
Normal file
93
packages/phoenix/packages/parsely/parsers/terminals.js
Normal file
@ -0,0 +1,93 @@
|
||||
import { INVALID, Parser, UNRECOGNIZED, VALUE } from '../parser.js';
|
||||
|
||||
/**
 * Terminal parser that matches one fixed sequence of stream items.
 * @param value Indexable sequence (read through `length` and `[i]`) to match.
 */
export class Literal extends Parser {
    _create (value) {
        this.value = value;
    }

    /**
     * Reads `this.value.length` items from a fork of `stream`, comparing each
     * one with the expected item at the same position.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED when the fork ends early or an item differs;
     *          otherwise a `literal` result carrying `this.value`.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let index = 0;
        while ( index < this.value.length ) {
            const item = attempt.next();
            const matches = !item.done && item.value === this.value[index];
            if ( !matches ) return UNRECOGNIZED;
            index += 1;
        }

        // The fork is joined back only after every item matched.
        stream.join(attempt);
        return { status: VALUE, $: 'literal', value: this.value };
    }
}
|
||||
|
||||
/**
 * Terminal parser that collects a run of consecutive items accepted by a
 * predicate and returns them as one string.
 * @param test Function called with each peeked item; a truthy return keeps it.
 */
export class StringOf extends Parser {
    _create (test) {
        this.test = test;
    }

    /**
     * Peeks at a fork of `stream` and consumes items for as long as
     * `this.test` accepts them, concatenating them into one string.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED when the collected text is empty; otherwise a
     *          `stringOf` result holding the collected text.
     */
    _parse (stream) {
        const attempt = stream.fork();
        let text = '';

        for (;;) {
            const peeked = attempt.look();
            const accepted = !peeked.done && this.test(peeked.value);
            if ( !accepted ) break;

            // Consume the item that was just peeked at.
            attempt.next();
            text += peeked.value;
        }

        if ( text.length > 0 ) {
            stream.join(attempt);
            return { status: VALUE, $: 'stringOf', value: text };
        }

        return UNRECOGNIZED;
    }
}
|
||||
|
||||
/**
 * Parser that defers to whichever parser is registered under a symbol name.
 * The lookup goes through `this.symbol_registry`, which this class never
 * assigns itself; it is expected to be attached to the instance elsewhere.
 * @param symbolName Name of the symbol to look up when parsing.
 */
export class Symbol extends Parser {
    _create(symbolName) {
        this.symbolName = symbolName;
    }

    /**
     * Runs the registered parser on a fork of `stream`.
     * @param stream Stream providing `fork()` and `join()`.
     * @returns UNRECOGNIZED if the child did not recognise the input, an
     *          INVALID wrapper around the child's result if it was invalid,
     *          otherwise the child's result tagged with `$ = symbolName`.
     * @throws {Error} If no parser is registered under `symbolName`.
     */
    _parse (stream) {
        const target = this.symbol_registry[this.symbolName];
        if ( ! target ) {
            throw new Error(`No symbol defined named '${this.symbolName}'`);
        }

        const attempt = stream.fork();
        const outcome = target.parse(attempt);
        switch ( outcome.status ) {
            case UNRECOGNIZED:
                return UNRECOGNIZED;
            case INVALID:
                return { status: INVALID, value: outcome };
            default:
                // Success: commit the fork and label the result.
                stream.join(attempt);
                outcome.$ = this.symbolName;
                return outcome;
        }
    }
}
|
||||
|
||||
/**
 * Parser that never touches the stream and always yields a discarded value.
 */
export class None extends Parser {
    _create () {
        // Nothing to configure.
    }

    /**
     * @param stream Unused.
     * @returns A VALUE result tagged `none` and flagged with `$discard`.
     */
    _parse (stream) {
        const outcome = { status: VALUE, $: 'none' };
        outcome.$discard = true;
        return outcome;
    }
}
|
52
packages/phoenix/packages/parsely/streams.js
Normal file
52
packages/phoenix/packages/parsely/streams.js
Normal file
@ -0,0 +1,52 @@
|
||||
/**
 * Base class for input streams.
 * Defines which methods are expected for any stream implementations.
 * Every method except is_eof() throws a "not implemented" error, naming the
 * concrete class, until a subclass overrides it.
 */
export class ParserStream {
    /**
     * Look up the item at an absolute position.
     * @param index Position to read.
     */
    value_at (index) { throw new Error(`${this.constructor.name}.value_at() not implemented`); }

    /**
     * Peek at the current item. is_eof() reads the `done` property of the
     * returned object, so implementations must return an object with one.
     */
    look () { throw new Error(`${this.constructor.name}.look() not implemented`); }

    /** Read the current item and move past it. */
    next () { throw new Error(`${this.constructor.name}.next() not implemented`); }

    /** Create an independent stream starting at the current position. */
    fork () { throw new Error(`${this.constructor.name}.fork() not implemented`); }

    /**
     * Adopt the position of a stream previously created with fork().
     * @param forked The forked stream to adopt the position from.
     */
    join (forked) { throw new Error(`${this.constructor.name}.join() not implemented`); }

    /**
     * @returns The `done` flag of look(), i.e. whether the stream has no
     *          current item left.
     */
    is_eof () {
        return this.look().done;
    }
}
|
||||
|
||||
/**
 * ParserStream that takes a string, and processes it one indexed character
 * (UTF-16 code unit, as returned by `str[index]`) at a time.
 */
export class StringStream extends ParserStream {
    /**
     * @param str String to read from.
     * @param startIndex Position of the first item to read; defaults to 0.
     */
    constructor (str, startIndex = 0) {
        super();
        this.str = str;
        this.i = startIndex;
    }

    /**
     * @param index Position to read.
     * @returns `{ done: false, value }` for a position inside the string,
     *          `{ done: true, value: undefined }` for any position outside it.
     */
    value_at (index) {
        // Check both ends: a negative index would otherwise be reported as a
        // live item whose value is undefined.
        const inRange = index >= 0 && index < this.str.length;
        if ( !inRange ) {
            return { done: true, value: undefined };
        }

        return { done: false, value: this.str[index] };
    }

    /** @returns The item at the current position, without advancing. */
    look () {
        return this.value_at(this.i);
    }

    /** @returns The item at the current position; the position then moves on by one. */
    next () {
        const result = this.value_at(this.i);
        this.i++;
        return result;
    }

    /** @returns A new StringStream over the same string at the same position. */
    fork () {
        return new StringStream(this.str, this.i);
    }

    /**
     * Adopts the position of `forked`.
     * @param forked Stream whose `i` is copied into this stream.
     */
    join (forked) {
        this.i = forked.i;
    }
}
|
@ -1,5 +1,6 @@
|
||||
import { GrammarContext, standard_parsers } from '../../../packages/newparser/exports.js';
|
||||
import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/newparser/lib.js';
|
||||
import { GrammarContext, standard_parsers } from '../../../packages/parsely/exports.js';
|
||||
import { Parser, UNRECOGNIZED, VALUE } from '../../../packages/parsely/parser.js';
|
||||
import { StringStream } from '../../../packages/parsely/streams.js';
|
||||
|
||||
class NumberParser extends Parser {
|
||||
static data = {
|
||||
@ -163,39 +164,6 @@ class StringParser extends Parser {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Input stream over a string, read one indexed character (UTF-16 code unit,
 * as returned by `str[index]`) at a time.
 */
class StringStream {
    /**
     * @param str String to read from.
     * @param startIndex Position of the first item to read; defaults to 0.
     */
    constructor (str, startIndex = 0) {
        this.str = str;
        this.i = startIndex;
    }

    /**
     * @param index Position to read.
     * @returns `{ done: false, value }` for a position inside the string,
     *          `{ done: true, value: undefined }` for any position outside it.
     */
    value_at (index) {
        // Check both ends: a negative index would otherwise be reported as a
        // live item whose value is undefined.
        const inRange = index >= 0 && index < this.str.length;
        if ( !inRange ) {
            return { done: true, value: undefined };
        }

        return { done: false, value: this.str[index] };
    }

    /** @returns The item at the current position, without advancing. */
    look () {
        return this.value_at(this.i);
    }

    /** @returns The item at the current position; the position then moves on by one. */
    next () {
        const result = this.value_at(this.i);
        this.i++;
        return result;
    }

    /** @returns A new StringStream over the same string at the same position. */
    fork () {
        return new StringStream(this.str, this.i);
    }

    /**
     * Adopts the position of `forked`.
     * @param forked Stream whose `i` is copied into this stream.
     */
    join (forked) {
        this.i = forked.i;
    }
}
|
||||
|
||||
export default {
|
||||
name: 'concept-parser',
|
||||
args: {
|
||||
@ -204,15 +172,13 @@ export default {
|
||||
},
|
||||
execute: async ctx => {
|
||||
const { in_, out, err } = ctx.externs;
|
||||
await out.write("STARTING CONCEPT PARSER\n");
|
||||
const grammar_context = new GrammarContext(standard_parsers());
|
||||
await out.write("Constructed a grammar context\n");
|
||||
|
||||
const parser = grammar_context.define_parser({
|
||||
element: a => a.sequence(
|
||||
a.symbol('whitespace'),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
a.symbol('value'),
|
||||
a.symbol('whitespace'),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
),
|
||||
value: a => a.firstMatch(
|
||||
a.symbol('object'),
|
||||
@ -225,37 +191,33 @@ export default {
|
||||
),
|
||||
array: a => a.sequence(
|
||||
a.literal('['),
|
||||
a.symbol('whitespace'),
|
||||
a.optional(
|
||||
a.firstMatch(
|
||||
a.repeat(
|
||||
a.symbol('element'),
|
||||
a.literal(','),
|
||||
{ trailing: true },
|
||||
{ trailing: false },
|
||||
),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
),
|
||||
a.symbol('whitespace'),
|
||||
a.literal(']'),
|
||||
),
|
||||
member: a => a.sequence(
|
||||
a.symbol('whitespace'),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
a.symbol('string'),
|
||||
a.symbol('whitespace'),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
a.literal(':'),
|
||||
a.symbol('whitespace'),
|
||||
a.symbol('value'),
|
||||
a.symbol('whitespace'),
|
||||
a.symbol('element'),
|
||||
),
|
||||
object: a => a.sequence(
|
||||
a.literal('{'),
|
||||
a.symbol('whitespace'),
|
||||
a.optional(
|
||||
a.firstMatch(
|
||||
a.repeat(
|
||||
a.symbol('member'),
|
||||
a.literal(','),
|
||||
{ trailing: true },
|
||||
{ trailing: false },
|
||||
),
|
||||
a.optional(a.symbol('whitespace')),
|
||||
),
|
||||
a.symbol('whitespace'),
|
||||
a.literal('}'),
|
||||
),
|
||||
true: a => a.literal('true'),
|
||||
@ -263,37 +225,31 @@ export default {
|
||||
null: a => a.literal('null'),
|
||||
number: a => new NumberParser(),
|
||||
string: a => new StringParser(),
|
||||
whitespace: a => a.optional(
|
||||
a.stringOf(' \r\n\t'.split('')),
|
||||
),
|
||||
whitespace: a => a.stringOf(c => ' \r\n\t'.includes(c)),
|
||||
}, {
|
||||
element: it => it[0].value,
|
||||
element: it => it.filter(it => it.$ === 'value')[0].value,
|
||||
value: it => it,
|
||||
array: it => {
|
||||
// A parsed array contains 3 values: `[`, the entries array, and `]`, so we only care about index 1.
|
||||
// If it's less than 3, there were no entries.
|
||||
if (it.length < 3) return [];
|
||||
return (it[1].value || [])
|
||||
.filter(it => it.$ !== 'literal')
|
||||
.filter(it => it.$ === 'element')
|
||||
.map(it => it.value);
|
||||
},
|
||||
member: it => {
|
||||
// A parsed member contains 3 values: a name, `:`, and a value.
|
||||
const [ name_part, colon, value_part ] = it;
|
||||
const [ name_part, value_part ] = it.filter(it => it.$ === 'string' || it.$ === 'element');
|
||||
return { name: name_part.value, value: value_part.value };
|
||||
},
|
||||
object: it => {
|
||||
console.log('OBJECT!!!!');
|
||||
console.log(it[1]);
|
||||
// A parsed object contains 3 values: `{`, the members array, and `}`, so we only care about index 1.
|
||||
// If it's less than 3, there were no members.
|
||||
if (it.length < 3) return {};
|
||||
const result = {};
|
||||
// FIXME: This is all wrong!!!
|
||||
(it[1].value || [])
|
||||
.filter(it => it.$ === 'member')
|
||||
.forEach(it => {
|
||||
result[it.name] = it.value;
|
||||
result[it.value.name] = it.value.value;
|
||||
});
|
||||
return result;
|
||||
},
|
||||
@ -305,7 +261,6 @@ export default {
|
||||
whitespace: _ => {},
|
||||
});
|
||||
|
||||
// TODO: What do we want our streams to be like?
|
||||
const input = ctx.locals.positionals.shift();
|
||||
const stream = new StringStream(input);
|
||||
try {
|
||||
@ -317,4 +272,4 @@ export default {
|
||||
await err.write(e.stack + '\n');
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user