Fix infinite loops during table generation for left-recursive rules

Add function calls for arbitrary expressions and dot property access to javascript grammar
This commit is contained in:
Max Brunsfeld 2014-03-26 22:43:08 -07:00
parent 3f770ff3c3
commit 820b6f4020
7 changed files with 2824 additions and 2292 deletions

View file

@ -21,9 +21,6 @@ namespace tree_sitter {
Grammar javascript() {
return Grammar({
{ "program", repeat(sym("statement")) },
{ "_terminator", choice({
str(";"),
str("\n") }) },
{ "statement", choice({
sym("statement_block"),
sym("if_statement"),
@ -50,18 +47,23 @@ namespace tree_sitter {
{ "expression", choice({
sym("function_expression"),
sym("function_call"),
sym("property_access"),
sym("literal"),
sym("identifier") }) },
{ "function_expression", seq({
sym("_function"),
sym("identifier"),
optional(sym("identifier")),
sym("formal_parameters"),
sym("statement_block") }) },
{ "function_call", seq({
sym("identifier"),
sym("expression"),
str("("),
comma_sep(sym("expression")),
str(")") }) },
{ "property_access", seq({
sym("expression"),
str("."),
sym("identifier") }) },
{ "formal_parameters", seq({
str("("),
comma_sep(sym("identifier")),
@ -77,7 +79,7 @@ namespace tree_sitter {
{ "object", seq({
str("{"),
comma_sep(err(seq({
sym("string"),
choice({ sym("string"), sym("identifier") }),
str(":"),
sym("expression") }))),
str("}"), }) },
@ -86,6 +88,9 @@ namespace tree_sitter {
comma_sep(err(sym("expression"))),
str("]") }) },
{ "_terminator", choice({
str(";"),
str("\n") }) },
{ "_var", str("var") },
{ "_if", str("if") },
{ "_function", str("function") },

File diff suppressed because it is too large. Load diff

View file

@ -68,6 +68,21 @@ describe("computing FIRST sets", []() {
})));
});
});
// FIRST-set computation must finish even when a rule refers to itself in the
// leftmost position of one of its alternatives.
describe("when there are left-recursive rules", [&]() {
    it("terminates", [&]() {
        // expression -> expression x | y
        const auto recursive_branch = seq({ sym("expression"), sym("x") });
        const auto base_branch = sym("y");
        Grammar grammar({
            { "expression", choice({ recursive_branch, base_branch }) },
        });

        const set<Symbol> expected_first({ Symbol("y") });
        AssertThat(first_set(sym("expression"), grammar), Equals(expected_first));
    });
});
});
END_TEST
END_TEST

View file

@ -1,5 +1,6 @@
#include "compiler_spec_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/prepared_grammar.h"
using namespace rules;
using build_tables::rule_can_be_blank;
@ -7,21 +8,63 @@ using build_tables::rule_can_be_blank;
START_TEST
describe("checking if rules can be blank", [&]() {
it("handles sequences", [&]() {
rule_ptr rule = seq({
choice({
str("x"),
blank(),
}),
str("y"),
});
rule_ptr rule;
AssertThat(rule_can_be_blank(rule), Equals(false));
it("returns false for basic rules", [&]() {
AssertThat(rule_can_be_blank(sym("x")), IsFalse());
AssertThat(rule_can_be_blank(str("x")), IsFalse());
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
});
it("returns true for blanks", [&]() {
AssertThat(rule_can_be_blank(blank()), IsTrue());
});
it("returns true for repeats", [&]() {
rule_ptr rule = repeat(str("x"));
AssertThat(rule_can_be_blank(rule), Equals(true));
AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
});
it("returns true for choices iff one or more sides can be blank", [&]() {
rule = choice({ sym("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ blank(), sym("x") });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ sym("x"), sym("y") });
AssertThat(rule_can_be_blank(rule), IsFalse());
});
it("returns true for sequences iff both sides can be blank", [&]() {
rule = seq({ blank(), str("x") });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ str("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ blank(), choice({ sym("x"), blank() }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({
{ "A", choice({
seq({ sym("A"), sym("x") }),
blank() }) },
{ "B", choice({
seq({ sym("B"), sym("y") }),
sym("z") }) },
}, {});
it("terminates for left-recursive rules that can be blank", [&]() {
rule = sym("A");
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
});
it("terminates for left-recursive rules that can't be blank", [&]() {
rule = sym("B");
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
});
});
});

View file

@ -56,15 +56,21 @@ else {
==========================================
parses named functions
==========================================
function doThis(argA, argB) {
var x = argA;
var x = {
theMethod: function(argA, argB) {
var x = argA;
}
};
doThis(5, 6);
x.theMethod(5, 6);
---
(program
(statement (function_expression
(statement (assignment
(identifier)
(formal_parameters (identifier) (identifier))
(statement_block (statement (assignment (identifier) (identifier))))))
(statement (function_call (identifier) (number) (number))))
(object (identifier) (function_expression
(formal_parameters (identifier) (identifier))
(statement_block (statement (assignment (identifier) (identifier))))))))
(statement (function_call
(property_access (identifier) (identifier))
(number) (number))))

View file

@ -19,14 +19,19 @@ namespace tree_sitter {
class FirstSet : public rules::RuleFn<set<Symbol>> {
const PreparedGrammar grammar;
set<Symbol> visited_symbols;
public:
// Constructs the rule function for the given grammar; the argument is copied
// into the by-value `grammar` member.
explicit FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {}
// Handles a symbol reference while computing a FIRST set.
//
// A symbol is expanded at most once per FirstSet instance: if it is already
// in visited_symbols the call returns without expanding it again. The
// definition must only be expanded inside this check; expanding it
// unconditionally first would re-enter apply() for a left-recursive rule
// (e.g. expression -> expression x | y) before the guard could stop it.
//
// For a first visit, a symbol with a definition in the grammar yields the
// result of applying this function to that definition; a symbol without one
// yields the set containing just the symbol itself.
//
// NOTE(review): on a repeat visit `value` is not assigned here; presumably
// RuleFn::apply resets it before dispatching. Confirm against RuleFn.
void visit(const Symbol *rule) {
    // insert() reports whether the symbol was newly added, so one lookup
    // both checks and records the visit.
    if (!visited_symbols.insert(*rule).second) return;

    if (grammar.has_definition(*rule)) {
        value = apply(grammar.rule(*rule));
    } else {
        value = set<Symbol>({ *rule });
    }
}

View file

@ -1,4 +1,5 @@
#include "compiler/build_tables/rule_can_be_blank.h"
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
#include "compiler/rules/symbol.h"
@ -8,6 +9,8 @@
#include "compiler/rules/blank.h"
namespace tree_sitter {
using std::set;
namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> {
protected:
@ -34,13 +37,17 @@ namespace tree_sitter {
// Variant of CanBeBlank that also looks through symbols by expanding their
// definitions in the grammar it was constructed with.
class CanBeBlankRecursive : public CanBeBlank {
    // Held by value: the constructor argument is copied.
    const PreparedGrammar grammar;
    // Symbols this instance has already expanded.
    set<rules::Symbol> visited_symbols;
    using CanBeBlank::visit;

public:
    explicit CanBeBlankRecursive(const PreparedGrammar &grammar) : grammar(grammar) {}

    // A symbol is expanded at most once per instance. The definition is only
    // applied after the symbol has been recorded as visited; applying it
    // unconditionally first would recurse without bound on a left-recursive
    // rule such as A -> A x | blank.
    //
    // For a first visit the result is true only when the symbol has a
    // definition in the grammar and that definition can be blank.
    //
    // NOTE(review): on a repeat visit `value` is left unassigned here;
    // presumably RuleFn::apply resets it before dispatching. Confirm.
    void visit(const rules::Symbol *rule) {
        // insert() reports whether the symbol was newly added.
        if (!visited_symbols.insert(*rule).second) return;
        value = grammar.has_definition(*rule) && apply(grammar.rule(*rule));
    }
};