Fix infinite loops during table generation for left-recursive rules

Allow function calls on arbitrary expressions and add dot property access to the JavaScript grammar
This commit is contained in:
Max Brunsfeld 2014-03-26 22:43:08 -07:00
parent 3f770ff3c3
commit 820b6f4020
7 changed files with 2824 additions and 2292 deletions

View file

@ -21,9 +21,6 @@ namespace tree_sitter {
Grammar javascript() { Grammar javascript() {
return Grammar({ return Grammar({
{ "program", repeat(sym("statement")) }, { "program", repeat(sym("statement")) },
{ "_terminator", choice({
str(";"),
str("\n") }) },
{ "statement", choice({ { "statement", choice({
sym("statement_block"), sym("statement_block"),
sym("if_statement"), sym("if_statement"),
@ -50,18 +47,23 @@ namespace tree_sitter {
{ "expression", choice({ { "expression", choice({
sym("function_expression"), sym("function_expression"),
sym("function_call"), sym("function_call"),
sym("property_access"),
sym("literal"), sym("literal"),
sym("identifier") }) }, sym("identifier") }) },
{ "function_expression", seq({ { "function_expression", seq({
sym("_function"), sym("_function"),
sym("identifier"), optional(sym("identifier")),
sym("formal_parameters"), sym("formal_parameters"),
sym("statement_block") }) }, sym("statement_block") }) },
{ "function_call", seq({ { "function_call", seq({
sym("identifier"), sym("expression"),
str("("), str("("),
comma_sep(sym("expression")), comma_sep(sym("expression")),
str(")") }) }, str(")") }) },
{ "property_access", seq({
sym("expression"),
str("."),
sym("identifier") }) },
{ "formal_parameters", seq({ { "formal_parameters", seq({
str("("), str("("),
comma_sep(sym("identifier")), comma_sep(sym("identifier")),
@ -77,7 +79,7 @@ namespace tree_sitter {
{ "object", seq({ { "object", seq({
str("{"), str("{"),
comma_sep(err(seq({ comma_sep(err(seq({
sym("string"), choice({ sym("string"), sym("identifier") }),
str(":"), str(":"),
sym("expression") }))), sym("expression") }))),
str("}"), }) }, str("}"), }) },
@ -86,6 +88,9 @@ namespace tree_sitter {
comma_sep(err(sym("expression"))), comma_sep(err(sym("expression"))),
str("]") }) }, str("]") }) },
{ "_terminator", choice({
str(";"),
str("\n") }) },
{ "_var", str("var") }, { "_var", str("var") },
{ "_if", str("if") }, { "_if", str("if") },
{ "_function", str("function") }, { "_function", str("function") },

File diff suppressed because it is too large. [Load diff]

View file

@ -68,6 +68,21 @@ describe("computing FIRST sets", []() {
}))); })));
}); });
}); });
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
Grammar grammar({
{ "expression", choice({
seq({ sym("expression"), sym("x") }),
sym("y"),
}) },
});
AssertThat(first_set(sym("expression"), grammar), Equals(set<Symbol>({
Symbol("y")
})));
});
});
}); });
END_TEST END_TEST

View file

@ -1,5 +1,6 @@
#include "compiler_spec_helper.h" #include "compiler_spec_helper.h"
#include "compiler/build_tables/rule_can_be_blank.h" #include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/prepared_grammar.h"
using namespace rules; using namespace rules;
using build_tables::rule_can_be_blank; using build_tables::rule_can_be_blank;
@ -7,21 +8,63 @@ using build_tables::rule_can_be_blank;
START_TEST START_TEST
describe("checking if rules can be blank", [&]() { describe("checking if rules can be blank", [&]() {
it("handles sequences", [&]() { rule_ptr rule;
rule_ptr rule = seq({
choice({
str("x"),
blank(),
}),
str("y"),
});
AssertThat(rule_can_be_blank(rule), Equals(false)); it("returns false for basic rules", [&]() {
AssertThat(rule_can_be_blank(sym("x")), IsFalse());
AssertThat(rule_can_be_blank(str("x")), IsFalse());
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
});
it("returns true for blanks", [&]() {
AssertThat(rule_can_be_blank(blank()), IsTrue());
}); });
it("returns true for repeats", [&]() { it("returns true for repeats", [&]() {
rule_ptr rule = repeat(str("x")); AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
AssertThat(rule_can_be_blank(rule), Equals(true)); });
it("returns true for choices iff one or more sides can be blank", [&]() {
rule = choice({ sym("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ blank(), sym("x") });
AssertThat(rule_can_be_blank(rule), IsTrue());
rule = choice({ sym("x"), sym("y") });
AssertThat(rule_can_be_blank(rule), IsFalse());
});
it("returns true for sequences iff both sides can be blank", [&]() {
rule = seq({ blank(), str("x") });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ str("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsFalse());
rule = seq({ blank(), choice({ sym("x"), blank() }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({
{ "A", choice({
seq({ sym("A"), sym("x") }),
blank() }) },
{ "B", choice({
seq({ sym("B"), sym("y") }),
sym("z") }) },
}, {});
it("terminates for left-recursive rules that can be blank", [&]() {
rule = sym("A");
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
});
it("terminates for left-recursive rules that can't be blank", [&]() {
rule = sym("B");
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
});
}); });
}); });

View file

@ -56,15 +56,21 @@ else {
========================================== ==========================================
parses named functions parses named functions
========================================== ==========================================
function doThis(argA, argB) { var x = {
var x = argA; theMethod: function(argA, argB) {
var x = argA;
}
}; };
doThis(5, 6); x.theMethod(5, 6);
--- ---
(program (program
(statement (function_expression (statement (assignment
(identifier) (identifier)
(formal_parameters (identifier) (identifier)) (object (identifier) (function_expression
(statement_block (statement (assignment (identifier) (identifier)))))) (formal_parameters (identifier) (identifier))
(statement (function_call (identifier) (number) (number)))) (statement_block (statement (assignment (identifier) (identifier))))))))
(statement (function_call
(property_access (identifier) (identifier))
(number) (number))))

View file

@ -19,14 +19,19 @@ namespace tree_sitter {
class FirstSet : public rules::RuleFn<set<Symbol>> { class FirstSet : public rules::RuleFn<set<Symbol>> {
const PreparedGrammar grammar; const PreparedGrammar grammar;
set<Symbol> visited_symbols;
public: public:
explicit FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {} explicit FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {}
void visit(const Symbol *rule) { void visit(const Symbol *rule) {
if (grammar.has_definition(*rule)) { if (visited_symbols.find(*rule) == visited_symbols.end()) {
value = apply(grammar.rule(*rule)); visited_symbols.insert(*rule);
} else {
value = set<Symbol>({ *rule }); if (grammar.has_definition(*rule)) {
value = apply(grammar.rule(*rule));
} else {
value = set<Symbol>({ *rule });
}
} }
} }

View file

@ -1,4 +1,5 @@
#include "compiler/build_tables/rule_can_be_blank.h" #include "compiler/build_tables/rule_can_be_blank.h"
#include <set>
#include "tree_sitter/compiler.h" #include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h" #include "compiler/prepared_grammar.h"
#include "compiler/rules/symbol.h" #include "compiler/rules/symbol.h"
@ -8,6 +9,8 @@
#include "compiler/rules/blank.h" #include "compiler/rules/blank.h"
namespace tree_sitter { namespace tree_sitter {
using std::set;
namespace build_tables { namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> { class CanBeBlank : public rules::RuleFn<bool> {
protected: protected:
@ -34,13 +37,17 @@ namespace tree_sitter {
class CanBeBlankRecursive : public CanBeBlank { class CanBeBlankRecursive : public CanBeBlank {
const PreparedGrammar grammar; const PreparedGrammar grammar;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit; using CanBeBlank::visit;
public: public:
explicit CanBeBlankRecursive(const PreparedGrammar &grammar) : grammar(grammar) {} explicit CanBeBlankRecursive(const PreparedGrammar &grammar) : grammar(grammar) {}
void visit(const rules::Symbol *rule) { void visit(const rules::Symbol *rule) {
value = grammar.has_definition(*rule) && apply(grammar.rule(*rule)); if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
value = grammar.has_definition(*rule) && apply(grammar.rule(*rule));
}
} }
}; };