Tweak format for example grammars

This commit is contained in:
Max Brunsfeld 2014-03-28 13:51:32 -07:00
parent 324f55f1ce
commit 13c4e6e648
23 changed files with 1616 additions and 1537 deletions

View file

@ -1,35 +1,32 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace examples {
using namespace tree_sitter::rules;
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Builds the example grammar for simple arithmetic expressions and returns it
// by value. Each entry pairs a rule name with the rule tree that defines it.
Grammar arithmetic() {
return Grammar({
// expression: either `term plus term` or a single `term`.
{ "expression", choice({
seq({
sym("term"),
sym("plus"),
sym("term") }),
sym("term") }) },
// term: either `factor times factor` or a single `factor`.
{ "term", choice({
seq({
sym("factor"),
sym("times"),
sym("factor") }),
sym("factor") }) },
// factor: a variable, a number, or a parenthesized expression. The inner
// expression is wrapped in err(), which adds the built-in error symbol as
// an alternative to the wrapped rule.
{ "factor", choice({
sym("variable"),
sym("number"),
seq({
str("("),
err(sym("expression")),
str(")") }) }) },
// Tokens: literal operators and pattern-based operands.
{ "plus", str("+") },
{ "times", str("*") },
{ "number", pattern("\\d+") },
{ "variable", pattern("[a-zA-Z]+") },
});
}
}
// Example grammar for simple arithmetic expressions.
// Defined as a namespace-scope constant, so the rule tree below is built during
// static initialization; `extern` gives the const object external linkage so
// other translation units can declare and use it.
extern const Grammar arithmetic({
// expression: either `term plus term` or a single `term`.
{ "expression", choice({
seq({
sym("term"),
sym("plus"),
sym("term") }),
sym("term") }) },
// term: either `factor times factor` or a single `factor`.
{ "term", choice({
seq({
sym("factor"),
sym("times"),
sym("factor") }),
sym("factor") }) },
// factor: a variable, a number, or a parenthesized expression. The inner
// expression is wrapped in err(), which adds the built-in error symbol as
// an alternative to the wrapped rule.
{ "factor", choice({
sym("variable"),
sym("number"),
seq({
str("("),
err(sym("expression")),
str(")") }) }) },
// Tokens: literal operators and pattern-based operands.
{ "plus", str("+") },
{ "times", str("*") },
{ "number", pattern("\\d+") },
{ "variable", pattern("[a-zA-Z]+") },
});
}

View file

@ -1,130 +1,129 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace examples {
using namespace tree_sitter::rules;
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Makes `rule` optional by offering a choice between the rule itself and the
// blank rule.
static rule_ptr optional(const rule_ptr &rule) {
    const rule_ptr nothing = blank();
    return choice({ rule, nothing });
}
// Builds a rule for a possibly-empty, comma-separated list of `element`:
// either `element` followed by any number of `"," element` pairs, or blank.
static rule_ptr comma_sep(const rule_ptr &element) {
    // Zero or more `"," element` continuations after the first element.
    const rule_ptr continuations = repeat(seq({ str(","), element }));
    const rule_ptr non_empty_list = seq({ element, continuations });
    return choice({ non_empty_list, blank() });
}
// Builds the example grammar for a subset of JavaScript and returns it by
// value. Each entry pairs a rule name with the rule tree that defines it.
// err(...) wraps a rule in a choice between that rule and the built-in error
// symbol; optional(...) and comma_sep(...) are the file-local helpers above.
Grammar javascript() {
return Grammar({
// A program is any number of statements.
{ "program", repeat(sym("statement")) },
{ "statement", choice({
sym("statement_block"),
sym("if_statement"),
sym("for_statement"),
sym("var_declaration"),
sym("expression_statement") }) },
// An expression followed by a terminator.
{ "expression_statement", seq({
err(sym("expression")),
sym("_terminator") }) },
// for ( <init> <condition> <update> ) <body>. The init and condition
// slots are statement rules, each of which ends with its own `_terminator`.
{ "for_statement", seq({
sym("_for"),
str("("),
choice({
sym("var_declaration"),
sym("expression_statement"),
}),
sym("expression_statement"),
err(sym("expression")),
str(")"),
sym("statement") }) },
// if ( <expression> ) <statement>, with an optional else branch.
{ "if_statement", seq({
sym("_if"),
str("("),
err(sym("expression")),
str(")"),
sym("statement"),
optional(seq({
sym("_else"),
sym("statement") })) }) },
// { <statements> }
{ "statement_block", seq({
str("{"),
err(repeat(sym("statement"))),
str("}") }) },
// var followed by either an assignment or a bare identifier, then a terminator.
{ "var_declaration", seq({
sym("_var"),
choice({
sym("assignment"),
sym("identifier") }),
sym("_terminator") }) },
// identifier = expression
{ "assignment", seq({
sym("identifier"),
str("="),
sym("expression") })},
{ "expression", choice({
sym("function_expression"),
sym("function_call"),
sym("property_access"),
sym("assignment"),
sym("literal"),
sym("identifier") }) },
// function [name] (params) { ... }
{ "function_expression", seq({
sym("_function"),
optional(sym("identifier")),
sym("formal_parameters"),
sym("statement_block") }) },
// Note: this rule and `property_access` both begin with `expression`,
// i.e. they are left-recursive through the `expression` rule.
{ "function_call", seq({
sym("expression"),
str("("),
comma_sep(sym("expression")),
str(")") }) },
{ "property_access", seq({
sym("expression"),
str("."),
sym("identifier") }) },
// ( comma-separated identifiers )
{ "formal_parameters", seq({
str("("),
comma_sep(sym("identifier")),
str(")") })},
{ "literal", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
// { key: value, ... } where each key is a string or an identifier; each
// pair as a whole is wrapped in err().
{ "object", seq({
str("{"),
comma_sep(err(seq({
choice({ sym("string"), sym("identifier") }),
str(":"),
sym("expression") }))),
str("}"), }) },
// [ expression, ... ]
{ "array", seq({
str("["),
comma_sep(err(sym("expression"))),
str("]") }) },
// A statement terminator is either a semicolon or a newline.
{ "_terminator", choice({
str(";"),
str("\n") }) },
// Keyword and constant tokens.
// NOTE(review): the underscore-prefixed names are presumably meant to be
// hidden from the resulting tree — confirm against the compiler.
{ "_var", str("var") },
{ "_for", str("for") },
{ "_if", str("if") },
{ "_function", str("function") },
{ "_else", str("else") },
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
// Pattern-based tokens.
// NOTE(review): the string pattern uses `+`, so at least one character is
// required between the quotes — confirm that an empty string literal is
// intentionally excluded.
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "identifier", pattern("[\\w_$]+") },
// NOTE(review): the `.` before the fractional digits is unescaped; confirm
// whether the pattern syntax treats it as a literal dot or as a wildcard.
{ "number", pattern("\\d+(.\\d+)?") },
});
}
// Makes `rule` optional by offering a choice between the rule itself and the
// blank rule.
static rule_ptr optional(const rule_ptr &rule) {
    const rule_ptr nothing = blank();
    return choice({ rule, nothing });
}
// Builds a rule for a possibly-empty, comma-separated list of `element`:
// either `element` followed by any number of `"," element` pairs, or blank.
static rule_ptr comma_sep(const rule_ptr &element) {
    // Zero or more `"," element` continuations after the first element.
    const rule_ptr continuations = repeat(seq({ str(","), element }));
    const rule_ptr non_empty_list = seq({ element, continuations });
    return choice({ non_empty_list, blank() });
}
// Example grammar for a subset of JavaScript.
// Defined as a namespace-scope constant, so the rule tree below is built during
// static initialization; `extern` gives the const object external linkage so
// other translation units can declare and use it.
// err(...) wraps a rule in a choice between that rule and the built-in error
// symbol; optional(...) and comma_sep(...) are the file-local helpers above.
extern const Grammar javascript({
// A program is any number of statements.
{ "program", repeat(sym("statement")) },
{ "statement", choice({
sym("statement_block"),
sym("if_statement"),
sym("for_statement"),
sym("var_declaration"),
sym("expression_statement") }) },
// Statements
// { <statements> }
{ "statement_block", seq({
str("{"),
err(repeat(sym("statement"))),
str("}") }) },
// for ( <init> <condition> <update> ) <body>. The init and condition
// slots are statement rules, each of which ends with its own `_terminator`.
{ "for_statement", seq({
sym("_for"),
str("("),
choice({
sym("var_declaration"),
sym("expression_statement"),
}),
sym("expression_statement"),
err(sym("expression")),
str(")"),
sym("statement") }) },
// if ( <expression> ) <statement>, with an optional else branch.
{ "if_statement", seq({
sym("_if"),
str("("),
err(sym("expression")),
str(")"),
sym("statement"),
optional(seq({
sym("_else"),
sym("statement") })) }) },
// var followed by either an assignment or a bare identifier, then a terminator.
{ "var_declaration", seq({
sym("_var"),
choice({
sym("assignment"),
sym("identifier") }),
sym("_terminator") }) },
// An expression followed by a terminator.
{ "expression_statement", seq({
err(sym("expression")),
sym("_terminator") }) },
{ "expression", choice({
sym("function_expression"),
sym("function_call"),
sym("property_access"),
sym("assignment"),
sym("literal"),
sym("identifier") }) },
// Expressions
// identifier = expression
{ "assignment", seq({
sym("identifier"),
str("="),
sym("expression") })},
// function [name] (params) { ... }
{ "function_expression", seq({
sym("_function"),
optional(sym("identifier")),
sym("formal_parameters"),
sym("statement_block") }) },
// Note: this rule and `property_access` both begin with `expression`,
// i.e. they are left-recursive through the `expression` rule.
{ "function_call", seq({
sym("expression"),
str("("),
comma_sep(sym("expression")),
str(")") }) },
{ "property_access", seq({
sym("expression"),
str("."),
sym("identifier") }) },
// ( comma-separated identifiers )
{ "formal_parameters", seq({
str("("),
comma_sep(sym("identifier")),
str(")") })},
{ "literal", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
// Literals
// { key: value, ... } where each key is a string or an identifier; each
// pair as a whole is wrapped in err().
{ "object", seq({
str("{"),
comma_sep(err(seq({
choice({ sym("string"), sym("identifier") }),
str(":"),
sym("expression") }))),
str("}"), }) },
// [ expression, ... ]
{ "array", seq({
str("["),
comma_sep(err(sym("expression"))),
str("]") }) },
// Keywords
// A statement terminator is either a semicolon or a newline.
// NOTE(review): the underscore-prefixed names are presumably meant to be
// hidden from the resulting tree — confirm against the compiler.
{ "_terminator", choice({ str(";"), str("\n") }) },
{ "_var", str("var") },
{ "_for", str("for") },
{ "_if", str("if") },
{ "_function", str("function") },
{ "_else", str("else") },
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
// Pattern-based tokens.
// NOTE(review): the string pattern uses `+`, so at least one character is
// required between the quotes — confirm that an empty string literal is
// intentionally excluded.
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "identifier", pattern("[\\w_$]+") },
// NOTE(review): the `.` before the fractional digits is unescaped; confirm
// whether the pattern syntax treats it as a literal dot or as a wildcard.
{ "number", pattern("\\d+(.\\d+)?") },
});
}

View file

@ -1,46 +1,40 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace examples {
using namespace tree_sitter::rules;
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Builds a rule for a possibly-empty, comma-separated list of `rule`:
// either `rule` followed by any number of `"," rule` pairs, or blank.
static rule_ptr comma_sep(const rule_ptr &rule) {
    // Zero or more `"," rule` continuations after the first item.
    const rule_ptr continuations = repeat(seq({ str(","), rule }));
    const rule_ptr non_empty_list = seq({ rule, continuations });
    return choice({ non_empty_list, blank() });
}
// Builds the example grammar for JSON and returns it by value. Each entry pairs
// a rule name with the rule tree that defines it.
Grammar json() {
return Grammar({
// Any JSON value.
{ "value", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
// { "key": value, ... } — each string/value pair as a whole is wrapped in
// err(), which adds the built-in error symbol as an alternative.
{ "object", seq({
str("{"),
comma_sep(err(seq({
sym("string"),
str(":"),
sym("value") }))),
str("}"), }) },
// [ value, ... ] — each element is wrapped in err().
{ "array", seq({
str("["),
comma_sep(err(sym("value"))),
str("]"), }) },
// Pattern-based tokens.
// NOTE(review): the string pattern uses `+`, so at least one character is
// required between the quotes — confirm that an empty string is
// intentionally excluded.
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
// NOTE(review): the `.` before the fractional digits is unescaped; confirm
// whether the pattern syntax treats it as a literal dot or as a wildcard.
{ "number", pattern("\\d+(.\\d+)?") },
// Constant tokens.
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
});
}
// Builds a rule for a possibly-empty, comma-separated list of `rule`:
// either `rule` followed by any number of `"," rule` pairs, or blank.
static rule_ptr comma_sep(const rule_ptr &rule) {
    // Zero or more `"," rule` continuations after the first item.
    const rule_ptr continuations = repeat(seq({ str(","), rule }));
    const rule_ptr non_empty_list = seq({ rule, continuations });
    return choice({ non_empty_list, blank() });
}
// Example grammar for JSON.
// Defined as a namespace-scope constant, so the rule tree below is built during
// static initialization; `extern` gives the const object external linkage so
// other translation units can declare and use it.
extern const Grammar json({
// Any JSON value.
{ "value", choice({
sym("object"),
sym("array"),
sym("string"),
sym("number"),
sym("true"),
sym("false"),
sym("null"), }) },
// { "key": value, ... } — each string/value pair as a whole is wrapped in
// err(), which adds the built-in error symbol as an alternative.
{ "object", seq({
str("{"),
comma_sep(err(seq({
sym("string"),
str(":"),
sym("value") }))),
str("}"), }) },
// [ value, ... ] — each element is wrapped in err().
{ "array", seq({
str("["),
comma_sep(err(sym("value"))),
str("]"), }) },
// Pattern-based tokens.
// NOTE(review): the string pattern uses `+`, so at least one character is
// required between the quotes — confirm that an empty string is
// intentionally excluded.
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
// NOTE(review): the `.` before the fractional digits is unescaped; confirm
// whether the pattern syntax treats it as a literal dot or as a wildcard.
{ "number", pattern("\\d+(.\\d+)?") },
// Constant tokens.
{ "null", str("null") },
{ "true", str("true") },
{ "false", str("false") },
});
}

File diff suppressed because it is too large Load diff

View file

@ -39,10 +39,10 @@ static const size_t ts_state_count
/*
 * Table declarators. Each macro expands to the declaration of a static array
 * without an initializer; the code generator emits them in the form
 * `SYMBOL_NAMES = { ... };` / `HIDDEN_SYMBOLS = { ... }` to supply the contents.
 */

/* Names of the symbols, one slot per symbol (ts_symbol_count entries). */
#define SYMBOL_NAMES \
static const char *ts_symbol_names[ts_symbol_count]
/* Per-symbol integer flags, one slot per symbol (ts_symbol_count entries). */
#define HIDDEN_SYMBOLS \
static const int hidden_symbol_flags[ts_symbol_count]
/*
 * One state_id per parse state (ts_state_count entries).
 * NOTE(review): presumably the lex state to use in each parse state - confirm.
 */
#define LEX_STATES \
static state_id ts_lex_states[ts_state_count]
@ -55,16 +55,16 @@ ts_lexer_skip_whitespace(lexer); \
if (!ts_lexer_lookahead_char(lexer)) return ts_tree_make_leaf(ts_builtin_sym_end, 0, 0); \
next_state: \
lookahead = ts_lexer_lookahead_char(lexer);
/*
 * Lexer actions. These expand to brace-enclosed statement blocks and rely on
 * names from the enclosing lex function (`lexer`, `lex_state`, the
 * `next_state` label).
 */

/* Advance the lexer, switch to lex state `state_index`, and jump back to the
 * `next_state` label. */
#define ADVANCE(state_index) \
{ ts_lexer_advance(lexer); lex_state = state_index; goto next_state; }
/* Log the accepted symbol's name via DEBUG_LEX and return the node that
 * ts_lexer_build_node creates for `symbol`. */
#define ACCEPT_TOKEN(symbol) \
{ DEBUG_LEX("token: %s \n", ts_symbol_names[symbol]); return ts_lexer_build_node(lexer, symbol); }
/* Return a node built for the built-in error symbol. */
#define LEX_ERROR() \
{ return ts_lexer_build_node(lexer, ts_builtin_sym_error); }
/* Log the unexpected lex state via DEBUG_LEX and return NULL. */
#define LEX_PANIC() \
{ DEBUG_LEX("Lex error: unexpected state %d", LEX_STATE()); return NULL; }
@ -73,7 +73,7 @@ static const ts_parse_action ts_parse_actions[ts_state_count][ts_symbol_count]
#define EXPORT_PARSER(constructor_name) \
ts_parser constructor_name() { \
return (ts_parser){ \
return (ts_parser){ \
.parse_fn = ts_parse, \
.symbol_names = ts_symbol_names, \
.data = ts_lr_parser_make(ts_symbol_count, (const ts_parse_action *)ts_parse_actions, ts_lex_states, hidden_symbol_flags), \
@ -199,7 +199,7 @@ typedef struct {
};
} data;
} ts_parse_action;
/*
* Forward declarations
@ -232,7 +232,7 @@ static ts_lr_parser * ts_lr_parser_make(size_t symbol_count, const ts_parse_acti
result->hidden_symbol_flags = hidden_symbol_flags;
return result;
}
/*
 * Returns a pointer to the first parse action of `state`'s row in the parser's
 * flat parse table; each row is `symbol_count` entries wide.
 */
static const ts_parse_action * ts_lr_parser_table_actions(ts_lr_parser *parser, state_id state) {
    return &parser->parse_table[state * parser->symbol_count];
}

View file

@ -68,7 +68,7 @@ describe("computing FIRST sets", []() {
})));
});
});
describe("when there are left-recursive rules", [&]() {
it("terminates", [&]() {
Grammar grammar({
@ -77,7 +77,7 @@ describe("computing FIRST sets", []() {
sym("y"),
}) },
});
AssertThat(first_set(sym("expression"), grammar), Equals(set<Symbol>({
Symbol("y")
})));

View file

@ -31,7 +31,7 @@ describe("computing closures of item sets", []() {
ParseItemSet item_set = item_set_closure(ParseItemSet({
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), 0, Symbol("__END__"))
}), grammar);
AssertThat(item_set, Equals(ParseItemSet({
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("__END__")),
ParseItem(Symbol("F"), grammar.rule(Symbol("F")), 0, Symbol("+")),

View file

@ -15,7 +15,7 @@ describe("checking if rules can be blank", [&]() {
AssertThat(rule_can_be_blank(str("x")), IsFalse());
AssertThat(rule_can_be_blank(pattern("x")), IsFalse());
});
it("returns true for blanks", [&]() {
AssertThat(rule_can_be_blank(blank()), IsTrue());
});
@ -23,7 +23,7 @@ describe("checking if rules can be blank", [&]() {
it("returns true for repeats", [&]() {
AssertThat(rule_can_be_blank(repeat(str("x"))), IsTrue());
});
it("returns true for choices iff one or more sides can be blank", [&]() {
rule = choice({ sym("x"), blank() });
AssertThat(rule_can_be_blank(rule), IsTrue());
@ -34,7 +34,7 @@ describe("checking if rules can be blank", [&]() {
rule = choice({ sym("x"), sym("y") });
AssertThat(rule_can_be_blank(rule), IsFalse());
});
it("returns true for sequences iff both sides can be blank", [&]() {
rule = seq({ blank(), str("x") });
AssertThat(rule_can_be_blank(rule), IsFalse());
@ -45,7 +45,7 @@ describe("checking if rules can be blank", [&]() {
rule = seq({ blank(), choice({ sym("x"), blank() }) });
AssertThat(rule_can_be_blank(rule), IsTrue());
});
describe("checking recursively (by expanding non-terminals)", [&]() {
PreparedGrammar grammar({
{ "A", choice({
@ -55,12 +55,12 @@ describe("checking if rules can be blank", [&]() {
seq({ sym("B"), sym("y") }),
sym("z") }) },
}, {});
it("terminates for left-recursive rules that can be blank", [&]() {
rule = sym("A");
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
});
it("terminates for left-recursive rules that can't be blank", [&]() {
rule = sym("B");
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());

View file

@ -7,12 +7,10 @@ static string src_dir() {
return dir;
}
namespace tree_sitter {
namespace examples {
Grammar arithmetic();
Grammar javascript();
Grammar json();
}
namespace tree_sitter_examples {
extern const Grammar arithmetic;
extern const Grammar javascript;
extern const Grammar json;
}
START_TEST
@ -20,7 +18,7 @@ START_TEST
describe("compiling the example grammars", []() {
string example_parser_dir = src_dir() + "/examples/parsers/";
auto compile_grammar = [&](Grammar grammar, string language) {
auto compile_grammar = [&](const Grammar &grammar, string language) {
it(("compiles the " + language + " grammar").c_str(), [&]() {
ofstream file(example_parser_dir + language + ".c");
file << compile(grammar, language);
@ -28,9 +26,9 @@ describe("compiling the example grammars", []() {
});
};
compile_grammar(examples::arithmetic(), "arithmetic");
compile_grammar(examples::json(), "json");
compile_grammar(examples::javascript(), "javascript");
compile_grammar(tree_sitter_examples::arithmetic, "arithmetic");
compile_grammar(tree_sitter_examples::json, "json");
compile_grammar(tree_sitter_examples::javascript, "javascript");
});
END_TEST

View file

@ -46,7 +46,7 @@ namespace std {
}
return stream << ">";
}
template<typename TKey, typename TValue>
inline std::ostream& operator<<(std::ostream &stream, const std::map<TKey, TValue> &map) {
stream << std::string("#<map: ");
@ -60,7 +60,7 @@ namespace std {
}
return stream << ">";
}
template<typename TKey, typename TValue>
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_map<TKey, TValue> &map) {
stream << std::string("#<map: ");

View file

@ -14,7 +14,7 @@ describe("assigning user-visible names to symbols", [&]() {
{ "some_generated_string_name", str("the-string") },
{ "some_generated_pattern_name", pattern("the-pattern") },
});
describe("for symbols that are not in the lexical grammar (syntactic rules)", [&]() {
it("uses the symbol's normal name", [&]() {
auto symbol = Symbol("some_syntactic_symbol");
@ -23,7 +23,7 @@ describe("assigning user-visible names to symbols", [&]() {
})));
});
});
describe("for symbols that are in the lexical grammar", [&]() {
it("uses symbols' normal names when they are given by the user", [&]() {
auto symbol = Symbol("some_given_name");
@ -31,14 +31,14 @@ describe("assigning user-visible names to symbols", [&]() {
{ symbol, "some_given_name" }
})));
});
it("assigns names to string rules based on their string value", [&]() {
auto symbol = Symbol("some_generated_string_name", rules::SymbolTypeAuxiliary);
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({
{ symbol, "'the-string'" }
})));
});
it("assigns names to pattern rules based on their pattern value", [&]() {
auto symbol = Symbol("some_generated_pattern_name", rules::SymbolTypeAuxiliary);
AssertThat(name_symbols::name_symbols({ symbol }, lexical_grammar), Equals(map<Symbol, string>({

View file

@ -77,7 +77,7 @@ var x = {
x.theMethod(5, 6);
---
(program
(program
(var_declaration (assignment
(identifier)
(object (identifier) (function_expression

View file

@ -73,7 +73,7 @@ namespace tree_sitter {
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (ParseItem item : item_set) {
if (item.is_done()) {
ParseAction action = (item.lhs == rules::START) ?
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count);
parse_table.add_action(state_id, item.lookahead_sym, action);
@ -153,7 +153,7 @@ namespace tree_sitter {
pair<ParseTable, LexTable> build() {
auto start_symbol = make_shared<Symbol>(grammar.start_rule_name());
ParseItem item(rules::START, start_symbol, {}, rules::END_OF_INPUT);
ParseItem item(rules::START(), start_symbol, {}, rules::END_OF_INPUT());
ParseItemSet item_set = item_set_closure(ParseItemSet({ item }), grammar);
add_parse_state(item_set);
add_error_lex_state();

View file

@ -26,7 +26,7 @@ namespace tree_sitter {
void visit(const Symbol *rule) {
if (visited_symbols.find(*rule) == visited_symbols.end()) {
visited_symbols.insert(*rule);
if (grammar.has_definition(*rule)) {
value = apply(grammar.rule(*rule));
} else {

View file

@ -10,7 +10,7 @@
namespace tree_sitter {
using std::set;
namespace build_tables {
class CanBeBlank : public rules::RuleFn<bool> {
protected:
@ -37,7 +37,7 @@ namespace tree_sitter {
class CanBeBlankRecursive : public CanBeBlank {
const PreparedGrammar grammar;
set<rules::Symbol> visited_symbols;
set<rules::Symbol> visited_symbols;
using CanBeBlank::visit;
public:

View file

@ -60,7 +60,7 @@ namespace tree_sitter {
lex_table(lex_table),
symbol_names(symbol_names)
{}
string code() {
return join({
includes(),
@ -74,11 +74,11 @@ namespace tree_sitter {
parser_export(),
}, "\n\n") + "\n";
}
private:
string symbol_id(rules::Symbol symbol) {
if (symbol.is_built_in()) {
return (symbol == rules::ERROR) ?
return (symbol == rules::ERROR()) ?
"ts_builtin_sym_error" :
"ts_builtin_sym_end";
} else if (symbol.is_auxiliary()) {
@ -183,15 +183,15 @@ namespace tree_sitter {
// Generates the source text for the symbol-name table: the opening line
// `SYMBOL_NAMES = {`, then one indented `[<symbol id>] = "<name>",` entry per
// symbol, then the closing `};`.
string symbol_names_list() {
// Local copy of the parse table's symbols, extended with built-in symbols.
set<rules::Symbol> symbols(parse_table.symbols);
symbols.insert(rules::Symbol("end", rules::SymbolTypeBuiltIn));
symbols.insert(rules::Symbol("error", rules::SymbolTypeBuiltIn));
symbols.insert(rules::END_OF_INPUT());
symbols.insert(rules::ERROR());
string result = "SYMBOL_NAMES = {\n";
// NOTE(review): the loop walks `parse_table.symbols`, not the local `symbols`
// set built above, so anything present only in `symbols` is never emitted -
// confirm whether that is intentional.
// NOTE(review): `find(symbol)->second` is used without checking for end();
// presumably every parse-table symbol has an entry in `symbol_names` - verify.
for (auto symbol : parse_table.symbols)
result += indent("[" + symbol_id(symbol) + "] = \"" + symbol_names.find(symbol)->second) + "\",\n";
return result + "};";
}
string hidden_symbols_list() {
string result = "HIDDEN_SYMBOLS = {";
for (auto &symbol : parse_table.symbols)

View file

@ -4,7 +4,7 @@
namespace tree_sitter {
using std::string;
using std::vector;
namespace generate_code {
string join(vector<string> lines, string separator) {
string result;
@ -26,7 +26,7 @@ namespace tree_sitter {
util::str_replace(&input, "\n", "\n" + tab);
return tab + input;
}
string character_code(char character) {
switch (character) {
case '\0':

View file

@ -11,13 +11,13 @@ namespace tree_sitter {
using std::map;
using std::set;
using std::string;
class TokenName : public rules::RuleFn<string> {
protected:
// Names a pattern token after its escaped pattern text, wrapped in slashes.
virtual void visit(const rules::Pattern *rule) {
    const string escaped_pattern = util::escape_string(rule->value);
    value = "/" + escaped_pattern + "/";
}
// Names a string token after its escaped text, wrapped in single quotes.
virtual void visit(const rules::String *rule) {
    const string escaped_text = util::escape_string(rule->value);
    value = "'" + escaped_text + "'";
}

View file

@ -8,7 +8,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace name_symbols {
// Assigns a user-visible name to each symbol in `symbols` and returns the
// result as a symbol -> name map. `lexical_grammar` supplies the token rules.
// NOTE(review): per the accompanying spec, symbols named by the user keep their
// own name, while generated string tokens are named after their string value
// in single quotes (e.g. 'the-string') - confirm against the implementation.
std::map<rules::Symbol, std::string> name_symbols(const std::set<rules::Symbol> &symbols,
const PreparedGrammar &lexical_grammar);

View file

@ -2,8 +2,8 @@
namespace tree_sitter {
namespace rules {
// Built-in symbols defined as namespace-scope constants. Being globals with
// constructors, they are initialized during static initialization of this
// translation unit.
// NOTE(review): code in other translation units that runs during static
// initialization may observe these before they are constructed - confirm
// whether any such use exists.
const Symbol ERROR("error", SymbolTypeBuiltIn);
const Symbol START("start", SymbolTypeBuiltIn);
const Symbol END_OF_INPUT("end", SymbolTypeBuiltIn);
Symbol ERROR() { return Symbol("error", SymbolTypeBuiltIn); }
Symbol START() { return Symbol("start", SymbolTypeBuiltIn); }
Symbol END_OF_INPUT() { return Symbol("end", SymbolTypeBuiltIn); }
}
}

View file

@ -5,9 +5,9 @@
namespace tree_sitter {
namespace rules {
extern const Symbol ERROR;
extern const Symbol START;
extern const Symbol END_OF_INPUT;
Symbol ERROR();
Symbol START();
Symbol END_OF_INPUT();
}
}

View file

@ -46,7 +46,7 @@ namespace tree_sitter {
}
// Wraps `rule` in a choice between the rule itself and a copy of the built-in
// error symbol.
rule_ptr err(const rule_ptr &rule) {
return choice({ rule, ERROR.copy() });
return choice({ rule, ERROR().copy() });
}
}
}

View file

@ -2,7 +2,7 @@
namespace tree_sitter {
using std::string;
namespace util {
void str_replace(string *input, const string &search, const string &replace) {
size_t pos = 0;
@ -14,7 +14,7 @@ namespace tree_sitter {
pos += replace.length();
}
}
string escape_string(string input) {
str_replace(&input, "\"", "\\\"");
str_replace(&input, "\n", "\\n");