Add script to trim whitespace

This commit is contained in:
Max Brunsfeld 2014-03-09 19:49:35 -07:00
parent e681a63552
commit 39aa0ccc91
66 changed files with 350 additions and 347 deletions

View file

@ -10,9 +10,9 @@ namespace tree_sitter {
namespace rules {
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &rules);
rule_ptr repeat(const rule_ptr &content);
@ -31,7 +31,7 @@ namespace tree_sitter {
const std::string start_rule_name;
const std::map<const std::string, const rules::rule_ptr> rules;
};
std::ostream& operator<<(std::ostream &stream, const Grammar &grammar);
std::string compile(const Grammar &grammar, std::string name);

3
script/trim.sh Executable file
View file

@ -0,0 +1,3 @@
#!/usr/bin/env bash
# Strip trailing spaces and tabs from every regular file under src, spec and
# include, editing the files in place.
#
# `find -exec ... {} +` passes the paths straight to perl as arguments, so file
# names that contain spaces, quotes or newlines are handled correctly (a plain
# `find | xargs` pipe would split such names into several bogus arguments).
find src spec include -type f -exec perl -pi -e 's/[ \t]+$//' {} +

View file

@ -36,7 +36,7 @@ describe("building parse and lex tables", []() {
sym("right-paren")
}) }) }
}, {});
PreparedGrammar lex_grammar("", {
{ "plus", str("+") },
{ "variable", pattern("\\w+") },
@ -44,25 +44,25 @@ describe("building parse and lex tables", []() {
{ "left-paren", str("(") },
{ "right-paren", str(")") }
}, {});
ParseTable table;
LexTable lex_table;
before_each([&]() {
pair<ParseTable, LexTable> tables = build_tables::build_tables(grammar, lex_grammar);
table = tables.first;
lex_table = tables.second;
});
function<ParseState(size_t)> parse_state = [&](size_t index) {
return table.states[index];
};
function<LexState(size_t)> lex_state = [&](size_t parse_state_index) {
long index = table.states[parse_state_index].lex_state_id;
return lex_table.states[index];
};
it("has the right starting state", [&]() {
AssertThat(keys(parse_state(0).actions), Equals(set<Symbol>({
Symbol("expression"),
@ -71,7 +71,7 @@ describe("building parse and lex tables", []() {
Symbol("variable"),
Symbol("left-paren"),
})));
AssertThat(lex_state(0).expected_inputs(), Equals(set<CharacterSet>({
CharacterSet({ '(' }),
CharacterSet({ CharacterRange('0', '9') }),

View file

@ -14,7 +14,7 @@ describe("computing FIRST sets", []() {
describe("for a sequence AB", [&]() {
it("ignores B when A cannot be blank", [&]() {
auto rule = seq({ sym("x"), sym("y") });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
Symbol("x"),
})));
@ -26,13 +26,13 @@ describe("computing FIRST sets", []() {
sym("x"),
blank() }),
sym("y") });
AssertThat(first_set(rule, null_grammar), Equals(set<Symbol>({
Symbol("x"),
Symbol("y")
})));
});
it("includes FIRST(A's right hand side) when A is a non-terminal", [&]() {
auto rule = choice({
seq({
@ -40,7 +40,7 @@ describe("computing FIRST sets", []() {
sym("x"),
sym("A") }),
sym("A") });
Grammar grammar("A", {
{ "A", choice({
seq({
@ -49,19 +49,19 @@ describe("computing FIRST sets", []() {
sym("y") }),
sym("y") }) }
});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol("y")
})));
});
it("includes FIRST(B) when A is a non-terminal and its expansion can be blank", [&]() {
Grammar grammar("A", {{ "A", choice({ sym("x"), blank() }) }});
auto rule = seq({
sym("A"),
sym("y") });
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol("x"),
Symbol("y")

View file

@ -13,35 +13,35 @@ describe("computing FOLLOW sets", []() {
{ "A", sym("a") },
{ "B", sym("b") },
}, {});
it("all of the starting non-terminals for the item, and their following terminals", [&]() {
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
seq({ sym("B"), sym("z") }),
}), {}, Symbol("w"));
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
{ Symbol("B"), set<Symbol>({ Symbol("z") }) },
})));
});
it("does not include terminals at the beginning of the item", [&]() {
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), sym("y") }) }),
seq({ sym("x"), sym("y") }),
}), {}, Symbol("w"));
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("y") }) },
})));
});
it("includes the item's lookahead terminal if the rule after the non-terminal might be blank", [&]() {
ParseItem item(Symbol("C"), choice({
seq({ sym("A"), choice({ sym("x"), blank() }) }),
}), {}, Symbol("w"));
AssertThat(follow_sets(item, grammar), Equals(map<Symbol, set<Symbol>>({
{ Symbol("A"), set<Symbol>({ Symbol("x"), Symbol("w") }) },
})));

View file

@ -26,7 +26,7 @@ describe("computing closures of item sets", []() {
sym("v"),
sym("n") }) }
}, {});
it("computes the item set closure", [&]() {
ParseItemSet item_set = item_set_closure(ParseItemSet({
ParseItem(Symbol("E"), grammar.rule(Symbol("E")), {}, Symbol("__END__"))

View file

@ -15,7 +15,7 @@ describe("checking if rules can be blank", [&]() {
}),
str("y"),
});
AssertThat(rule_can_be_blank(rule), Equals(false));
});
});

View file

@ -16,7 +16,7 @@ public:
}
return true;
}
rule_map(const initializer_list<pair<const K, rule_ptr>> &list) : map<K, rule_ptr>(list) {}
};
@ -30,7 +30,7 @@ describe("rule transitions", []() {
{ Symbol("1"), blank() }
})));
});
it("handles choices", [&]() {
AssertThat(
sym_transitions(choice({ sym("1"), sym("2") })),
@ -39,7 +39,7 @@ describe("rule transitions", []() {
{ Symbol("2"), blank() }
})));
});
it("handles sequences", [&]() {
AssertThat(
sym_transitions(seq({ sym("1"), sym("2") })),
@ -47,7 +47,7 @@ describe("rule transitions", []() {
{ Symbol("1"), sym("2") }
})));
});
it("handles long sequences", [&]() {
AssertThat(
sym_transitions(seq({
@ -60,7 +60,7 @@ describe("rule transitions", []() {
{ Symbol("1"), seq({ sym("2"), sym("3"), sym("4") }) }
})));
});
it("handles sequences whose left sides can be blank", [&]() {
AssertThat(
sym_transitions(seq({
@ -76,7 +76,7 @@ describe("rule transitions", []() {
{ Symbol("1"), choice({ seq({ sym("1"), sym("2") }), sym("2"), }) }
})));
});
it("handles choices with common starting symbols", [&]() {
AssertThat(
sym_transitions(
@ -87,7 +87,7 @@ describe("rule transitions", []() {
{ Symbol("1"), choice({ sym("2"), sym("3") }) }
})));
});
it("handles characters", [&]() {
AssertThat(
char_transitions(character({ '1' })),
@ -95,7 +95,7 @@ describe("rule transitions", []() {
{ CharacterSet({ '1' }), blank() }
})));
});
it("handles strings", [&]() {
AssertThat(
char_transitions(str("bad")),
@ -103,7 +103,7 @@ describe("rule transitions", []() {
{ CharacterSet({ 'b' }), seq({ character({ 'a' }), character({ 'd' }) }) }
})));
});
it("handles patterns", [&]() {
AssertThat(
char_transitions(pattern("a|b")),
@ -112,8 +112,8 @@ describe("rule transitions", []() {
{ CharacterSet({ 'b' }), blank() }
})));
});
it("handles choices between overlapping character sets", [&]() {
AssertThat(
char_transitions(choice({
@ -145,7 +145,7 @@ describe("rule transitions", []() {
})
})
}})));
rule = repeat(str("a"));
AssertThat(
char_transitions(rule),
@ -168,7 +168,7 @@ describe("rule transitions", []() {
}),
character({ '"' }),
});
AssertThat(char_transitions(rule), Equals(rule_map<CharacterSet>({
{ CharacterSet({ '"' }).complement(), seq({
choice({

View file

@ -4,7 +4,7 @@
namespace tree_sitter {
using std::make_shared;
using std::set;
namespace rules {
rule_ptr character(const set<CharacterRange> &ranges) {
return make_shared<CharacterSet>(ranges);

View file

@ -5,20 +5,20 @@
namespace snowhouse {
using namespace std;
// Snowhouse constraint that compares two pointer-like values by what they
// point to: the match succeeds when `*expected == *actual`.
// Both sides are dereferenced unconditionally, so neither may be null.
template<typename ExpectedType>
struct EqualsPointerConstraint : Expression<EqualsPointerConstraint<ExpectedType>>
{
// Stores a copy of the expected pointer-like value.
EqualsPointerConstraint(const ExpectedType& expected) : expected(expected) {}
// Returns true when the pointee of `actual` equals the pointee of `expected`.
template<typename ActualType>
bool operator()(const ActualType& actual) const {
return *expected == *actual;
}
// The expected value, held by copy for the lifetime of the constraint.
ExpectedType expected;
};
template<typename ExpectedType>
struct Stringizer<EqualsPointerConstraint<ExpectedType>>
{
@ -28,7 +28,7 @@ namespace snowhouse {
return builder.str();
}
};
template<typename ExpectedType>
inline EqualsPointerConstraint<ExpectedType> EqualsPointer(const ExpectedType& expected) {
return EqualsPointerConstraint<ExpectedType>(expected);

View file

@ -8,7 +8,7 @@
using std::cout;
namespace std {
template<typename T>
inline std::ostream& operator<<(std::ostream &stream, const std::vector<T> &vector) {
stream << std::string("#<vector: ");
@ -20,7 +20,7 @@ namespace std {
}
return stream << ">";
}
template<typename T>
inline std::ostream& operator<<(std::ostream &stream, const std::set<T> &set) {
stream << std::string("#<set: ");
@ -32,7 +32,7 @@ namespace std {
}
return stream << ">";
}
template<typename TKey, typename TValue>
inline std::ostream& operator<<(std::ostream &stream, const std::map<TKey, TValue> &map) {
stream << std::string("#<map: ");

View file

@ -19,7 +19,7 @@ describe("preparing a grammar", []() {
sym("rule3") }),
str("ab") }) }
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
{ "rule1", seq({
make_shared<Symbol>("token1", SymbolTypeAuxiliary),
@ -28,36 +28,36 @@ describe("preparing a grammar", []() {
sym("rule3") }),
make_shared<Symbol>("token1", SymbolTypeAuxiliary) }) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {}, {
{ "token1", str("ab") },
})));
});
it("moves entire rules into the lexical grammar when possible, preserving their names", [&]() {
auto result = prepare_grammar(Grammar("rule1", {
{ "rule1", sym("rule2") },
{ "rule2", pattern("a|b") }
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
{ "rule1", sym("rule2") }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {
{ "rule2", pattern("a|b") },
}, {})));
});
it("does not extract blanks into tokens", [&]() {
pair<PreparedGrammar, PreparedGrammar> result = prepare_grammar(Grammar("rule1", {
{ "rule1", choice({ sym("rule2"), blank() }) },
}));
AssertThat(result.first, Equals(PreparedGrammar("rule1", {
{ "rule1", choice({ sym("rule2"), blank() }) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar("", {}, {})));
});
});
@ -71,7 +71,7 @@ describe("preparing a grammar", []() {
sym("y")
}) },
})).first;
AssertThat(result, Equals(PreparedGrammar("rule1", {
{ "rule1", seq({
sym("x"),

View file

@ -7,7 +7,7 @@ START_TEST
describe("character sets", []() {
char max_char = 255;
describe("computing the complement", [&]() {
it("works for the set containing only the null character", [&]() {
CharacterSet set1({ '\0' });
@ -28,14 +28,14 @@ describe("character sets", []() {
AssertThat(set2.complement(), Equals(set1));
});
});
describe("computing unions", [&]() {
it("works for disjoint sets", [&]() {
CharacterSet set({ {'a', 'z'} });
set.add_set(CharacterSet({ {'A', 'Z'} }));
AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'} })));
});
it("works for sets with adjacent ranges", [&]() {
CharacterSet set({ CharacterRange('a', 'r') });
set.add_set(CharacterSet({ CharacterRange('s', 'z') }));
@ -46,33 +46,33 @@ describe("character sets", []() {
set.add_set(c);
AssertThat(set, Equals(CharacterSet({ {0, max_char} })));
});
it("works when the result becomes a continuous range", []() {
CharacterSet set({ {'a', 'd'}, {'f', 'z'} });
set.add_set(CharacterSet({ {'c', 'g'} }));
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} })));
});
it("does nothing for the set of all characters", [&]() {
CharacterSet set({ 'a' });
set.add_set(set.complement());
AssertThat(set, Equals(CharacterSet({ {'\0', max_char} })));
});
});
describe("computing differences", []() {
it("works for disjoint sets", []() {
CharacterSet set1({ {'a','z'} });
set1.remove_set(CharacterSet({ {'A','Z'} }));
AssertThat(set1, Equals(CharacterSet({ {'a', 'z'} })));
});
it("works when one set spans the other", []() {
CharacterSet set1({ {'a','z'} });
set1.remove_set(CharacterSet({ {'d','s'} }));
AssertThat(set1, Equals(CharacterSet({ {'a', 'c'}, {'t', 'z'} })));
});
it("works for sets that overlap", []() {
CharacterSet set1({ {'a','s'} });
set1.remove_set(CharacterSet({ {'m','z'} }));
@ -82,21 +82,21 @@ describe("character sets", []() {
set2.remove_set(CharacterSet({ {'a','s'} }));
AssertThat(set2, Equals(CharacterSet({ {'t', 'z'} })));
});
it("works for sets with multiple ranges", []() {
CharacterSet set1({ {'a','d'}, {'m', 'z'} });
set1.remove_set(CharacterSet({ {'c','o'}, {'s','x'} }));
AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p','r'}, {'y','z'} })));
});
});
describe("computing intersections", []() {
it("returns an empty set for disjoint sets", []() {
CharacterSet set1({ {'a','d'} });
CharacterSet set2({ {'e','x'} });
AssertThat(set1.intersect(set2), Equals(CharacterSet()));
});
it("works for sets with a single overlapping range", []() {
CharacterSet set1({ {'a','e'} });
CharacterSet set2({ {'c','x'} });

View file

@ -48,35 +48,35 @@ describe("parsing pattern rules", []() {
})
})));
});
it("parses character sets", []() {
Pattern rule("[aAeE]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ 'a', 'A', 'e', 'E' })));
});
it("parses character ranges", []() {
Pattern rule("[12a-dA-D3]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })));
});
it("parses negated characters", []() {
Pattern rule("[^a\\d]");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ {'a'}, {'0', '9'} }, false)));
});
it("parses backslashes", []() {
Pattern rule("\\\\");
AssertThat(
rule.to_rule_tree(),
EqualsPointer(character({ '\\' })));
});
it("parses character groups in sequences", []() {
Pattern rule("\"([^\"]|\\\\\")+\"");
AssertThat(
@ -90,7 +90,7 @@ describe("parsing pattern rules", []() {
character({ '"' })
})));
});
it("parses choices in sequences", []() {
Pattern rule("(a|b)cd");
AssertThat(
@ -104,7 +104,7 @@ describe("parsing pattern rules", []() {
character({ 'd' })
})));
});
it("parses special characters when they are escaped", []() {
Pattern rule("a\\(b");
AssertThat(
@ -115,7 +115,7 @@ describe("parsing pattern rules", []() {
character({ 'b' })
})));
});
it("parses repeating rules", []() {
Pattern rule("(ab)+(cd)+");
AssertThat(

View file

@ -9,12 +9,12 @@ describe("constructing rules", []() {
rule_ptr symbol1 = sym("1");
rule_ptr symbol2 = sym("2");
rule_ptr symbol3 = sym("3");
it("constructs binary trees", [&]() {
AssertThat(
seq({ symbol1, symbol2, symbol3 }),
EqualsPointer(seq({ seq({ symbol1, symbol2 }), symbol3 })));
AssertThat(
choice({ symbol1, symbol2, symbol3 }),
EqualsPointer(choice({ choice({ symbol1, symbol2 }), symbol3 })));

View file

@ -6,22 +6,22 @@ START_TEST
describe("arithmetic", []() {
ts_document *doc;
before_each([&]() {
doc = ts_document_make();
ts_document_set_parser(doc, ts_parse_config_arithmetic);
});
after_each([&]() {
ts_document_free(doc);
});
it("parses variables", [&]() {
ts_document_set_input_string(doc, "x");
AssertThat(string(ts_document_string(doc)), Equals(
"(expression (term (factor (variable))))"));
});
it("parses numbers", [&]() {
ts_document_set_input_string(doc, "5");
AssertThat(string(ts_document_string(doc)), Equals(
@ -32,12 +32,12 @@ describe("arithmetic", []() {
ts_document_set_input_string(doc, "x + y");
AssertThat(string(ts_document_string(doc)), Equals(
"(expression (term (factor (variable))) (plus) (term (factor (variable))))"));
ts_document_set_input_string(doc, "x * y");
AssertThat(string(ts_document_string(doc)), Equals(
"(expression (term (factor (variable)) (times) (factor (variable))))"));
});
it("parses complex trees", [&]() {
ts_document_set_input_string(doc, "x * y + z * a");
AssertThat(string(ts_document_string(doc)), Equals(
@ -47,13 +47,13 @@ describe("arithmetic", []() {
AssertThat(string(ts_document_string(doc)), Equals(
"(expression (term (factor (variable)) (times) (factor (expression (term (factor (variable))) (plus) (term (factor (variable)))))))"));
});
describe("error recovery", [&]() {
it("recovers from errors at the top level", [&]() {
ts_document_set_input_string(doc, "x * * y");
AssertThat(string(ts_document_string(doc)), Equals("(ERROR)"));
});
it("recovers from errors in parenthesized expressions", [&]() {
ts_document_set_input_string(doc, "x + (y * + z) * 5");
AssertThat(string(ts_document_string(doc)), Equals(

View file

@ -6,16 +6,16 @@ START_TEST
describe("json", []() {
ts_document *doc;
before_each([&]() {
doc = ts_document_make();
ts_document_set_parser(doc, ts_parse_config_json);
});
after_each([&]() {
ts_document_free(doc);
});
it("parses strings", [&]() {
ts_document_set_input_string(doc, "\"\"");
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
@ -26,7 +26,7 @@ describe("json", []() {
ts_document_set_input_string(doc, "\"this is a \\\"string\\\" within a string\"");
AssertThat(string(ts_document_string(doc)), Equals("(value (string))"));
});
it("parses objects", [&]() {
ts_document_set_input_string(doc, "{}");
AssertThat(string(ts_document_string(doc)), Equals("(value (object))"));
@ -37,7 +37,7 @@ describe("json", []() {
ts_document_set_input_string(doc, "{\"key1\": 1, \"key2\": 2 }");
AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (string) (value (number))))"));
});
it("parses arrays", [&]() {
ts_document_set_input_string(doc, "[]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array))"));
@ -48,11 +48,11 @@ describe("json", []() {
ts_document_set_input_string(doc, "[1, 2, 3]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (value (number)) (value (number))))"));
});
describe("tracking the positions of AST nodes", [&]() {
it("records the widths and offsets of nodes", [&]() {
ts_document_set_input_string(doc, " [12, 5]");
const ts_tree *tree = ts_document_tree(doc);
const ts_tree *array = ts_tree_children(tree)[0];
const ts_tree *number1 = ts_tree_children(array)[0];
@ -66,18 +66,18 @@ describe("json", []() {
AssertThat(array->offset, Equals(2));
AssertThat(array->size, Equals(7));
AssertThat(tree->offset, Equals(2));
AssertThat(tree->size, Equals(7));
});
});
describe("errors", [&]() {
it("reports errors in the top-level node", [&]() {
ts_document_set_input_string(doc, "[");
AssertThat(string(ts_document_string(doc)), Equals("(ERROR)"));
});
it("reports errors inside of arrays and objects", [&]() {
ts_document_set_input_string(doc, "{ \"key1\": 1, 5 }");
AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (number)) (ERROR)))"));
@ -85,7 +85,7 @@ describe("json", []() {
ts_document_set_input_string(doc, "[1,,2]");
AssertThat(string(ts_document_string(doc)), Equals("(value (array (value (number)) (ERROR) (value (number))))"));
});
it("reports errors in nested objects", [&]() {
ts_document_set_input_string(doc, "{ \"key1\": { \"key2\": 1, 2 }, [, \"key3\": 3 }");
AssertThat(string(ts_document_string(doc)), Equals("(value (object (string) (value (object (string) (value (number)) (ERROR))) (ERROR) (string) (value (number))))"));

View file

@ -25,7 +25,7 @@ namespace tree_sitter {
map<const LexItemSet, LexStateId> lex_state_ids;
ParseTable parse_table;
LexTable lex_table;
long parse_state_id_for_item_set(const ParseItemSet &item_set) const {
auto entry = parse_state_ids.find(item_set);
return (entry == parse_state_ids.end()) ? NOT_FOUND : entry->second;
@ -35,20 +35,20 @@ namespace tree_sitter {
auto entry = lex_state_ids.find(item_set);
return (entry == lex_state_ids.end()) ? NOT_FOUND : entry->second;
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (auto transition : sym_transitions(item_set, grammar)) {
Symbol symbol = transition.first;
ParseItemSet item_set = transition.second;
ParseStateId new_state_id = add_parse_state(item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id));
if (symbol == rules::ERROR) {
parse_table.error_table.insert({ state_id, { new_state_id, first_set(transition.second, grammar) } });
}
}
}
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (auto transition : char_transitions(item_set, grammar)) {
CharacterSet rule = transition.first;
@ -57,7 +57,7 @@ namespace tree_sitter {
lex_table.add_action(state_id, rule, LexAction::Advance(new_state_id));
}
}
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (LexItem item : item_set) {
if (item.is_done()) {
@ -65,7 +65,7 @@ namespace tree_sitter {
}
}
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (ParseItem item : item_set) {
if (item.is_done()) {
@ -76,7 +76,7 @@ namespace tree_sitter {
}
}
}
void assign_lex_state(ParseStateId state_id) {
ParseState &state = parse_table.states[state_id];
LexItemSet item_set;
@ -87,7 +87,7 @@ namespace tree_sitter {
state.lex_state_id = add_lex_state(item_set);
}
LexStateId add_lex_state(const LexItemSet &item_set) {
auto state_id = lex_state_id_for_item_set(item_set);
if (state_id == NOT_FOUND) {
@ -98,7 +98,7 @@ namespace tree_sitter {
}
return state_id;
}
ParseStateId add_parse_state(const ParseItemSet &item_set) {
auto state_id = parse_state_id_for_item_set(item_set);
if (state_id == NOT_FOUND) {
@ -111,7 +111,7 @@ namespace tree_sitter {
}
return state_id;
}
void add_error_lex_state() {
LexItemSet error_item_set;
for (auto &pair : lex_grammar.rules)
@ -126,7 +126,7 @@ namespace tree_sitter {
// std::vector<const ParseItemSet *> item_sets(parse_state_ids.size());
// for (auto &pair : parse_state_ids)
// item_sets[pair.second] = &pair.first;
//
//
// for (int i = 0; i < item_sets.size(); i++) {
// std::cout << "\n\n" << i;
// for (auto &item : *item_sets[i]) {
@ -136,9 +136,9 @@ namespace tree_sitter {
// }
// }
// }
public:
TableBuilder(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) :
grammar(grammar),
lex_grammar(lex_grammar) {};
@ -151,7 +151,7 @@ namespace tree_sitter {
return pair<ParseTable, LexTable>(parse_table, lex_table);
}
};
// Builds the parse table and lex table for the given syntactic grammar and
// lexical grammar by running a TableBuilder over them.
pair<ParseTable, LexTable> build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar) {
    TableBuilder builder(grammar, lex_grammar);
    return builder.build();
}

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
std::pair<ParseTable, LexTable> build_tables(const PreparedGrammar &grammar, const PreparedGrammar &lex_grammar);
}

View file

@ -21,7 +21,7 @@ namespace tree_sitter {
const PreparedGrammar grammar;
public:
FirstSet(const PreparedGrammar &grammar) : grammar(grammar) {}
void visit(const Symbol *rule) {
if (grammar.has_definition(*rule)) {
value = apply(grammar.rule(*rule));
@ -29,11 +29,11 @@ namespace tree_sitter {
value = set<Symbol>({ *rule });
}
}
void visit(const Choice *rule) {
value = set_union(apply(rule->left), apply(rule->right));
}
void visit(const Seq *rule) {
auto result = apply(rule->left);
if (rule_can_be_blank(rule->left, grammar)) {
@ -42,11 +42,11 @@ namespace tree_sitter {
value = result;
}
};
// Returns the set of symbols produced by applying the FirstSet visitor,
// configured with `grammar`, to `rule`.
set<Symbol> first_set(const rule_ptr &rule, const PreparedGrammar &grammar) {
    FirstSet visitor(grammar);
    return visitor.apply(rule);
}
set<Symbol> first_set(const ParseItemSet &item_set, const PreparedGrammar &grammar) {
set<Symbol> result;
for (auto &item : item_set) {

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
/*

View file

@ -24,7 +24,7 @@ namespace tree_sitter {
result.insert({ symbol, following_terminals });
}
}
return result;
}
}

View file

@ -8,9 +8,9 @@
namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
/*
* Returns a map of non-terminal symbols to sets of terminal symbols.
* The keys are the non-terminals which may appear first in the given

View file

@ -14,11 +14,11 @@ namespace tree_sitter {
Item::Item(const Symbol &lhs, const rule_ptr rule) :
lhs(lhs),
rule(rule) {};
// Reports whether this item's rule can be blank, using the single-argument
// rule_can_be_blank overload (no grammar is consulted).
bool Item::is_done() const {
return rule_can_be_blank(rule);
}
ostream& operator<<(ostream &stream, const LexItem &item) {
return stream <<
string("#<item ") <<
@ -27,7 +27,7 @@ namespace tree_sitter {
*item.rule <<
string(">");
}
ostream& operator<<(ostream &stream, const ParseItem &item) {
return stream <<
string("#<item ") <<
@ -40,7 +40,7 @@ namespace tree_sitter {
item.lookahead_sym <<
string(">");
}
bool LexItem::operator<(const LexItem &other) const {
if (lhs < other.lhs) return true;
if (other.lhs < lhs) return false;
@ -60,13 +60,13 @@ namespace tree_sitter {
}
LexItem::LexItem(const Symbol &lhs, const rule_ptr rule) : Item(lhs, rule) {}
// Two lex items are equal when they share the same left-hand symbol and their
// rules compare equal by value (the pointed-to rules, not the pointers).
bool LexItem::operator==(const LexItem &other) const {
    // Both comparisons are always evaluated, exactly as before.
    const bool same_symbol = (other.lhs == lhs);
    const bool same_rule = (*other.rule == *rule);
    if (!same_symbol)
        return false;
    return same_rule;
}
ParseItem::ParseItem(const Symbol &lhs, const rule_ptr rule, const vector<bool> &consumed_symbols, const Symbol &lookahead_sym) :
Item(lhs, rule),
consumed_symbols(consumed_symbols),

View file

@ -8,7 +8,7 @@
namespace tree_sitter {
class Grammar;
namespace build_tables {
class Item {
public:
@ -38,9 +38,9 @@ namespace tree_sitter {
typedef std::set<ParseItem> ParseItemSet;
typedef std::set<LexItem> LexItemSet;
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
std::ostream& operator<<(std::ostream &stream, const LexItem &item);
std::ostream& operator<<(std::ostream &stream, const ParseItem &item);
}
}
@ -64,7 +64,7 @@ namespace std {
hash<string>()(item.lookahead_sym.name);
}
};
template<typename T>
struct hash<const set<T>> {
size_t operator()(const set<T> &set) const {

View file

@ -8,12 +8,12 @@
namespace tree_sitter {
using std::set;
using rules::Symbol;
namespace build_tables {
static bool contains(const ParseItemSet &items, const ParseItem &item) {
return items.size() > 0 && (std::find(items.begin(), items.end(), item) != items.end());
}
static void add_item(ParseItemSet &item_set, const ParseItem &item, const PreparedGrammar &grammar) {
if (!contains(item_set, item)) {
item_set.insert(item);

View file

@ -5,7 +5,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
const ParseItemSet item_set_closure(const ParseItemSet &item_set, const PreparedGrammar &grammar);
}

View file

@ -8,7 +8,7 @@ namespace tree_sitter {
using std::set;
using rules::CharacterSet;
using rules::Symbol;
namespace build_tables {
map<CharacterSet, LexItemSet> char_transitions(const LexItem &item) {
map<CharacterSet, LexItemSet> result;
@ -18,7 +18,7 @@ namespace tree_sitter {
}
return result;
}
map<Symbol, ParseItemSet> sym_transitions(const ParseItem &item, const PreparedGrammar &grammar) {
map<Symbol, ParseItemSet> result;
for (auto transition : sym_transitions(item.rule)) {
@ -30,14 +30,14 @@ namespace tree_sitter {
}
return result;
}
// Returns a new set holding every element of `left` and every element of
// `right`; neither input is modified.
template<typename T>
static set<T> merge_sets(const set<T> &left, const set<T> &right) {
    set<T> merged(left.begin(), left.end());
    for (const T &element : right) {
        merged.insert(element);
    }
    return merged;
}
map<CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar) {
map<CharacterSet, LexItemSet> result;
for (const LexItem &item : item_set) {

View file

@ -10,7 +10,7 @@ namespace tree_sitter {
class CharacterSet;
class Symbol;
}
namespace build_tables {
std::map<rules::CharacterSet, LexItemSet> char_transitions(const LexItemSet &item_set, const PreparedGrammar &grammar);
std::map<rules::Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set, const PreparedGrammar &grammar);

View file

@ -7,7 +7,7 @@
namespace tree_sitter {
namespace build_tables {
/*
* Merges two transition maps with symbol keys. If both maps
* contain values for the same symbol, the new value for that
@ -36,7 +36,7 @@ namespace tree_sitter {
}
return result;
}
/*
* Merges two transition maps with character set keys. If the
* two maps contain values for overlapping character sets, the

View file

@ -16,11 +16,11 @@ namespace tree_sitter {
void default_visit(const Rule *) {
value = false;
}
virtual void visit(const Blank *) {
value = true;
}
virtual void visit(const Repeat *rule) {
value = true;
}
@ -28,12 +28,12 @@ namespace tree_sitter {
// A choice can be blank when either alternative can be blank. The right
// alternative is only visited if the left one cannot be blank (short-circuit).
virtual void visit(const Choice *rule) {
value = apply(rule->left) || apply(rule->right);
}
// A sequence can be blank only when both of its parts can be blank. The right
// part is only visited if the left one can be blank (short-circuit).
virtual void visit(const Seq *rule) {
value = apply(rule->left) && apply(rule->right);
}
};
class CanBeBlankRecursive : public CanBeBlank {
const PreparedGrammar grammar;
using CanBeBlank::visit;
@ -45,11 +45,11 @@ namespace tree_sitter {
value = grammar.has_definition(*rule) && apply(grammar.rule(*rule));
}
};
bool rule_can_be_blank(const rule_ptr &rule) {
return CanBeBlank().apply(rule);
}
bool rule_can_be_blank(const rule_ptr &rule, const PreparedGrammar &grammar) {
return CanBeBlankRecursive(grammar).apply(rule);
}

View file

@ -5,7 +5,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace build_tables {
bool rule_can_be_blank(const rules::rule_ptr &rule);
bool rule_can_be_blank(const rules::rule_ptr &rule, const PreparedGrammar &grammar);

View file

@ -19,7 +19,7 @@ namespace tree_sitter {
namespace build_tables {
template<typename T>
map<T, rule_ptr> merge_transitions(const map<T, rule_ptr> &left, const map<T, rule_ptr> &right);
template<>
map<CharacterSet, rule_ptr> merge_transitions(const map<CharacterSet, rule_ptr> &left, const map<CharacterSet, rule_ptr> &right) {
auto transitions = merge_char_transitions<rule_ptr>(left, right, [](rule_ptr left, rule_ptr right) {
@ -35,7 +35,7 @@ namespace tree_sitter {
});
return *static_cast<map<Symbol, rule_ptr> *>(&transitions);
}
template<typename T>
map<T, rule_ptr> map_transitions(const map<T, rule_ptr> &initial, std::function<const rule_ptr(rule_ptr)> map_fn) {
map<T, rule_ptr> result;
@ -55,7 +55,7 @@ namespace tree_sitter {
void visit(const CharacterSet *rule) {
visit_atom(rule);
}
void visit(const Symbol *rule) {
visit_atom(rule);
}
@ -73,25 +73,25 @@ namespace tree_sitter {
result = merge_transitions<T>(result, this->apply(rule->right));
this->value = result;
}
// Transitions of a repetition: take every transition of the repeated content
// and replace its successor rule `value` with the sequence
// `value` followed by (a copy of this repeat | blank).
void visit(const Repeat *rule) {
this->value = map_transitions(this->apply(rule->content), [&](const rule_ptr &value) {
return Seq::Build({ value, make_shared<Choice>(rule->copy(), make_shared<Blank>()) });
});
}
void visit(const String *rule) {
rule_ptr result = make_shared<Blank>();
for (char val : rule->value)
result = Seq::Build({ result, make_shared<CharacterSet>(set<CharacterRange>({ val })) });
this->value = this->apply(result);
}
void visit(const Pattern *rule) {
this->value = this->apply(rule->to_rule_tree());
}
};
// Returns the result of applying a RuleTransitions<CharacterSet> visitor to
// `rule`: a map from character sets to rules.
map<CharacterSet, rule_ptr> char_transitions(const rule_ptr &rule) {
    RuleTransitions<CharacterSet> visitor;
    return visitor.apply(rule);
}

View file

@ -12,7 +12,7 @@ namespace tree_sitter {
using std::vector;
using std::set;
using std::pair;
namespace generate_code {
string _switch(string condition, string body) {
return join({
@ -21,7 +21,7 @@ namespace tree_sitter {
"}"
});
}
string _case(string value, string body) {
return join({
"case " + value + ":",
@ -35,14 +35,14 @@ namespace tree_sitter {
indent(body)
});
}
// Joins three pieces into the text of an `if`: the header line
// "if (<condition>)", the body passed through indent(), and a trailing empty
// string.
string _if(string condition, string body) {
    string header = "if (" + condition + ")";
    string indented_body = indent(body);
    return join({ header, indented_body, "" });
}
class CCodeGenerator {
const string name;
const ParseTable parse_table;
@ -53,7 +53,7 @@ namespace tree_sitter {
parse_table(parse_table),
lex_table(lex_table)
{}
string symbol_id(rules::Symbol symbol) {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR)
@ -65,7 +65,7 @@ namespace tree_sitter {
else
return "ts_sym_" + symbol.name;
}
string character_code(char character) {
switch (character) {
case '\0':
@ -78,17 +78,17 @@ namespace tree_sitter {
return string() + character;
}
}
// Builds the condition text that tests LOOKAHEAD_CHAR() against a character
// range: an equality test when the range holds a single character, otherwise
// a pair of inclusive bound checks joined with &&.
string condition_for_character_range(const rules::CharacterRange &range) {
    string lookahead("LOOKAHEAD_CHAR()");
    if (range.min == range.max) {
        return lookahead + " == '" + character_code(range.min) + "'";
    } else {
        // One return statement spanning two lines; the first half must not be
        // repeated or the expression no longer parses.
        return string("'") + character_code(range.min) + string("' <= ") + lookahead +
            " && " + lookahead + " <= '" + character_code(range.max) + "'";
    }
}
string condition_for_character_set(const rules::CharacterSet &set) {
vector<string> parts;
if (set.ranges.size() == 1) {
@ -99,7 +99,7 @@ namespace tree_sitter {
return join(parts, " ||\n ");
}
}
string condition_for_character_rule(const rules::CharacterSet &rule) {
vector<string> parts;
pair<rules::CharacterSet, bool> representation = rule.most_compact_representation();
@ -108,7 +108,7 @@ namespace tree_sitter {
else
return "!(" + condition_for_character_set(rule.complement()) + ")";
}
string collapse_flags(vector<bool> flags) {
string result;
bool started = false;
@ -119,7 +119,7 @@ namespace tree_sitter {
}
return result;
}
string code_for_parse_actions(const set<ParseAction> &actions, const set<rules::Symbol> &expected_inputs) {
auto action = actions.begin();
switch (action->type) {
@ -133,7 +133,7 @@ namespace tree_sitter {
return "";
}
}
string parse_error_call(const set<rules::Symbol> &expected_inputs) {
string result = "PARSE_ERROR(" + to_string(expected_inputs.size()) + ", EXPECT({";
bool started = false;
@ -198,7 +198,7 @@ namespace tree_sitter {
body += _default("LEX_PANIC();");
return _switch("LEX_STATE()", body);
}
string symbol_enum() {
string result = "enum {\n";
for (auto symbol : parse_table.symbols)
@ -218,7 +218,7 @@ namespace tree_sitter {
string includes() {
return "#include \"tree_sitter/parser.h\"";
}
string recover_case(ParseStateId state, set<rules::Symbol> symbols) {
string result = "RECOVER(" + to_string(state) + ", " + to_string(symbols.size()) + ", EXPECT({";
bool started = false;
@ -231,7 +231,7 @@ namespace tree_sitter {
}
return result + "}));";
}
string recover_function() {
string cases;
for (auto &pair : parse_table.error_table) {
@ -239,7 +239,7 @@ namespace tree_sitter {
cases += _case(to_string(pair.first), recover_case(pair_for_state.first, pair_for_state.second));
}
cases += _default(recover_case(0, set<rules::Symbol>()));
string body = _switch("state", cases);
return join({
"static const ts_symbol * ts_recover(ts_state state, ts_state *to_state, size_t *count) {",
@ -247,7 +247,7 @@ namespace tree_sitter {
"}"
});
}
string lex_function() {
return join({
"LEX_FN() {",
@ -256,7 +256,7 @@ namespace tree_sitter {
"}"
});
}
string parse_function() {
return join({
"PARSE_FN() {",
@ -266,11 +266,11 @@ namespace tree_sitter {
"}"
});
}
// Emits the EXPORT_PARSER(...) line, naming the config after this generator's `name`.
string parse_config_struct() {
    string result("EXPORT_PARSER(ts_parse_config_");
    result += name;
    result += ");";
    return result;
}
string code() {
return join({
includes(),
@ -283,7 +283,7 @@ namespace tree_sitter {
}, "\n\n") + "\n";
}
};
// Entry point: builds a CCodeGenerator for the given tables and returns the
// text produced by its code() method.
string c_code(string name, const ParseTable &parse_table, const LexTable &lex_table) {
    CCodeGenerator generator(name, parse_table, lex_table);
    return generator.code();
}

View file

@ -12,7 +12,7 @@ namespace tree_sitter {
pos += replace.length();
}
}
string join(vector<string> lines, string separator) {
string result;
bool started = false;
@ -23,17 +23,17 @@ namespace tree_sitter {
}
return result;
}
// Convenience overload: joins `lines` using a newline as the separator.
string join(vector<string> lines) {
    const string newline("\n");
    return join(lines, newline);
}
// Prefixes the first line of `input`, and every line after a newline, with
// the indentation unit.
string indent(string input) {
    const string tab = " ";
    str_replace(input, "\n", "\n" + tab);
    input.insert(0, tab);
    return input;
}
string escape_string(string input) {
str_replace(input, "\"", "\\\"");
return input;

View file

@ -6,11 +6,11 @@ namespace tree_sitter {
using std::map;
using std::ostream;
using rules::rule_ptr;
// Stores the start rule name and a copy of the name -> rule map.
// Each member is initialized exactly once; naming the same member twice in a
// mem-initializer list is ill-formed.
Grammar::Grammar(std::string start_rule_name, const map<const string, const rule_ptr> &rules) :
    start_rule_name(start_rule_name),
    rules(rules) {}
bool Grammar::operator==(const Grammar &other) const {
if (other.start_rule_name != start_rule_name) return false;
if (other.rules.size() != rules.size()) return false;
@ -20,10 +20,10 @@ namespace tree_sitter {
if (other_pair == other.rules.end()) return false;
if (!other_pair->second->operator==(*pair.second)) return false;
}
return true;
}
ostream& operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");

View file

@ -7,31 +7,31 @@ namespace tree_sitter {
using std::set;
using rules::Symbol;
using rules::CharacterSet;
// Stores the action kind, target state index and symbol verbatim.
// The Error/Advance/Accept factories below are the callers visible in this file.
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol) :
type(type),
symbol(symbol),
state_index(state_index) {}
// Error action: no target state (-1 wraps to the largest size_t) and an
// empty-named symbol.
LexAction LexAction::Error() {
    const Symbol no_symbol("");
    return LexAction(LexActionTypeError, -1, no_symbol);
}
// Advance action: moves to `state_index`; carries an empty-named symbol.
LexAction LexAction::Advance(size_t state_index) {
    LexAction action(LexActionTypeAdvance, state_index, Symbol(""));
    return action;
}
// Accept action: recognizes `symbol`; the state index is the -1 sentinel.
LexAction LexAction::Accept(Symbol symbol) {
    const size_t no_state = static_cast<size_t>(-1);
    return LexAction(LexActionTypeAccept, no_state, symbol);
}
// Two lex actions are equal when kind, target state and symbol all match.
bool LexAction::operator==(const LexAction &other) const {
    if (!(type == other.type)) return false;
    if (!(state_index == other.state_index)) return false;
    return symbol == other.symbol;
}
bool LexAction::operator<(const LexAction &other) const {
if (type < other.type) return true;
if (type > other.type) return false;
@ -39,7 +39,7 @@ namespace tree_sitter {
if (state_index > other.state_index) return false;
return (symbol < other.symbol);
}
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
switch (action.type) {
case LexActionTypeError:
@ -50,33 +50,33 @@ namespace tree_sitter {
return stream << string("#<advance ") + to_string(action.state_index) + ">";
}
}
// Returns the character sets that have an entry in this state's action table.
set<CharacterSet> LexState::expected_inputs() const {
    set<CharacterSet> inputs;
    for (auto entry = actions.begin(); entry != actions.end(); ++entry)
        inputs.insert(entry->first);
    return inputs;
}
// Appends a default-constructed state and returns its index.
LexStateId LexTable::add_state() {
    const LexStateId new_id = states.size();
    states.push_back(LexState());
    return new_id;
}
// Resolves a state id to a mutable state: negative ids select the table's
// error state, all others index into `states` (no upper-bound check).
LexState & state(LexTable *table, LexStateId id) {
    return (id < 0) ? table->error_state : table->states[id];
}
// Records `action` under the character set `match` in the state selected by `id`.
void LexTable::add_action(LexStateId id, CharacterSet match, LexAction action) {
    LexState &target = state(this, id);
    target.actions[match].insert(action);
}
// Records `action` in the default-action set of the state selected by `id`.
void LexTable::add_default_action(LexStateId id, LexAction action) {
    LexState &target = state(this, id);
    target.default_actions.insert(action);
}
// Id that selects LexTable::error_state; state() maps every negative id to it.
const LexStateId LexTable::ERROR_STATE_ID = -1;
}

View file

@ -14,7 +14,7 @@ namespace tree_sitter {
LexActionTypeError,
LexActionTypeAdvance
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol);
public:
@ -23,12 +23,12 @@ namespace tree_sitter {
static LexAction Advance(size_t state_index);
bool operator==(const LexAction &action) const;
bool operator<(const LexAction &action) const;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
};
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
}
@ -50,16 +50,16 @@ namespace tree_sitter {
std::set<LexAction> default_actions;
std::set<rules::CharacterSet> expected_inputs() const;
};
typedef long int LexStateId;
// Holds the lexer's states plus a separate error state.
class LexTable {
public:
// Id reserved for the error state (defined out of line).
static const LexStateId ERROR_STATE_ID;
// Appends a new state and returns its id.
LexStateId add_state();
// Adds an action for a character set / a default action to the given state.
void add_action(LexStateId state_id, rules::CharacterSet rule, LexAction action);
void add_default_action(LexStateId state_id, LexAction action);
// Regular states, indexed by LexStateId.
std::vector<LexState> states;
// State addressed through negative ids such as ERROR_STATE_ID.
LexState error_state;
};

View file

@ -7,36 +7,36 @@ namespace tree_sitter {
using std::set;
using std::vector;
using rules::Symbol;
// Stores all four fields verbatim; the Error/Accept/Shift/Reduce factories
// below are the callers visible in this file.
ParseAction::ParseAction(ParseActionType type, size_t state_index, Symbol symbol, const vector<bool> &child_flags) :
type(type),
symbol(symbol),
state_index(state_index),
child_flags(child_flags) {};
// Error action: sentinel state index, empty-named symbol, no child flags.
ParseAction ParseAction::Error() {
    const vector<bool> no_flags;
    return ParseAction(ParseActionTypeError, -1, Symbol(""), no_flags);
}
// Accept action: sentinel state index, empty-named symbol, no child flags.
ParseAction ParseAction::Accept() {
    ParseAction action(ParseActionTypeAccept, -1, Symbol(""), vector<bool>());
    return action;
}
// Shift action: moves to `state_index`; symbol and child flags are empty.
ParseAction ParseAction::Shift(size_t state_index) {
    const Symbol no_symbol("");
    return ParseAction(ParseActionTypeShift, state_index, no_symbol, vector<bool>());
}
// Reduce action: produces `symbol` with the given per-child flags; the state
// index is the -1 sentinel.
ParseAction ParseAction::Reduce(Symbol symbol, const vector<bool> &child_flags) {
    const size_t no_state = static_cast<size_t>(-1);
    return ParseAction(ParseActionTypeReduce, no_state, symbol, child_flags);
}
// Two parse actions are equal when kind, symbol, target state and child
// flags all match. The symbol must take part in the comparison: every reduce
// action carries the same sentinel state index, so without it two reductions
// to different symbols with equal child flags would compare equal.
bool ParseAction::operator==(const ParseAction &other) const {
    bool types_eq = type == other.type;
    bool symbols_eq = symbol == other.symbol;
    bool state_indices_eq = state_index == other.state_index;
    bool child_flags_eq = child_flags == other.child_flags;
    return types_eq && symbols_eq && state_indices_eq && child_flags_eq;
}
bool ParseAction::operator<(const ParseAction &other) const {
if (type < other.type) return true;
if (type > other.type) return false;
@ -44,7 +44,7 @@ namespace tree_sitter {
if (state_index > other.state_index) return false;
return (child_flags < other.child_flags);
}
ostream& operator<<(ostream &stream, const ParseAction &action) {
switch (action.type) {
case ParseActionTypeError:
@ -57,16 +57,16 @@ namespace tree_sitter {
return stream << (string("#<reduce ") + action.symbol.name + ">");
}
}
// A new parse state starts with lex_state_id -1.
// NOTE(review): -1 matches the value of LexTable::ERROR_STATE_ID — confirm that link is intended.
ParseState::ParseState() : lex_state_id(-1) {}
// Returns the symbols that have an entry in this state's action table.
set<Symbol> ParseState::expected_inputs() const {
    set<Symbol> result;
    // Bind by const reference: iterating by value copied each entry, key and
    // whole action set, only to read the key.
    for (const auto &pair : actions)
        result.insert(pair.first);
    return result;
}
ostream& operator<<(ostream &stream, const ParseState &state) {
stream << string("#<parse_state ");
bool started1 = false;
@ -85,12 +85,12 @@ namespace tree_sitter {
stream << string(">");
return stream;
}
// Appends a default-constructed parse state and returns its index.
ParseStateId ParseTable::add_state() {
    const ParseStateId new_id = states.size();
    states.push_back(ParseState());
    return new_id;
}
void ParseTable::add_action(ParseStateId id, Symbol symbol, ParseAction action) {
symbols.insert(symbol);
states[id].actions[symbol].insert(action);

View file

@ -14,7 +14,7 @@ namespace tree_sitter {
ParseActionTypeShift,
ParseActionTypeReduce,
} ParseActionType;
class ParseAction {
ParseAction(ParseActionType type, size_t state_index, rules::Symbol symbol, const std::vector<bool> &child_flags);
public:
@ -24,7 +24,7 @@ namespace tree_sitter {
static ParseAction Reduce(rules::Symbol symbol, const std::vector<bool> &child_flags);
bool operator==(const ParseAction &action) const;
bool operator<(const ParseAction &action) const;
ParseActionType type;
rules::Symbol symbol;
size_t state_index;
@ -33,7 +33,7 @@ namespace tree_sitter {
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
}
namespace std {
template<>
struct hash<tree_sitter::ParseAction> {
@ -55,16 +55,16 @@ namespace tree_sitter {
std::set<rules::Symbol> expected_inputs() const;
LexStateId lex_state_id;
};
typedef unsigned long int ParseStateId;
std::ostream& operator<<(std::ostream &stream, const ParseState &state);
class ParseTable {
public:
size_t add_state();
void add_action(ParseStateId state_id, rules::Symbol symbol, ParseAction action);
std::vector<ParseState> states;
std::set<rules::Symbol> symbols;
std::map<ParseStateId, std::pair<ParseStateId, std::set<rules::Symbol>>> error_table;

View file

@ -22,14 +22,14 @@ namespace tree_sitter {
Seq::Build({ rule, make_shared<Symbol>(name, SymbolTypeAuxiliary) }),
make_shared<Blank>() });
}
// Replaces a repeat with an auxiliary symbol that names a newly registered
// helper rule.
void visit(const Repeat *rule) {
// Expand the content first: a nested repeat registers its own helper during
// this call, which changes aux_rules.size() and so the number used below.
rule_ptr inner_rule = apply(rule->content);
// Helper names are numbered from 1 by the current count of auxiliary rules.
string helper_rule_name = string("repeat_helper") + to_string(aux_rules.size() + 1);
aux_rules.insert({ helper_rule_name, make_repeat_helper(helper_rule_name, inner_rule) });
// The visitor's result is a reference to the helper, not the helper itself.
value = make_shared<Symbol>(helper_rule_name, SymbolTypeAuxiliary);
}
void visit(const Seq *rule) {
value = Seq::Build({ apply(rule->left), apply(rule->right) });
}
@ -45,14 +45,14 @@ namespace tree_sitter {
public:
map<const string, const rule_ptr> aux_rules;
};
PreparedGrammar expand_repeats(const PreparedGrammar &grammar) {
map<const string, const rule_ptr> rules, aux_rules(grammar.aux_rules);
ExpandRepeats expander;
for (auto &pair : grammar.rules)
rules.insert({ pair.first, expander.apply(pair.second) });
aux_rules.insert(expander.aux_rules.begin(), expander.aux_rules.end());
return PreparedGrammar(grammar.start_rule_name, rules, aux_rules);

View file

@ -5,7 +5,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace prepare_grammar {
PreparedGrammar expand_repeats(const PreparedGrammar &);
}

View file

@ -18,13 +18,13 @@ namespace tree_sitter {
using std::map;
using std::make_shared;
using namespace rules;
namespace prepare_grammar {
class IsToken : public RuleFn<bool> {
// Fallback for rule kinds without a dedicated overload: not a token.
void default_visit(const Rule *rule) {
value = false;
}
// String rules count as tokens.
void visit(const String *rule) {
value = true;
}
@ -43,7 +43,7 @@ namespace tree_sitter {
tokens.insert({ name, rule });
return name;
}
void default_visit(const Rule *rule) {
auto result = rule->copy();
if (IsToken().apply(result)) {
@ -52,15 +52,15 @@ namespace tree_sitter {
value = result;
}
}
void visit(const Choice *rule) {
value = Choice::Build({ apply(rule->left), apply(rule->right) });
}
void visit(const Seq *rule) {
value = Seq::Build({ apply(rule->left), apply(rule->right) });
}
void visit(const Repeat *rule) {
value = make_shared<Repeat>(apply(rule->content));
}
@ -68,11 +68,11 @@ namespace tree_sitter {
public:
map<const string, const rule_ptr> tokens;
};
pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &input_grammar) {
map<const string, const rule_ptr> rules, tokens, aux_rules, aux_tokens;
TokenExtractor extractor;
for (auto &pair : input_grammar.rules) {
string name = pair.first;
rule_ptr rule = pair.second;
@ -90,10 +90,10 @@ namespace tree_sitter {
else
aux_rules.insert({ name, extractor.apply(rule) });
}
aux_tokens.insert(extractor.tokens.begin(), extractor.tokens.end());
return {
return {
PreparedGrammar(input_grammar.start_rule_name, rules, aux_rules),
PreparedGrammar("", tokens, aux_tokens)
};

View file

@ -5,7 +5,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, PreparedGrammar> extract_tokens(const PreparedGrammar &);
}

View file

@ -5,7 +5,7 @@
namespace tree_sitter {
using std::pair;
namespace prepare_grammar {
pair<PreparedGrammar, PreparedGrammar> prepare_grammar(const Grammar &input_grammar) {
auto grammars = prepare_grammar::extract_tokens(input_grammar);

View file

@ -6,7 +6,7 @@
namespace tree_sitter {
class Grammar;
class PreparedGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, PreparedGrammar> prepare_grammar(const Grammar &);
}

View file

@ -15,7 +15,7 @@ namespace tree_sitter {
const map<const string, const rule_ptr> &aux_rules) :
Grammar(start_rule_name, rules),
aux_rules(aux_rules) {}
PreparedGrammar::PreparedGrammar(std::string start_rule_name,
const initializer_list<pair<const string, const rule_ptr>> &rules,
const initializer_list<pair<const string, const rule_ptr>> &aux_rules) :
@ -45,14 +45,14 @@ namespace tree_sitter {
}
return true;
}
bool PreparedGrammar::has_definition(const Symbol &symbol) const {
return rule(symbol).get() != nullptr;
}
ostream& operator<<(ostream &stream, const PreparedGrammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules) {
@ -63,7 +63,7 @@ namespace tree_sitter {
started = true;
}
stream << string("}");
stream << string(" aux_rules: {");
started = false;
for (auto pair : grammar.aux_rules) {
@ -74,7 +74,7 @@ namespace tree_sitter {
started = true;
}
stream << string("}");
return stream << string(">");
}
}

View file

@ -14,16 +14,16 @@ namespace tree_sitter {
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &rules,
const std::initializer_list<std::pair<const std::string, const rules::rule_ptr>> &aux_rules);
PreparedGrammar(const Grammar &grammar);
bool operator==(const PreparedGrammar &other) const;
bool has_definition(const rules::Symbol &symbol) const;
const rules::rule_ptr rule(const rules::Symbol &symbol) const;
const std::map<const std::string, const rules::rule_ptr> aux_rules;
};
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
std::string compile(const Grammar &grammar, std::string name);
}

View file

@ -4,23 +4,23 @@
namespace tree_sitter {
namespace rules {
// Blank carries no state, so there is nothing to initialize.
Blank::Blank() {}
// Any two Blank rules are equal; a rule of another dynamic type never is.
bool Blank::operator==(const Rule &rule) const {
    const Blank *other = dynamic_cast<const Blank *>(&rule);
    return other != nullptr;
}
// All Blank rules compare equal, so they share one constant hash.
size_t Blank::hash_code() const {
return 0;
}
// Returns a newly allocated Blank.
rule_ptr Blank::copy() const {
    rule_ptr duplicate = std::make_shared<Blank>();
    return duplicate;
}
// Fixed textual form of a blank rule.
std::string Blank::to_string() const {
    std::string text("#<blank>");
    return text;
}
// Hands this rule to the visitor's overload for Blank.
void Blank::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -8,7 +8,7 @@ namespace tree_sitter {
class Blank : public Rule {
public:
Blank();
bool operator==(const Rule& other) const;
size_t hash_code() const;
rule_ptr copy() const;

View file

@ -3,24 +3,24 @@
namespace tree_sitter {
using std::string;
namespace rules {
// Upper bound used for the "any character" range. As a plain char, '\xff' is
// negative where char is signed, so compare against it rather than 255.
static const char MAX_CHAR = '\xff';
// Single-character range: min and max are both `value`.
CharacterRange::CharacterRange(char value) : min(value), max(value) {}
// Range with explicit bounds; the arguments are stored without validation.
CharacterRange::CharacterRange(char min, char max) : min(min), max(max) {}
// Ranges are equal when both bounds match.
bool CharacterRange::operator==(const CharacterRange &other) const {
    if (min != other.min) return false;
    return max == other.max;
}
// Lexicographic order on (min, max), compared as plain chars.
bool CharacterRange::operator<(const CharacterRange &other) const {
    if (min != other.min)
        return min < other.min;
    return max < other.max;
}
string escape_character(char input) {
switch (input) {
case '\0':
@ -31,7 +31,7 @@ namespace tree_sitter {
return string() + input;
}
}
string CharacterRange::to_string() const {
if (min == 0 && max == MAX_CHAR)
return "<ANY>";

View file

@ -14,24 +14,24 @@ namespace tree_sitter {
// Upper bound of `range` as an int; MAX_CHAR is reported as 255, every other
// value is a plain char-to-int conversion.
int max_int(const CharacterRange &range) {
    if (range.max == MAX_CHAR)
        return 255;
    return static_cast<int>(range.max);
}
// Lower bound of `range` as an int (plain char-to-int conversion).
int min_int(const CharacterRange &range) {
    return static_cast<int>(range.min);
}
// Empty character set.
CharacterSet::CharacterSet() : ranges({}) {}
// Copies the given ranges as-is; they are not merged or normalized here.
CharacterSet::CharacterSet(const set<CharacterRange> &ranges) : ranges(ranges) {}
// Builds the range set from a braced list; ranges are stored as given.
CharacterSet::CharacterSet(const initializer_list<CharacterRange> &ranges) : ranges(ranges) {}
// Equal only to another CharacterSet that holds exactly the same ranges.
bool CharacterSet::operator==(const Rule &rule) const {
    const CharacterSet *other = dynamic_cast<const CharacterSet *>(&rule);
    if (!other)
        return false;
    return ranges == other->ranges;
}
// Orders character sets by comparing their range sets with the container's
// own operator<.
bool CharacterSet::operator<(const CharacterSet &other) const {
return ranges < other.ranges;
}
size_t CharacterSet::hash_code() const {
size_t result = std::hash<size_t>()(ranges.size());
for (auto &range : ranges) {
@ -51,13 +51,13 @@ namespace tree_sitter {
result += " " + range.to_string();
return result + " }>";
}
// Starts from the full range 0..MAX_CHAR and removes this set's characters;
// what remains is the complement.
CharacterSet CharacterSet::complement() const {
    CharacterSet everything_else({ {0, MAX_CHAR} });
    everything_else.remove_set(*this);
    return everything_else;
}
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
auto first_range = *ranges.begin();
if (first_range.min == 0 && first_range.max > 0) {
@ -66,7 +66,7 @@ namespace tree_sitter {
return { *this, true };
}
}
void add_range(CharacterSet *self, CharacterRange new_range) {
set<CharacterRange> new_ranges;
@ -87,7 +87,7 @@ namespace tree_sitter {
new_range.max = range.max;
}
}
if (!is_adjacent) {
new_ranges.insert(range);
}
@ -95,7 +95,7 @@ namespace tree_sitter {
new_ranges.insert(new_range);
self->ranges = new_ranges;
}
CharacterSet remove_range(CharacterSet *self, CharacterRange new_range) {
CharacterSet removed_set;
set<CharacterRange> new_ranges;
@ -126,17 +126,17 @@ namespace tree_sitter {
self->ranges = new_ranges;
return removed_set;
}
// True when the set holds no ranges at all.
bool CharacterSet::is_empty() const {
    return ranges.size() == 0;
}
// Adds every range of `other` to this set, one add_range call per range.
void CharacterSet::add_set(const CharacterSet &other) {
    for (auto it = other.ranges.begin(); it != other.ranges.end(); ++it)
        add_range(this, *it);
}
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
CharacterSet result;
for (auto &other_range : other.ranges) {
@ -145,12 +145,12 @@ namespace tree_sitter {
}
return result;
}
// Works on a copy so *this stays untouched and returns whatever remove_set
// reports back for `set`.
// NOTE(review): this relies on remove_set returning the characters it removed
// (i.e. the overlap) — confirm against its full definition.
CharacterSet CharacterSet::intersect(const CharacterSet &set) const {
    CharacterSet scratch = *this;
    return scratch.remove_set(set);
}
// Hands this rule to the visitor's overload for CharacterSet.
void CharacterSet::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -13,21 +13,21 @@ namespace tree_sitter {
CharacterSet();
CharacterSet(const std::set<CharacterRange> &ranges);
CharacterSet(const std::initializer_list<CharacterRange> &ranges);
bool operator==(const Rule& other) const;
bool operator<(const CharacterSet &) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor &visitor) const;
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet complement() const;
CharacterSet intersect(const CharacterSet &) const;
std::pair<CharacterSet, bool> most_compact_representation() const;
bool is_empty() const;
std::set<CharacterRange> ranges;
};
}

View file

@ -5,17 +5,17 @@ namespace tree_sitter {
using std::string;
using std::make_shared;
using std::vector;
namespace rules {
// Binary choice node: stores the two alternatives.
Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {};
// Folds a list of alternatives into a left-nested chain of binary Choice
// nodes. A single-element list yields that element unchanged; an empty list
// yields a null rule_ptr.
rule_ptr Choice::Build(const vector<rule_ptr> &rules) {
    rule_ptr result;
    // Iterate by const reference: copying the shared_ptr loop variable costs a
    // reference-count round trip per element for no benefit.
    for (const auto &rule : rules) {
        if (result)
            result = make_shared<Choice>(result, rule);
        else
            result = rule;
    }
    return result;
}
bool Choice::operator==(const Rule &rule) const {
const Choice *other = dynamic_cast<const Choice *>(&rule);
return other && (*other->left == *left) && (*other->right == *right);
@ -24,7 +24,7 @@ namespace tree_sitter {
// XOR of the two alternatives' hashes (so operand order does not matter).
size_t Choice::hash_code() const {
    const size_t left_hash = left->hash_code();
    const size_t right_hash = right->hash_code();
    return left_hash ^ right_hash;
}
// Copy-constructs a new Choice; the child pointers are shared, not cloned.
rule_ptr Choice::copy() const {
    rule_ptr duplicate = std::make_shared<Choice>(*this);
    return duplicate;
}
@ -32,7 +32,7 @@ namespace tree_sitter {
// Textual form: "#<choice LEFT RIGHT>".
string Choice::to_string() const {
    string text("#<choice ");
    text += left->to_string();
    text += " ";
    text += right->to_string();
    text += ">";
    return text;
}
// Hands this rule to the visitor's overload for Choice.
void Choice::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -12,14 +12,14 @@ namespace tree_sitter {
using std::hash;
using std::make_shared;
using std::set;
class PatternParser {
public:
// Copies the pattern text, caches its length and starts reading at offset 0.
PatternParser(const string &input) :
input(input),
length(input.length()),
position(0) {}
rule_ptr rule() {
auto result = term();
while (has_more_input() && peek() == '|') {
@ -28,7 +28,7 @@ namespace tree_sitter {
}
return result;
}
private:
rule_ptr term() {
rule_ptr result = factor();
@ -36,7 +36,7 @@ namespace tree_sitter {
result = Seq::Build({ result, factor() });
return result;
}
rule_ptr factor() {
rule_ptr result = atom();
if (has_more_input() && (peek() == '+')) {
@ -45,7 +45,7 @@ namespace tree_sitter {
}
return result;
}
rule_ptr atom() {
rule_ptr result;
switch (peek()) {
@ -77,7 +77,7 @@ namespace tree_sitter {
}
return result;
}
CharacterSet char_set() {
bool is_affirmative = true;
if (peek() == '^') {
@ -89,7 +89,7 @@ namespace tree_sitter {
result.add_set(single_char());
return is_affirmative ? result : result.complement();
}
CharacterSet single_char() {
CharacterSet value;
switch (peek()) {
@ -112,7 +112,7 @@ namespace tree_sitter {
}
return value;
}
CharacterSet escaped_char(char value) {
switch (value) {
case '\\':
@ -128,52 +128,52 @@ namespace tree_sitter {
return CharacterSet();
}
}
// Advances the read position by one character; performs no bounds check.
void next() {
position++;
}
// Returns the character at the current position without consuming it.
// No bounds check here — NOTE(review): callers presumably test
// has_more_input() first; not every call site is visible.
char peek() {
return input[position];
}
// True while the read position is still inside the pattern text.
bool has_more_input() {
    const bool at_end = !(position < length);
    return !at_end;
}
bool has_error() {
return error != "";
}
string error;
const string input;
const size_t length;
size_t position;
};
// Stores the pattern source text; it is parsed later, in to_rule_tree().
Pattern::Pattern(const string &string) : value(string) {};
// Equal only to another Pattern with the same source text.
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
    auto pattern = dynamic_cast<const Pattern *>(&other);
    if (!pattern)
        return false;
    return pattern->value == value;
}
size_t Pattern::hash_code() const {
return hash<string>()(value);
}
// Copy-constructs a new Pattern with the same source text.
rule_ptr Pattern::copy() const {
    rule_ptr duplicate = std::make_shared<Pattern>(*this);
    return duplicate;
}
// Textual form: "#<pattern 'TEXT'>".
string Pattern::to_string() const {
    string text("#<pattern '");
    text += value;
    text += "'>";
    return text;
}
// Hands this rule to the visitor's overload for Pattern.
void Pattern::accept(Visitor &visitor) const {
visitor.visit(this);
}
// Parses the pattern text with a fresh PatternParser and returns its rule tree.
rule_ptr Pattern::to_rule_tree() const {
    PatternParser parser(value);
    return parser.rule();
}

View file

@ -15,7 +15,7 @@ namespace tree_sitter {
rule_ptr copy() const;
std::string to_string() const;
void accept(Visitor &visitor) const;
rule_ptr to_rule_tree() const;
};
}

View file

@ -6,24 +6,24 @@ using std::string;
namespace tree_sitter {
namespace rules {
// Stores the rule to be repeated.
Repeat::Repeat(const rule_ptr content) : content(content) {}
// Equal only to another Repeat whose content rule compares equal.
bool Repeat::operator==(const Rule &rule) const {
    const Repeat *other = dynamic_cast<const Repeat *>(&rule);
    if (!other)
        return false;
    return *other->content == *content;
}
// A repeat hashes exactly like its content rule.
size_t Repeat::hash_code() const {
return content->hash_code();
}
// Copy-constructs a new Repeat; the content pointer is shared, not cloned.
rule_ptr Repeat::copy() const {
    rule_ptr duplicate = std::make_shared<Repeat>(*this);
    return duplicate;
}
// Textual form: "#<repeat CONTENT>".
string Repeat::to_string() const {
    string text("#<repeat ");
    text += content->to_string();
    text += ">";
    return text;
}
// Hands this rule to the visitor's overload for Repeat.
void Repeat::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -9,7 +9,7 @@ namespace tree_sitter {
// Inequality is the negation of the (virtual) equality test.
bool Rule::operator!=(const Rule &other) const {
    const bool same = this->operator==(other);
    return !same;
}
// Streams a rule using its to_string() form.
ostream& operator<<(ostream& stream, const Rule &rule) {
    stream << rule.to_string();
    return stream;
}

View file

@ -20,7 +20,7 @@ namespace tree_sitter {
virtual std::string to_string() const = 0;
virtual void accept(Visitor &visitor) const = 0;
};
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
}

View file

@ -15,20 +15,20 @@ namespace tree_sitter {
using std::string;
using std::set;
using std::vector;
namespace rules {
// Creates a Blank rule.
rule_ptr blank() {
    rule_ptr result = make_shared<Blank>();
    return result;
}
// Creates a choice between the given rules (delegates to Choice::Build).
rule_ptr choice(const vector<rule_ptr> &rules) {
    rule_ptr result = Choice::Build(rules);
    return result;
}
// Creates a Repeat rule wrapping `content`.
rule_ptr repeat(const rule_ptr &content) {
    rule_ptr result = std::make_shared<Repeat>(content);
    return result;
}
// Creates a sequence of the given rules (delegates to Seq::Build).
rule_ptr seq(const vector<rule_ptr> &rules) {
    rule_ptr result = Seq::Build(rules);
    return result;
}
@ -44,11 +44,11 @@ namespace tree_sitter {
// Creates a Pattern rule from its source text.
rule_ptr pattern(const string &value) {
    rule_ptr result = make_shared<Pattern>(value);
    return result;
}
// Creates a String rule matching `value`.
rule_ptr str(const string &value) {
    rule_ptr result = make_shared<String>(value);
    return result;
}
// Creates a choice between `rule` and a copy of the built-in ERROR symbol.
rule_ptr err(const rule_ptr &rule) {
    vector<rule_ptr> options;
    options.push_back(rule);
    options.push_back(ERROR.copy());
    return choice(options);
}

View file

@ -21,19 +21,19 @@ namespace tree_sitter {
const Seq *other = dynamic_cast<const Seq *>(&rule);
return other && (*other->left == *left) && (*other->right == *right);
}
// XOR of the two members' hashes (so operand order does not matter).
size_t Seq::hash_code() const {
    const size_t left_hash = left->hash_code();
    const size_t right_hash = right->hash_code();
    return left_hash ^ right_hash;
}
// Copy-constructs a new Seq; the child pointers are shared, not cloned.
rule_ptr Seq::copy() const {
    rule_ptr duplicate = std::make_shared<Seq>(*this);
    return duplicate;
}
// Textual form: "#<seq LEFT RIGHT>".
string Seq::to_string() const {
    string text("#<seq ");
    text += left->to_string();
    text += " ";
    text += right->to_string();
    text += ">";
    return text;
}
// Hands this rule to the visitor's overload for Seq.
void Seq::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -7,24 +7,24 @@ using std::hash;
namespace tree_sitter {
namespace rules {
// Stores the literal text this rule matches.
String::String(string value) : value(value) {};
// Equal only to another String rule with the same text.
bool String::operator==(const Rule &rule) const {
    const String *other = dynamic_cast<const String *>(&rule);
    if (!other)
        return false;
    return other->value == value;
}
size_t String::hash_code() const {
return hash<string>()(value);
}
// Copy-constructs a new String rule with the same text.
rule_ptr String::copy() const {
    rule_ptr duplicate = std::make_shared<String>(*this);
    return duplicate;
}
// Textual form: "#<string 'TEXT'>".
// The body is closed exactly once; a second closing brace here would end the
// enclosing namespace before the definitions that follow.
string String::to_string() const {
    return string("#<string '") + value + "'>";
}
// Hands this rule to the visitor's overload for String.
void String::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -22,11 +22,11 @@ namespace tree_sitter {
// Combines the hash of the name with the hash of the symbol type via XOR.
size_t Symbol::hash_code() const {
    const size_t name_hash = hash<string>()(name);
    const size_t type_hash = hash<short int>()(type);
    return name_hash ^ type_hash;
}
// Copy-constructs a new Symbol with the same name and type.
rule_ptr Symbol::copy() const {
    rule_ptr duplicate = std::make_shared<Symbol>(*this);
    return duplicate;
}
string Symbol::to_string() const {
switch (type) {
case SymbolTypeNormal:
@ -39,17 +39,17 @@ namespace tree_sitter {
return string("#<builtin_sym '") + name + "'>";
}
}
// Orders symbols by type first, then by name.
bool Symbol::operator<(const Symbol &other) const {
    if (type != other.type)
        return type < other.type;
    return name < other.name;
}
// True for symbols of the built-in type.
bool Symbol::is_built_in() const {
    return SymbolTypeBuiltIn == type;
}
// True for symbols of the auxiliary type.
bool Symbol::is_auxiliary() const {
    return SymbolTypeAuxiliary == type;
}
@ -57,7 +57,7 @@ namespace tree_sitter {
// Hidden and auxiliary symbols both count as hidden.
bool Symbol::is_hidden() const {
    switch (type) {
        case SymbolTypeHidden:
        case SymbolTypeAuxiliary:
            return true;
        default:
            return false;
    }
}
// Hands this rule to the visitor's overload for Symbol.
void Symbol::accept(Visitor &visitor) const {
visitor.visit(this);
}

View file

@ -11,8 +11,8 @@ namespace tree_sitter {
SymbolTypeHidden,
SymbolTypeAuxiliary,
SymbolTypeBuiltIn
} SymbolType;
} SymbolType;
class Symbol : public Rule {
public:
Symbol(const std::string &name);
@ -20,7 +20,7 @@ namespace tree_sitter {
bool operator==(const Rule& other) const;
bool operator==(const Symbol &other) const;
size_t hash_code() const;
rule_ptr copy() const;
std::string to_string() const;

View file

@ -13,7 +13,7 @@ namespace tree_sitter {
class Seq;
class String;
class Pattern;
class Visitor {
public:
virtual void default_visit(const Rule *rule);
@ -26,7 +26,7 @@ namespace tree_sitter {
virtual void visit(const String *rule);
virtual void visit(const Pattern *rule);
};
template<typename T>
class RuleFn : public Visitor {
protected:

View file

@ -87,7 +87,7 @@ static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_name
return snprintf(string, limit, "%s", NULL_TREE_STRING);
if (tree->symbol == ts_builtin_sym_error)
return snprintf(string, limit, "%s", ERROR_TREE_STRING);
size_t result = snprintf(string, limit, "(%s", symbol_names[tree->symbol]);
char *cursor = string + result;
for (size_t i = 0; i < tree->data.children.count; i++) {
@ -96,7 +96,7 @@ static size_t tree_write_to_string(const ts_tree *tree, const char **symbol_name
result += tree_write_to_string(child, symbol_names, cursor + 1, limit);
cursor = (limit > 0) ? string + result : string;
}
return result + snprintf(cursor, limit, ")");
}