In lexer, prefer tokens to skipped separator characters

Previously, newlines in Go and JavaScript were parsed as meaningless
separator characters instead of statement terminators.
Max Brunsfeld 2014-05-30 13:29:54 -07:00
parent 220e081c49
commit e93e254518
26 changed files with 5559 additions and 6650 deletions
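In short: when a character such as "\n" can either begin a token or be skipped as a separator, the lexer now prefers the token. A minimal sketch of the effect, using the grammar DSL from the Go example below (this is the rule the commit actually adds):

{ "_terminator", token(choice({
    str("\n"),
    str(";") })) },

Before this change, "\n" was lumped in with the skipped separators (' ', '\t', '\r'), so statement boundaries were invisible to the parser.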

@ -6,6 +6,10 @@ namespace tree_sitter_examples {
using tree_sitter::GrammarOptions;
using namespace tree_sitter::rules;
static rule_ptr terminated(rule_ptr rule) {
return seq({ rule, sym("_terminator") });
}
extern const Grammar golang({
{ "program", seq({
sym("package_directive"),
@ -20,28 +24,26 @@ namespace tree_sitter_examples {
in_parens(err(repeat(sym("package_import")))),
sym("package_import") }) }) },
{ "package_import", sym("string") },
{ "declaration", seq({
choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }),
blank() }) },
{ "declaration", choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }) },
// Declarations
{ "type_declaration", seq({
{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") }) },
{ "var_declaration", seq({
sym("type_expression") })) },
{ "var_declaration", terminated(seq({
keyword("var"),
sym("var_name"),
str("="),
sym("expression") }) },
{ "func_declaration", seq({
sym("expression") })) },
{ "func_declaration", terminated(seq({
keyword("func"),
sym("var_name"),
sym("_func_signature"),
sym("statement_block") }) },
sym("statement_block") })) },
{ "statement_block", in_braces(blank()) },
{ "type_expression", choice({
sym("pointer_type"),
@ -108,6 +110,10 @@ namespace tree_sitter_examples {
sym("type_name"),
blank() }) }) },
{ "_terminator", token(choice({
str("\n"),
str(";") })) },
{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },

File diff suppressed because it is too large

File diff suppressed because it is too large

@ -31,6 +31,7 @@ static ts_state_id ts_lex_states[STATE_COUNT]
static ts_tree * ts_lex(ts_lexer *lexer, ts_state_id lex_state)
#define START_LEXER() \
DEBUG_LEX("LEX %d", lex_state); \
char lookahead; \
next_state: \
lookahead = ts_lexer_lookahead_char(lexer); \
@ -46,7 +47,7 @@ ts_lexer_start_token(lexer);
{ DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); return ts_lexer_build_node(lexer, symbol); }
#define LEX_ERROR() \
{ return ts_lexer_build_node(lexer, ts_builtin_sym_error); }
{ DEBUG_LEX("ERROR"); return ts_lexer_build_node(lexer, ts_builtin_sym_error); }
#define LEX_PANIC() \
{ DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); return NULL; }

@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST
describe("resolving parse conflicts", []() {
bool should_update;
bool update;
PreparedGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
@ -37,31 +37,67 @@ describe("resolving parse conflicts", []() {
});
it("favors non-errors over lexical errors", [&]() {
should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2, {0}));
AssertThat(update, IsTrue());
should_update = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Advance(2, {0}), LexAction::Error());
AssertThat(update, IsFalse());
});
describe("accept-token/advance conflicts", [&]() {
describe("when the the accept-token has higher precedence", [&]() {
it("prefers the accept", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());
update = manager->resolve_lex_action(LexAction::Advance(1, { -1 }), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});
describe("when the the actions have the same precedence", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
AssertThat(update, IsTrue());
update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
});
describe("when the advance has conflicting precedences compared to the accept", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
AssertThat(update, IsTrue());
update = manager->resolve_lex_action(LexAction::Advance(1, { -2, 2 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
it_skip("records a conflict", [&]() {
manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
});
});
});
describe("accept-token/accept-token conflicts", [&]() {
describe("when one token has a higher precedence than the other", [&]() {
it("prefers the token with the higher precedence", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());
should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});
describe("when both tokens have the same precedence", [&]() {
it("prefers the token listed earlier in the grammar", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());
should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(update, IsTrue());
});
});
});
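To make the new shape of the API concrete (a sketch using the same fixtures): LexAction::Advance now carries the set of precedence values of every token still in progress, and resolve_lex_action(old, new) answers whether the new action should replace the old one:

bool replaced = manager->resolve_lex_action(
    LexAction::Accept(sym3, 0),       // current: accept sym3 at precedence 0
    LexAction::Advance(1, { -1 }));   // candidate: advance, in-progress precedences {-1}
// replaced == false: the accepted token outranks every in-progress token,
// so the lexer accepts rather than advancing.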
@ -81,11 +117,11 @@ describe("resolving parse conflicts", []() {
});
it("favors non-errors over parse errors", [&]() {
should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(should_update, IsTrue());
update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(update, IsTrue());
should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(update, IsFalse());
});
describe("shift/reduce conflicts", [&]() {

@ -34,7 +34,7 @@ namespace tree_sitter {
rule_ptr i_aux_token(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
}
rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return make_shared<Metadata>(rule, values);
}

@ -15,9 +15,9 @@ describe("expanding token rules", []() {
pattern("x*"),
i_sym(11) }) },
}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@ -26,7 +26,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});
it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@ -34,9 +34,9 @@ describe("expanding token rules", []() {
str("xyz"),
i_sym(11) }) },
}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@ -45,7 +45,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});
it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@ -53,7 +53,7 @@ describe("expanding token rules", []() {
str("xyz"),
pattern("[") }) },
}, {});
auto result = expand_tokens(grammar);
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
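For context, the expansion under test rewrites each string into a sequence of single-character sets (see the ExpandTokens rule function later in this diff), so str("xyz") becomes, roughly:

seq({
    CharacterSet({ 'x' }).copy(),
    CharacterSet({ 'y' }).copy(),
    CharacterSet({ 'z' }).copy() })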

@ -82,11 +82,11 @@ describe("extracting tokens from a grammar", []() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "'ab'", str("ab") },
})));
@ -99,34 +99,34 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", i_token(0) }
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {})));
});
it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
}, {})));
AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_A", str("ab") },
}, {})));
});
it("updates the grammar's ubiquitous_tokens", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
@ -135,24 +135,24 @@ describe("extracting tokens from a grammar", []() {
}, {}, PreparedGrammarOptions({
{ Symbol(0) }
})));
AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});
it("extracts entire auxiliary rules", [&]() {
auto result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", str("ab") },
{ "rule_B", i_aux_sym(0) },
{ "rule_C", i_aux_sym(1) },
}));
AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_B", i_aux_token(0) },
{ "rule_C", i_aux_sym(0) },
})));
AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "rule_A", str("ab") },
})));
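A compact restatement of the renumbering behavior tested above (sketch): once a rule moves into the lexical grammar, the rules left behind shift down and every Symbol index is rewritten to match:

auto result = extract_tokens(PreparedGrammar({
    { "rule_A", str("ab") },   // extracted into result.second
    { "rule_B", i_sym(0) },    // its reference to rule_A becomes i_token(0)
}, {}));
// result.first: the remaining syntax rules, with shifted indices
// result.second: the extracted lexical grammar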

@ -13,13 +13,13 @@ describe("parsing regex patterns", []() {
"[aAeE]",
character({ 'a', 'A', 'e', 'E' })
},
{
"'.' characters as wildcards",
".",
CharacterSet({'\n'}).complement().copy()
},
{
"character classes",
"\\w-\\d",
@ -28,7 +28,7 @@ describe("parsing regex patterns", []() {
character({ '-' }),
character({ {'0', '9'} }) })
},
{
"choices",
"ab|cd|ef",
@ -47,7 +47,7 @@ describe("parsing regex patterns", []() {
})
})
},
{
"simple sequences",
"abc",
@ -56,25 +56,25 @@ describe("parsing regex patterns", []() {
character({ 'b' }),
character({ 'c' }) })
},
{
"character ranges",
"[12a-dA-D3]",
character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })
},
{
"negated characters",
"[^a\\d]",
character({ {'a'}, {'0', '9'} }, false)
},
{
"backslashes",
"\\\\",
character({ '\\' })
},
{
"character groups in sequences",
"x([^x]|\\\\x)*x",
@ -87,7 +87,7 @@ describe("parsing regex patterns", []() {
character({ 'x' })
})
},
{
"choices in sequences",
"(a|b)cd",
@ -100,7 +100,7 @@ describe("parsing regex patterns", []() {
character({ 'd' })
})
},
{
"escaped parentheses",
"a\\(b",
@ -110,7 +110,7 @@ describe("parsing regex patterns", []() {
character({ 'b' })
})
},
{
"escaped periods",
"a\\.",
@ -119,7 +119,7 @@ describe("parsing regex patterns", []() {
character({ '.' })
})
},
{
"plus repeats",
"(ab)+(cd)+",
@ -134,7 +134,7 @@ describe("parsing regex patterns", []() {
}),
})
},
{
"asterix repeats",
"(ab)*(cd)*",
@ -143,7 +143,7 @@ describe("parsing regex patterns", []() {
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
})
},
{
"optional rules",
"a(bc)?",
@ -156,7 +156,7 @@ describe("parsing regex patterns", []() {
})
}
};
vector<tuple<string, string, const char *>> invalid_inputs = {
{
"mismatched open parens",
@ -189,23 +189,23 @@ describe("parsing regex patterns", []() {
"unmatched close square bracket",
},
};
for (auto &triple : valid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
rule_ptr rule = get<2>(triple);
it(("parses " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.first, EqualsPointer(rule));
});
}
for (auto &triple : invalid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
const char *expected_message = get<2>(triple);
it(("handles invalid regexes with " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.second, !Equals((const GrammarError *)nullptr));

@ -27,3 +27,15 @@ func main() {
(func_declaration (var_name) (statement_block
(comment))))
==========================================
handles indented code after blocks
=========================================
package trivial
func one() {}
func two() {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (statement_block))
(func_declaration (var_name) (statement_block)))

@ -80,6 +80,16 @@ try {
(statement_block (expression_statement (function_call (identifier) (identifier))))))
===========================================
parses indented code after blocks
===========================================
function x() {}
return z;
---
(program
(expression_statement
(function_expression (identifier) (formal_parameters) (statement_block)))
(return_statement (identifier)))
===========================================
parses switch statements
===========================================
switch(x) {

@ -70,7 +70,9 @@ namespace tree_sitter {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = LexAction::Advance(new_state_id);
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
@ -93,14 +95,24 @@ namespace tree_sitter {
rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Metadata>(make_shared<rules::Blank>(), map<rules::MetadataKey, int>({
make_shared<rules::Metadata>(rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Blank>(),
}), map<rules::MetadataKey, int>({
{rules::START_TOKEN, 1},
{rules::PRECEDENCE, -1},
})),
rule
rule,
});
}
set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
set<int> result;
for (const auto &item : item_set)
result.insert(item.precedence());
return result;
}
public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
lex_grammar(lex_grammar),
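Why this makes newlines lex as terminators (an informal example; the precedence values are the ones implied by the rules above): when the Go lexer sees "\n", the lex state holds one item inside the separator rule, whose token start was tagged PRECEDENCE -1 by after_separators, and one item at the start of the `_terminator` token, at the default precedence 0. The Advance action built above therefore carries both values:

auto action = LexAction::Advance(new_state_id, { -1, 0 });
// The conflict manager compares the maximum (0) against competing actions,
// so the real token beats the separator and the lexer advances into
// `_terminator` instead of skipping the newline.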

@ -58,7 +58,7 @@ namespace tree_sitter {
}
}
}
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.options.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;

@ -1,5 +1,6 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
namespace tree_sitter {
namespace build_tables {
@ -11,6 +12,13 @@ namespace tree_sitter {
return rule->value_for(metadata_key);
}
// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) {
return apply(rule->left);
}
public:
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};

@ -18,17 +18,45 @@ namespace tree_sitter {
bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);
switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept:
if (new_action.precedence > old_action.precedence) {
return true;
} else if (new_action.precedence < old_action.precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: {
// int min_precedence = *new_action.precedence_values.begin();
int max_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > old_precedence) {
// if (min_precedence < old_precedence)
return true;
} else if (max_precedence < old_precedence) {
return false;
} else {
return true;
}
return false;
}
default:
return false;
}
return true;
}
default:
return false;
}
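Spelled out, the decision table this implements (precedence_values is an ordered std::set, so begin() is its minimum and rbegin() its maximum):

// old = Error():              any non-error action wins
// old = Accept(sym, p):
//   new = Accept(sym', p')    higher precedence wins; on a tie, the token
//                             with the smaller index (listed earlier) wins
//   new = Advance(s, values)  wins unless max(values) < p; when values
//                             straddle p (e.g. { -2, 2 }) the advance still
//                             wins, but see the skipped "records a conflict"
//                             test earlier in this diff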

@ -88,7 +88,7 @@ namespace tree_sitter {
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
return symbol.is_token() ? lexical_grammar : syntax_grammar;
}
string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
@ -115,7 +115,7 @@ namespace tree_sitter {
}
}
}
bool has_sanitized_name(string name) {
for (auto &pair : sanitized_names)
if (pair.second == name)

@ -48,7 +48,7 @@ namespace tree_sitter {
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}
bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}

@ -12,24 +12,25 @@ namespace tree_sitter {
LexAction::LexAction() :
type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1) {}
state_index(-1),
precedence_values({0}) {}
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, int precedence) :
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
precedence(precedence) {}
precedence_values(precedence_values) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), 0);
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
}
LexAction LexAction::Advance(size_t state_index) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), 0);
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
}
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, precedence);
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}
bool LexAction::operator==(const LexAction &other) const {

@ -16,18 +16,18 @@ namespace tree_sitter {
} LexActionType;
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, int precedence);
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index);
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;
LexActionType type;
rules::Symbol symbol;
size_t state_index;
int precedence;
std::set<int> precedence_values;
};
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
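Call sites now construct actions like this (a sketch; note that Accept still takes a single precedence and stores it as a one-element set):

LexAction error   = LexAction::Error();               // precedence_values == {0}
LexAction advance = LexAction::Advance(5, { -1, 0 }); // all in-progress precedences
LexAction accept  = LexAction::Accept(Symbol(3), 2);  // stored as {2}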

@ -19,7 +19,7 @@ namespace tree_sitter {
using rules::rule_ptr;
using rules::String;
using rules::Pattern;
namespace prepare_grammar {
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
@ -30,38 +30,38 @@ namespace tree_sitter {
elements.push_back(rules::CharacterSet({ val }).copy());
return rules::Seq::Build(elements);
}
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
error = pair.second;
return pair.first;
}
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};
pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}
return { PreparedGrammar(rules, aux_rules, grammar.options), nullptr };
}
}
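Typical use, mirroring prepare_grammar later in this diff: callers check the error half of the pair before touching the grammar (report_error here is a hypothetical placeholder):

auto result = expand_tokens(lex_grammar);
if (result.second)
    return report_error(result.second);  // e.g. "unmatched open paren"
const PreparedGrammar &expanded = result.first;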

@ -5,7 +5,7 @@
namespace tree_sitter {
class PreparedGrammar;
namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);

@ -57,7 +57,7 @@ namespace tree_sitter {
SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
};
const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);
class TokenExtractor : public rules::IdentityRuleFn {
@ -71,7 +71,7 @@ namespace tree_sitter {
return make_shared<Symbol>(index, SymbolOptionAuxToken);
}
rule_ptr default_apply(const rules::Rule *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {
@ -80,7 +80,7 @@ namespace tree_sitter {
return result;
}
}
rule_ptr apply_to(const rules::Metadata *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {

@ -21,7 +21,7 @@ namespace tree_sitter {
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;
namespace prepare_grammar {
class PatternParser {
public:
@ -29,7 +29,7 @@ namespace tree_sitter {
input(input),
length(input.length()),
position(0) {}
pair<rule_ptr, const GrammarError *> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
@ -47,7 +47,7 @@ namespace tree_sitter {
auto rule = (choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
}
private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
rule_ptr result = blank();
@ -63,7 +63,7 @@ namespace tree_sitter {
} while (has_more_input());
return { result, nullptr };
}
pair<rule_ptr, const GrammarError *> factor() {
auto pair = atom();
if (pair.second)
@ -87,7 +87,7 @@ namespace tree_sitter {
}
return { result, nullptr };
}
pair<rule_ptr, const GrammarError *> atom() {
switch (peek()) {
case '(': {
@ -103,7 +103,7 @@ namespace tree_sitter {
case '[': {
next();
auto pair = char_set();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
@ -122,13 +122,13 @@ namespace tree_sitter {
}
default: {
auto pair = single_char();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
return { pair.first.copy(), nullptr };
}
}
}
pair<CharacterSet, const GrammarError *> char_set() {
bool is_affirmative = true;
if (peek() == '^') {
@ -146,7 +146,7 @@ namespace tree_sitter {
result = result.complement();
return { result, nullptr };
}
pair<CharacterSet, const GrammarError *> single_char() {
CharacterSet value;
switch (peek()) {
@ -168,7 +168,7 @@ namespace tree_sitter {
}
return { value, nullptr };
}
CharacterSet escaped_char(char value) {
switch (value) {
case 'a':
@ -181,23 +181,23 @@ namespace tree_sitter {
return CharacterSet({ value });
}
}
void next() {
position++;
}
char peek() {
return input[position];
}
bool has_more_input() {
return position < length;
}
pair<rule_ptr, const GrammarError *> error(string msg) {
return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) };
}
const string input;
const size_t length;
size_t position;
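For orientation, the methods above form a small recursive-descent parser. A sketch of the grammar it implements (errors are returned in the pair rather than thrown, which is why every level checks pair.second):

// rule   := term ('|' term)*          -- rule()
// term   := factor factor*            -- term()
// factor := atom ('+' | '*' | '?')?   -- factor()
// atom   := '(' rule ')'
//         | '[' char_set ']'
//         | single_char               -- atom()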

@ -24,10 +24,10 @@ namespace tree_sitter {
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);
return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}

@ -25,11 +25,11 @@ namespace tree_sitter {
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}
string apply_to(const rules::Seq *rule) {
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
}
string apply_to(const rules::Choice *rule) {
string result = "(choice";
for (auto &element : rule->elements)

@ -10,7 +10,7 @@ namespace tree_sitter {
using std::ostream;
using rules::rule_ptr;
using rules::Symbol;
PreparedGrammar::PreparedGrammar() : Grammar({}), aux_rules({}), options({}) {}
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,