In lexer, prefer tokens to skipped separator characters
Previously, newlines in Go and JavaScript were parsed as meaningless separator characters instead of statement terminators.
parent 220e081c49
commit e93e254518

26 changed files with 5559 additions and 6650 deletions
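The grammar-side pattern, condensed from the Go example below (a sketch only; the same approach is applied in the JavaScript grammar): statement rules are wrapped in a terminated() helper, and the terminator is declared with token() so the lexer can emit "\n" and ";" as real tokens instead of skipping newlines as separator whitespace.

static rule_ptr terminated(rule_ptr rule) {
return seq({ rule, sym("_terminator") });
}

{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") })) },

{ "_terminator", token(choice({
str("\n"),
str(";") })) },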
@@ -6,6 +6,10 @@ namespace tree_sitter_examples {
using tree_sitter::GrammarOptions;
using namespace tree_sitter::rules;

static rule_ptr terminated(rule_ptr rule) {
return seq({ rule, sym("_terminator") });
}

extern const Grammar golang({
{ "program", seq({
sym("package_directive"),
@@ -20,28 +24,26 @@ namespace tree_sitter_examples {
in_parens(err(repeat(sym("package_import")))),
sym("package_import") }) }) },
{ "package_import", sym("string") },
{ "declaration", seq({
choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }),
blank() }) },
{ "declaration", choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }) },

// Declarations
{ "type_declaration", seq({
{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") }) },
{ "var_declaration", seq({
sym("type_expression") })) },
{ "var_declaration", terminated(seq({
keyword("var"),
sym("var_name"),
str("="),
sym("expression") }) },
{ "func_declaration", seq({
sym("expression") })) },
{ "func_declaration", terminated(seq({
keyword("func"),
sym("var_name"),
sym("_func_signature"),
sym("statement_block") }) },
sym("statement_block") })) },
{ "statement_block", in_braces(blank()) },
{ "type_expression", choice({
sym("pointer_type"),
@@ -108,6 +110,10 @@ namespace tree_sitter_examples {
sym("type_name"),
blank() }) }) },

{ "_terminator", token(choice({
str("\n"),
str(";") })) },

{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -31,6 +31,7 @@ static ts_state_id ts_lex_states[STATE_COUNT]
static ts_tree * ts_lex(ts_lexer *lexer, ts_state_id lex_state)

#define START_LEXER() \
DEBUG_LEX("LEX %d", lex_state); \
char lookahead; \
next_state: \
lookahead = ts_lexer_lookahead_char(lexer); \
@@ -46,7 +47,7 @@ ts_lexer_start_token(lexer);
{ DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); return ts_lexer_build_node(lexer, symbol); }

#define LEX_ERROR() \
{ return ts_lexer_build_node(lexer, ts_builtin_sym_error); }
{ DEBUG_LEX("ERROR"); return ts_lexer_build_node(lexer, ts_builtin_sym_error); }

#define LEX_PANIC() \
{ DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); return NULL; }
@@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST

describe("resolving parse conflicts", []() {
bool should_update;
bool update;

PreparedGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
@@ -37,31 +37,67 @@ describe("resolving parse conflicts", []() {
});

it("favors non-errors over lexical errors", [&]() {
should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2, {0}));
AssertThat(update, IsTrue());

should_update = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Advance(2, {0}), LexAction::Error());
AssertThat(update, IsFalse());
});

describe("accept-token/advance conflicts", [&]() {
describe("when the accept-token has higher precedence", [&]() {
it("prefers the accept", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());

update = manager->resolve_lex_action(LexAction::Advance(1, { -1 }), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});

describe("when the actions have the same precedence", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
AssertThat(update, IsTrue());

update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
});

describe("when the advance has conflicting precedences compared to the accept", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
AssertThat(update, IsTrue());

update = manager->resolve_lex_action(LexAction::Advance(1, { -2, 2 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});

it_skip("records a conflict", [&]() {
manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
});
});
});

describe("accept-token/accept-token conflicts", [&]() {
describe("when one token has a higher precedence than the other", [&]() {
it("prefers the token with the higher precedence", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());

should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});

describe("when both tokens have the same precedence", [&]() {
it("prefers the token listed earlier in the grammar", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());

should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(update, IsTrue());
});
});
});
@@ -81,11 +117,11 @@ describe("resolving parse conflicts", []() {
});

it("favors non-errors over parse errors", [&]() {
should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(should_update, IsTrue());
update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(update, IsTrue());

should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(update, IsFalse());
});

describe("shift/reduce conflicts", [&]() {
@@ -34,7 +34,7 @@ namespace tree_sitter {
rule_ptr i_aux_token(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
}

rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return make_shared<Metadata>(rule, values);
}
@@ -15,9 +15,9 @@ describe("expanding token rules", []() {
pattern("x*"),
i_sym(11) }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@@ -26,7 +26,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});

it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@@ -34,9 +34,9 @@ describe("expanding token rules", []() {
str("xyz"),
i_sym(11) }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@@ -45,7 +45,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});

it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@@ -53,7 +53,7 @@ describe("expanding token rules", []() {
str("xyz"),
pattern("[") }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
@@ -82,11 +82,11 @@ describe("extracting tokens from a grammar", []() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));

AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));

AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "'ab'", str("ab") },
})));
@@ -99,34 +99,34 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}));

AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", i_token(0) }
}, {})));

AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {})));
});

it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}));

AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
}, {})));

AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_A", str("ab") },
}, {})));
});

it("updates the grammar's ubiquitous_tokens", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
@@ -135,24 +135,24 @@ describe("extracting tokens from a grammar", []() {
}, {}, PreparedGrammarOptions({
{ Symbol(0) }
})));

AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});

it("extracts entire auxiliary rules", [&]() {
auto result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", str("ab") },
{ "rule_B", i_aux_sym(0) },
{ "rule_C", i_aux_sym(1) },
}));

AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_B", i_aux_token(0) },
{ "rule_C", i_aux_sym(0) },
})));

AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "rule_A", str("ab") },
})));
@@ -13,13 +13,13 @@ describe("parsing regex patterns", []() {
"[aAeE]",
character({ 'a', 'A', 'e', 'E' })
},

{
"'.' characters as wildcards",
".",
CharacterSet({'\n'}).complement().copy()
},

{
"character classes",
"\\w-\\d",
@@ -28,7 +28,7 @@ describe("parsing regex patterns", []() {
character({ '-' }),
character({ {'0', '9'} }) })
},

{
"choices",
"ab|cd|ef",
@@ -47,7 +47,7 @@ describe("parsing regex patterns", []() {
})
})
},

{
"simple sequences",
"abc",
@@ -56,25 +56,25 @@ describe("parsing regex patterns", []() {
character({ 'b' }),
character({ 'c' }) })
},

{
"character ranges",
"[12a-dA-D3]",
character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })
},

{
"negated characters",
"[^a\\d]",
character({ {'a'}, {'0', '9'} }, false)
},

{
"backslashes",
"\\\\",
character({ '\\' })
},

{
"character groups in sequences",
"x([^x]|\\\\x)*x",
@@ -87,7 +87,7 @@ describe("parsing regex patterns", []() {
character({ 'x' })
})
},

{
"choices in sequences",
"(a|b)cd",
@@ -100,7 +100,7 @@ describe("parsing regex patterns", []() {
character({ 'd' })
})
},

{
"escaped parentheses",
"a\\(b",
@@ -110,7 +110,7 @@ describe("parsing regex patterns", []() {
character({ 'b' })
})
},

{
"escaped periods",
"a\\.",
@@ -119,7 +119,7 @@ describe("parsing regex patterns", []() {
character({ '.' })
})
},

{
"plus repeats",
"(ab)+(cd)+",
@@ -134,7 +134,7 @@ describe("parsing regex patterns", []() {
}),
})
},

{
"asterisk repeats",
"(ab)*(cd)*",
@@ -143,7 +143,7 @@ describe("parsing regex patterns", []() {
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
})
},

{
"optional rules",
"a(bc)?",
@@ -156,7 +156,7 @@ describe("parsing regex patterns", []() {
})
}
};

vector<tuple<string, string, const char *>> invalid_inputs = {
{
"mismatched open parens",
@@ -189,23 +189,23 @@ describe("parsing regex patterns", []() {
"unmatched close square bracket",
},
};

for (auto &triple : valid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
rule_ptr rule = get<2>(triple);

it(("parses " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.first, EqualsPointer(rule));
});
}

for (auto &triple : invalid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
const char *expected_message = get<2>(triple);

it(("handles invalid regexes with " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
@@ -27,3 +27,15 @@ func main() {
(func_declaration (var_name) (statement_block
(comment))))

==========================================
handles indented code after blocks
==========================================
package trivial

func one() {}
func two() {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (statement_block))
(func_declaration (var_name) (statement_block)))
@@ -80,6 +80,16 @@ try {
(statement_block (expression_statement (function_call (identifier) (identifier))))))

===========================================
parses indented code after blocks
===========================================
function x() {}
return z;
---
(program
(expression_statement
(function_expression (identifier) (formal_parameters) (statement_block)))
(return_statement (identifier)))
===========================================
parses switch statements
===========================================
switch(x) {
@@ -70,7 +70,9 @@ namespace tree_sitter {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = LexAction::Advance(new_state_id);
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
@@ -93,14 +95,24 @@ namespace tree_sitter {

rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Metadata>(make_shared<rules::Blank>(), map<rules::MetadataKey, int>({
make_shared<rules::Metadata>(rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Blank>(),
}), map<rules::MetadataKey, int>({
{rules::START_TOKEN, 1},
{rules::PRECEDENCE, -1},
})),
rule
rule,
});
}

set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
set<int> result;
for (const auto &item : item_set)
result.insert(item.precedence());
return result;
}

public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
lex_grammar(lex_grammar),
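In effect, the separator rule built by after_separators now carries PRECEDENCE -1 while grammar tokens default to 0, and each Advance action records the set of precedences reachable in its target state. A sketch in the style of the conflict-manager tests earlier in this commit (sym3 and the state number 1 are stand-ins taken from those tests): an advance whose only reachable precedence is the separator's -1 loses to accepting a real token, so a newline the grammar expects survives as a token.

// The separator-only advance carries precedence -1; accepting a real
// token at the default precedence 0 wins, so "\n" is kept as a token.
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());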
@@ -58,7 +58,7 @@ namespace tree_sitter {
}
}
}

void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.options.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
@@ -1,5 +1,6 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"

namespace tree_sitter {
namespace build_tables {
@@ -11,6 +12,13 @@ namespace tree_sitter {
return rule->value_for(metadata_key);
}

// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) {
return apply(rule->left);
}

public:
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
@@ -18,17 +18,45 @@ namespace tree_sitter {

bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);

switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept:
if (new_action.precedence > old_action.precedence) {
return true;
} else if (new_action.precedence < old_action.precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: {
// int min_precedence = *new_action.precedence_values.begin();
int max_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > old_precedence) {
// if (min_precedence < old_precedence)
return true;
} else if (max_precedence < old_precedence) {
return false;
} else {
return true;
}

return false;
}
default:
return false;
}

return true;
}
default:
return false;
}
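Summarizing the resolution policy implemented above: non-errors beat errors; between two accepts, higher precedence wins and ties go to the earlier-listed symbol; between an accept and an advance, the advance wins unless the highest precedence it can reach is lower than the accept's. Illustrative calls, mirroring the test expectations earlier in this commit:

update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
// false: the existing higher-precedence token is kept
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
// true: at equal precedence the advance is preferred, so longer tokens can still be matched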
@@ -88,7 +88,7 @@ namespace tree_sitter {
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
return symbol.is_token() ? lexical_grammar : syntax_grammar;
}

string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
@@ -115,7 +115,7 @@ namespace tree_sitter {
}
}
}

bool has_sanitized_name(string name) {
for (auto &pair : sanitized_names)
if (pair.second == name)
@@ -48,7 +48,7 @@ namespace tree_sitter {
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}

bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
@@ -12,24 +12,25 @@ namespace tree_sitter {
LexAction::LexAction() :
type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1) {}
state_index(-1),
precedence_values({0}) {}

LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, int precedence) :
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
precedence(precedence) {}
precedence_values(precedence_values) {}

LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), 0);
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
}

LexAction LexAction::Advance(size_t state_index) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), 0);
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
}

LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, precedence);
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}

bool LexAction::operator==(const LexAction &other) const {
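Since a single lex state can be reached through items of different precedences, Advance now carries a whole set of values rather than one int, and resolution reads the extremes of that set via *begin() and *rbegin(). A hedged usage sketch (the state id 7 and the values are made up):

// One advance action can now carry every precedence reachable from the
// target state; the conflict manager compares the maximum (here 2)
// against an accept's precedence when resolving accept/advance conflicts.
LexAction action = LexAction::Advance(7, { -1, 0, 2 });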
@@ -16,18 +16,18 @@ namespace tree_sitter {
} LexActionType;

class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, int precedence);
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index);
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;

LexActionType type;
rules::Symbol symbol;
size_t state_index;
int precedence;
std::set<int> precedence_values;
};

std::ostream& operator<<(std::ostream &stream, const LexAction &item);
@@ -19,7 +19,7 @@ namespace tree_sitter {
using rules::rule_ptr;
using rules::String;
using rules::Pattern;

namespace prepare_grammar {
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
@@ -30,38 +30,38 @@ namespace tree_sitter {
elements.push_back(rules::CharacterSet({ val }).copy());
return rules::Seq::Build(elements);
}

rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
error = pair.second;
return pair.first;
}

public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};

pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;

for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}

for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}

return { PreparedGrammar(rules, aux_rules, grammar.options), nullptr };
}
}
@@ -5,7 +5,7 @@

namespace tree_sitter {
class PreparedGrammar;

namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);
@@ -57,7 +57,7 @@ namespace tree_sitter {

SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
};

const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);

class TokenExtractor : public rules::IdentityRuleFn {
@@ -71,7 +71,7 @@ namespace tree_sitter {
return make_shared<Symbol>(index, SymbolOptionAuxToken);

}

rule_ptr default_apply(const rules::Rule *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {
@@ -80,7 +80,7 @@ namespace tree_sitter {
return result;
}
}

rule_ptr apply_to(const rules::Metadata *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {
@@ -21,7 +21,7 @@ namespace tree_sitter {
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;

namespace prepare_grammar {
class PatternParser {
public:
@@ -29,7 +29,7 @@ namespace tree_sitter {
input(input),
length(input.length()),
position(0) {}

pair<rule_ptr, const GrammarError *> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
@@ -47,7 +47,7 @@ namespace tree_sitter {
auto rule = (choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
}

private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
rule_ptr result = blank();
@@ -63,7 +63,7 @@ namespace tree_sitter {
} while (has_more_input());
return { result, nullptr };
}

pair<rule_ptr, const GrammarError *> factor() {
auto pair = atom();
if (pair.second)
@@ -87,7 +87,7 @@ namespace tree_sitter {
}
return { result, nullptr };
}

pair<rule_ptr, const GrammarError *> atom() {
switch (peek()) {
case '(': {
@@ -103,7 +103,7 @@ namespace tree_sitter {
case '[': {
next();
auto pair = char_set();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
@@ -122,13 +122,13 @@ namespace tree_sitter {
}
default: {
auto pair = single_char();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
return { pair.first.copy(), nullptr };
}
}
}

pair<CharacterSet, const GrammarError *> char_set() {
bool is_affirmative = true;
if (peek() == '^') {
@@ -146,7 +146,7 @@ namespace tree_sitter {
result = result.complement();
return { result, nullptr };
}

pair<CharacterSet, const GrammarError *> single_char() {
CharacterSet value;
switch (peek()) {
@@ -168,7 +168,7 @@ namespace tree_sitter {
}
return { value, nullptr };
}

CharacterSet escaped_char(char value) {
switch (value) {
case 'a':
@@ -181,23 +181,23 @@ namespace tree_sitter {
return CharacterSet({ value });
}
}

void next() {
position++;
}

char peek() {
return input[position];
}

bool has_more_input() {
return position < length;
}

pair<rule_ptr, const GrammarError *> error(string msg) {
return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) };
}

const string input;
const size_t length;
size_t position;
@@ -24,10 +24,10 @@ namespace tree_sitter {
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;

if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);

return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}
@@ -25,11 +25,11 @@ namespace tree_sitter {
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}

string apply_to(const rules::Seq *rule) {
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
}

string apply_to(const rules::Choice *rule) {
string result = "(choice";
for (auto &element : rule->elements)
@@ -10,7 +10,7 @@ namespace tree_sitter {
using std::ostream;
using rules::rule_ptr;
using rules::Symbol;

PreparedGrammar::PreparedGrammar() : Grammar({}), aux_rules({}), options({}) {}

PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,