Allow lexer to accept tokens that ended at previous positions

* Track lookahead in each tree
* Add 'mark_end' API that external scanners can use
This commit is contained in:
Max Brunsfeld 2017-03-13 17:03:47 -07:00
parent 12d2a9d93f
commit d222dbb9fd
12 changed files with 96 additions and 71 deletions

View file

@ -217,9 +217,10 @@ class CCodeGenerator {
line("START_LEXER();");
_switch("state", [&]() {
size_t i = 0;
for (const LexState &state : lex_table.states)
for (const LexState &state : lex_table.states) {
_case(to_string(i++), [&]() { add_lex_state(state); });
_default([&]() { line("LEX_ERROR();"); });
}
_default([&]() { line("return false;"); });
});
});
line("}");
@ -396,18 +397,18 @@ class CCodeGenerator {
}
// Emits the generated code for a single lex state by calling line(), _if()
// and the add_* helpers with the fields of `lex_state`.
// NOTE(review): this excerpt appears to interleave the removed and the added
// lines of a diff with the +/- markers stripped (the advance-action loop and
// the accept-action check each appear twice), so it is not compilable as
// shown -- confirm against the real file before relying on statement order.
void add_lex_state(const LexState &lex_state) {
// Emits START_TOKEN() only for states flagged as a token start.
if (lex_state.is_token_start)
line("START_TOKEN();");
// Braced accept-action check placed before the advance actions.
if (lex_state.accept_action.is_present()) {
add_accept_token_action(lex_state.accept_action);
}
// Unbraced spelling of the loop header and emptiness guard.
for (const auto &pair : lex_state.advance_actions)
if (!pair.first.is_empty())
// Braced spelling of the same loop: for each entry whose character set is not
// empty, _if() receives one callback that writes the character-set condition
// and one that writes the advance action.
for (const auto &pair : lex_state.advance_actions) {
if (!pair.first.is_empty()) {
_if([&]() { add_character_set_condition(pair.first); },
[&]() { add_advance_action(pair.second); });
}
}
// Unbraced accept-action check placed after the advance actions; this
// spelling falls back to emitting LEX_ERROR() when no accept action is present.
if (lex_state.accept_action.is_present())
add_accept_token_action(lex_state.accept_action);
else
line("LEX_ERROR();");
// Last line emitted for the state.
line("END_STATE();");
}
void add_character_set_condition(const rules::CharacterSet &rule) {
@ -428,8 +429,7 @@ class CCodeGenerator {
for (const auto &range : ranges) {
if (!first) {
add(" ||");
line();
add_padding();
line(" ");
}
add("(");
@ -442,20 +442,20 @@ class CCodeGenerator {
}
// Appends a condition on the generated `lookahead` variable for one character
// range: an equality test when range.min == range.max, otherwise a two-sided
// bounds check (min <= lookahead && lookahead <= max).
// NOTE(review): each add() call appears in two spellings that build the same
// text -- one through the local `lookahead` string, one with the name inlined
// in the literal. This looks like the old and new lines of a diff shown
// together without +/- markers; only one of each pair belongs in the real file.
void add_character_range_condition(const rules::CharacterRange &range) {
string lookahead("lookahead");
if (range.min == range.max) {
add(lookahead + " == " + escape_char(range.min));
add("lookahead == " + escape_char(range.min));
} else {
add(escape_char(range.min) + string(" <= ") + lookahead + " && " +
lookahead + " <= " + escape_char(range.max));
add(escape_char(range.min) + string(" <= lookahead && lookahead <= ") +
escape_char(range.max));
}
}
// Emits one line for an advance action: ADVANCE(<state_index>) when
// action.in_main_token is set, SKIP(<state_index>) otherwise.
// NOTE(review): the `if` and `else` lines each appear twice (unbraced and
// braced). This looks like the old and new lines of a diff shown together
// without +/- markers, so the block is not compilable as shown.
void add_advance_action(const AdvanceAction &action) {
if (action.in_main_token)
if (action.in_main_token) {
line("ADVANCE(" + to_string(action.state_index) + ");");
else
} else {
line("SKIP(" + to_string(action.state_index) + ");");
}
}
void add_accept_token_action(const AcceptTokenAction &action) {
@ -669,7 +669,7 @@ class CCodeGenerator {
// Appends indentation to the output by calling add() once per level of
// `indent_level`.
// NOTE(review): the two add() lines read identically here; presumably they are
// the old and new versions of one line and differed only in the amount of
// whitespace inside the literal, which this excerpt does not preserve --
// TODO confirm against the real file.
void add_padding() {
for (size_t i = 0; i < indent_level; i++)
add(" ");
add(" ");
}
void indent(function<void()> body) {

View file

@ -42,12 +42,9 @@ bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
(is_string == other.is_string);
}
// Default constructor: initializes is_token_start to false.
// NOTE(review): the surrounding hunk shrinks from 12 to 9 lines, so this line
// is presumably one of those removed by the commit -- confirm.
LexState::LexState() : is_token_start(false) {}
// Equality for lex states: compares advance_actions and accept_action.
// NOTE(review): two endings of the return expression are shown together -- one
// that also compares is_token_start and one that stops after accept_action.
// This looks like the old and new lines of a diff without +/- markers; only one
// ending belongs in the real file.
bool LexState::operator==(const LexState &other) const {
return advance_actions == other.advance_actions &&
accept_action == other.accept_action &&
is_token_start == other.is_token_start;
accept_action == other.accept_action;
}
} // namespace tree_sitter

View file

@ -35,12 +35,10 @@ struct AcceptTokenAction {
};
// One state of the lex table: a map of advance actions keyed by character set,
// plus an accept-token action.
// NOTE(review): the hunk header shrinks this region from 12 to 10 lines, so two
// of the lines below were presumably removed by the commit (likely the
// constructor declaration and is_token_start) -- confirm against the real file.
struct LexState {
LexState();
bool operator==(const LexState &) const;
// Advance action to take for each character set.
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
// Accept action for this state; callers test it with is_present() before use.
AcceptTokenAction accept_action;
// Flag that add_lex_state() reads to decide whether to emit START_TOKEN().
bool is_token_start;
};
struct LexTable {