Fix handling of tokens consisting of separator characters

The parser is no longer hard-coded to skip whitespace. Tokens
such as newlines, whose characters overlap with the separator
characters, can now be correctly recognized.
This commit is contained in:
Max Brunsfeld 2014-04-03 19:10:09 -07:00
parent f39cb1890d
commit 1cc7e32e2d
32 changed files with 5401 additions and 4847 deletions

View file

@ -145,22 +145,24 @@ namespace tree_sitter {
}
}
// Builds, as a string, the generated code for a single lex state.
// NOTE(review): this is a diff rendering with the +/- markers stripped. Where
// two near-identical lines are adjacent, the first appears to be the
// pre-change version (switch_on_lookahead_char / parse_state) and the second
// its replacement (code_for_lex_state / lex_state) - confirm against the repo.
string switch_on_lookahead_char(const LexState &parse_state) {
string code_for_lex_state(const LexState &lex_state) {
string result = "";
auto expected_inputs = parse_state.expected_inputs();
for (auto pair : parse_state.actions)
auto expected_inputs = lex_state.expected_inputs();
// States flagged is_token_start get a "START_TOKEN();" line before anything else.
if (lex_state.is_token_start)
result += "START_TOKEN();" "\n";
// Entries whose character rule is empty are skipped; each remaining entry
// contributes _if(<condition for its character rule>, <code for its actions>).
for (auto pair : lex_state.actions)
if (!pair.first.is_empty())
result += _if(condition_for_character_rule(pair.first),
code_for_lex_actions(pair.second, expected_inputs));
// The default action's code is appended after all per-character entries.
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
result += code_for_lex_actions(lex_state.default_action, expected_inputs);
return result;
}
// Builds, as a string, the code that switches on "lex_state": one _case per
// entry of lex_table.states, a "ts_lex_state_error" case for
// lex_table.error_state, and a _default of "LEX_PANIC();".
// NOTE(review): diff rendering - the two switch_on_lookahead_char lines appear
// to be the pre-change versions of the two code_for_lex_state lines that follow.
string switch_on_lex_state() {
string body = "";
// Each case label is the state's index in lex_table.states, as decimal text.
for (size_t i = 0; i < lex_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
body += _case(std::to_string(i), code_for_lex_state(lex_table.states[i]));
body += _case("ts_lex_state_error", code_for_lex_state(lex_table.error_state));
body += _default("LEX_PANIC();");
return _switch("lex_state", body);
}