Fix handling of tokens consisting of separator characters
The parser is no longer hard-coded to skip whitespace. Tokens such as newlines, whose characters overlap with the separator characters, can now be correctly recognized.
This commit is contained in:
parent
f39cb1890d
commit
1cc7e32e2d
32 changed files with 5401 additions and 4847 deletions
|
|
@@ -145,22 +145,24 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
string switch_on_lookahead_char(const LexState &parse_state) {
|
||||
string code_for_lex_state(const LexState &lex_state) {
|
||||
string result = "";
|
||||
auto expected_inputs = parse_state.expected_inputs();
|
||||
for (auto pair : parse_state.actions)
|
||||
auto expected_inputs = lex_state.expected_inputs();
|
||||
if (lex_state.is_token_start)
|
||||
result += "START_TOKEN();" "\n";
|
||||
for (auto pair : lex_state.actions)
|
||||
if (!pair.first.is_empty())
|
||||
result += _if(condition_for_character_rule(pair.first),
|
||||
code_for_lex_actions(pair.second, expected_inputs));
|
||||
result += code_for_lex_actions(parse_state.default_action, expected_inputs);
|
||||
result += code_for_lex_actions(lex_state.default_action, expected_inputs);
|
||||
return result;
|
||||
}
|
||||
|
||||
string switch_on_lex_state() {
|
||||
string body = "";
|
||||
for (size_t i = 0; i < lex_table.states.size(); i++)
|
||||
body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
|
||||
body += _case("ts_lex_state_error", switch_on_lookahead_char(lex_table.error_state));
|
||||
body += _case(std::to_string(i), code_for_lex_state(lex_table.states[i]));
|
||||
body += _case("ts_lex_state_error", code_for_lex_state(lex_table.error_state));
|
||||
body += _default("LEX_PANIC();");
|
||||
return _switch("lex_state", body);
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue