Ensure that the word token has a low numerical index

Fixes https://github.com/tree-sitter/tree-sitter/issues/258
This commit is contained in:
Max Brunsfeld 2019-01-17 12:44:14 -08:00
parent 3d11388cd1
commit 9f7079c9c5
2 changed files with 15 additions and 5 deletions

View file

@ -172,17 +172,17 @@ fn populate_used_symbols(
non_terminal_usages[symbol.index] = true;
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::external(i));
}
}
parse_table.symbols.push(Symbol::end());
for (i, value) in terminal_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::terminal(i));
}
}
for (i, value) in external_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::external(i));
}
}
for (i, value) in non_terminal_usages.into_iter().enumerate() {
if value {
parse_table.symbols.push(Symbol::non_terminal(i));

View file

@ -15,6 +15,16 @@ pub(super) fn extract_tokens(
extracted_usage_counts: Vec::new(),
};
// Extract the word token first to give it a low numerical index. This ensures that
// it can be stored in a subtree with no heap allocations, even for grammars with
// very large numbers of tokens. This is an optimization, but also important to
// ensure that a subtree's symbol can be successfully reassigned to the word token
// without having to move the subtree to the heap.
// See https://github.com/tree-sitter/tree-sitter/issues/258
if let Some(token) = grammar.word_token {
extractor.extract_tokens_in_variable(&mut grammar.variables[token.index]);
}
for mut variable in grammar.variables.iter_mut() {
extractor.extract_tokens_in_variable(&mut variable);
}