From 9f7079c9c50abd43cceda31e22b6871ac4db6847 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 17 Jan 2019 12:44:14 -0800 Subject: [PATCH] Ensure that the word token has a low numerical index Fixes https://github.com/tree-sitter/tree-sitter/issues/258 --- cli/src/generate/build_tables/mod.rs | 10 +++++----- cli/src/generate/prepare_grammar/extract_tokens.rs | 10 ++++++++++ 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 7811176b..3d7b6fd0 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -172,17 +172,17 @@ fn populate_used_symbols( non_terminal_usages[symbol.index] = true; } } - for (i, value) in external_usages.into_iter().enumerate() { - if value { - parse_table.symbols.push(Symbol::external(i)); - } - } parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { parse_table.symbols.push(Symbol::terminal(i)); } } + for (i, value) in external_usages.into_iter().enumerate() { + if value { + parse_table.symbols.push(Symbol::external(i)); + } + } for (i, value) in non_terminal_usages.into_iter().enumerate() { if value { parse_table.symbols.push(Symbol::non_terminal(i)); diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index ae07763b..72df21b2 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -15,6 +15,16 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; + // Extract the word token first to give it a low numerical index. This ensures that + // it can be stored in a subtree with no heap allocations, even for grammars with + // very large numbers of tokens. 
This is an optimization, but also important to + // ensure that a subtree's symbol can be successfully reassigned to the word token + // without having to move the subtree to the heap. + // See https://github.com/tree-sitter/tree-sitter/issues/258 + if let Some(token) = grammar.word_token { + extractor.extract_tokens_in_variable(&mut grammar.variables[token.index]); + } + for mut variable in grammar.variables.iter_mut() { extractor.extract_tokens_in_variable(&mut variable); }