diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 3d7b6fd0..92fddefe 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -175,7 +175,17 @@ fn populate_used_symbols( parse_table.symbols.push(Symbol::end()); for (i, value) in terminal_usages.into_iter().enumerate() { if value { - parse_table.symbols.push(Symbol::terminal(i)); + // Assign the grammar's word token a low numerical index. This ensures that + // it can be stored in a subtree with no heap allocations, even for grammars with + // very large numbers of tokens. This is an optimization, but it's also important to + // ensure that a subtree's symbol can be successfully reassigned to the word token + // without having to move the subtree to the heap. + // See https://github.com/tree-sitter/tree-sitter/issues/258 + if syntax_grammar.word_token.map_or(false, |t| t.index == i) { + parse_table.symbols.insert(1, Symbol::terminal(i)); + } else { + parse_table.symbols.push(Symbol::terminal(i)); + } } } for (i, value) in external_usages.into_iter().enumerate() { diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 72df21b2..ae07763b 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -15,16 +15,6 @@ pub(super) fn extract_tokens( extracted_usage_counts: Vec::new(), }; - // Extract the word token first to give it a low numerical index. This ensure that - // it can be stored in a subtree with no heap allocations, even for grammars with - // very large numbers of tokens. This is an optimization, but also important to - // ensure that a subtree's symbol can be successfully reassigned to the word token - // without having to move the subtree to the heap. - // See https://github.com/tree-sitter/tree-sitter/issues/258 - if let Some(token) = grammar.word_token { - extractor.extract_tokens_in_variable(&mut grammar.variables[token.index]); - } - for mut variable in grammar.variables.iter_mut() { extractor.extract_tokens_in_variable(&mut variable); }