diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 7a111622..fc6c6003 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,14 +6,14 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, + ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use std::collections::hash_map::DefaultHasher; -use std::collections::VecDeque; +use std::collections::{BTreeMap, VecDeque}; use std::u32; use std::fmt::Write; @@ -36,6 +36,7 @@ struct ParseStateQueueEntry { struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, + field_names_by_hidden_symbol: HashMap>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, state_ids_by_item_set: HashMap, ParseStateId>, @@ -48,7 +49,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.child_info_sequences.push(Vec::new()); + self.parse_table.child_infos.push(ChildInfo::default()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -177,7 +178,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - child_info_sequence_id: self.get_child_info_sequence_id(item), + child_info_id: self.get_child_info_id(item), } }; @@ -646,34 +647,56 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId { - let mut child_info_sequence: Vec = item - .production - .steps - .iter() - .map(|s| ChildInfo { - alias: s.alias.clone(), - field_name: s.field_name.clone(), - }) - .collect(); - while child_info_sequence.last() == Some(&ChildInfo::default()) { - child_info_sequence.pop(); + fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId { + let mut child_info = ChildInfo { + alias_sequence: Vec::new(), + field_map: BTreeMap::new(), + }; + + for (i, step) in item.production.steps.iter().enumerate() { + child_info.alias_sequence.push(step.alias.clone()); + if let Some(field_name) = &step.field_name { + child_info + .field_map + .entry(field_name.clone()) + .or_insert(Vec::new()) + .push(FieldLocation { + index: i, + inherited: false, + }); + } + if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) { + for field_name in field_names { + child_info + .field_map + .entry(field_name.clone()) + .or_insert(Vec::new()) + .push(FieldLocation { + index: i, + inherited: true, + }); + } + } } + + while child_info.alias_sequence.last() == Some(&None) { + child_info.alias_sequence.pop(); + } + if item.production.steps.len() > self.parse_table.max_production_length_with_child_info { self.parse_table.max_production_length_with_child_info = item.production.steps.len() } + if let Some(index) = self .parse_table - .child_info_sequences + .child_infos .iter() - .position(|seq| *seq == child_info_sequence) + .position(|seq| *seq == child_info) { index } else { - self.parse_table - .child_info_sequences - .push(child_info_sequence); - self.parse_table.child_info_sequences.len() - 1 + self.parse_table.child_infos.push(child_info); + self.parse_table.child_infos.len() - 1 } } @@ -720,6 +743,26 @@ fn populate_following_tokens( } } +fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap> { + let mut result = HashMap::new(); + for (i, variable) in grammar.variables.iter().enumerate() { + let mut field_names = Vec::new(); + if variable.kind == VariableType::Hidden { + for production in &variable.productions { + for step in &production.steps { + if let Some(field_name) = &step.field_name { + if let Err(i) = field_names.binary_search(field_name) { + field_names.insert(i, field_name.clone()); + } + } + } + } + } + result.insert(Symbol::non_terminal(i), field_names); + } + result +} + pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -746,9 +789,10 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - child_info_sequences: Vec::new(), + child_infos: Vec::new(), max_production_length_with_child_info: 0, }, + field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar), } .build()?; diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 81a153d3..f3862732 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - child_info_sequence_id: 0, + child_info_id: 0, symbol, .. } => { diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 089edb79..bc7a6aa0 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1,7 +1,9 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; -use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; +use super::tables::{ + AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, +}; use core::ops::Range; use hashbrown::{HashMap, HashSet}; use std::fmt::Write; @@ -70,12 +72,14 @@ impl Generator { self.add_symbol_names_list(); self.add_symbol_metadata_list(); - if self.parse_table.child_info_sequences.len() > 1 { - if !self.field_names.is_empty() { - self.add_field_name_enum(); - } + if !self.field_names.is_empty() { + self.add_field_name_enum(); self.add_field_name_names_list(); - self.add_child_info_sequences(); + self.add_field_sequences(); + } + + if !self.alias_ids.is_empty() { + self.add_alias_sequences(); } let mut main_lex_table = LexTable::default(); @@ -109,13 +113,13 @@ impl Generator { } let mut field_names = Vec::new(); - for child_info_sequence in &self.parse_table.child_info_sequences { - for entry in child_info_sequence { - if let Some(field_name) = &entry.field_name { - field_names.push(field_name); - } + for child_info in &self.parse_table.child_infos { + for field_name in child_info.field_map.keys() { + field_names.push(field_name); + } - if let Some(alias) = &entry.alias { + for alias in &child_info.alias_sequence { + if let Some(alias) = &alias { let alias_kind = if alias.is_named { VariableType::Named } else { @@ -350,22 +354,22 @@ impl Generator { add_line!(self, ""); } - fn add_child_info_sequences(&mut self) { + fn add_alias_sequences(&mut self) { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", - self.parse_table.child_info_sequences.len() + self.parse_table.child_infos.len() ); indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { - if sequence.iter().all(|i| i.alias.is_none()) { + for (i, child_info) in self.parse_table.child_infos.iter().enumerate() { + if child_info.alias_sequence.is_empty() { continue; } add_line!(self, "[{}] = {{", i); indent!(self); - for (j, child_info) in sequence.iter().enumerate() { - if let Some(alias) = &child_info.alias { + for (j, alias) in child_info.alias_sequence.iter().enumerate() { + if let Some(alias) = alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } } @@ -375,28 +379,66 @@ impl Generator { dedent!(self); add_line!(self, "}};"); add_line!(self, ""); + } - add_line!( - self, - "static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", - self.parse_table.child_info_sequences.len() + fn add_field_sequences(&mut self) { + let mut flat_field_maps = vec![]; + let mut next_flat_field_map_index = self.parse_table.child_infos.len(); + self.get_field_map_id( + &Vec::new(), + &mut flat_field_maps, + &mut next_flat_field_map_index, ); - indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { - if sequence.iter().all(|i| i.field_name.is_none()) { - continue; - } - add_line!(self, "[{}] = {{", i); - indent!(self); - for (j, child_info) in sequence.iter().enumerate() { - if let Some(field_name) = &child_info.field_name { - add_line!(self, "[{}] = {},", j, self.field_id(&field_name)); + let mut field_map_ids = Vec::new(); + for child_info in &self.parse_table.child_infos { + if !child_info.field_map.is_empty() { + let mut flat_field_map = Vec::new(); + for (field_name, locations) in &child_info.field_map { + for location in locations { + flat_field_map.push((field_name.clone(), *location)); + } } + field_map_ids.push(( + self.get_field_map_id( + &flat_field_map, + &mut flat_field_maps, + &mut next_flat_field_map_index, + ), + flat_field_map.len(), + )); + } else { + field_map_ids.push((0, 0)); + } + } + + add_line!(self, "static const TSFieldMapping ts_field_map[] = {{",); + indent!(self); + + add_line!(self, "/* child info id -> (field map index, count) */"); + for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { + if length > 0 { + add_line!(self, "[{}] = {{{}, {}, 0}},", child_info_id, row_id, length); + } + } + + add!(self, "\n"); + add_line!(self, "/* field id -> child index */"); + for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { + add_line!(self, "[{}] =", row_index); + indent!(self); + for (field_name, location) in field_pairs { + add_line!( + self, + "{{{}, {}, {}}},", + self.field_id(&field_name), + location.index, + location.inherited + ); } dedent!(self); - add_line!(self, "}},"); } + dedent!(self); add_line!(self, "}};"); add_line!(self, ""); @@ -762,19 +804,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - child_info_sequence_id, + child_info_id, .. } => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if child_info_sequence_id != 0 { - add!( - self, - ", .child_info_sequence_id = {}", - child_info_sequence_id - ); + if child_info_id != 0 { + add!(self, ", .child_info_id = {}", child_info_id); } add!(self, ")"); } @@ -839,17 +877,17 @@ impl Generator { add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - if self.parse_table.child_info_sequences.len() > 1 { + if !self.alias_ids.is_empty() { add_line!( self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); + } - add_line!(self, ".field_count = FIELD_COUNT,"); - add_line!( - self, - ".field_sequences = (const TSFieldId *)ts_field_sequences," - ); + add_line!(self, ".field_count = FIELD_COUNT,"); + + if !self.field_names.is_empty() { + add_line!(self, ".field_map = (const TSFieldMapping *)ts_field_map,"); add_line!(self, ".field_names = ts_field_names,"); } @@ -907,6 +945,22 @@ impl Generator { result } + fn get_field_map_id( + &self, + flat_field_map: &Vec<(String, FieldLocation)>, + flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>, + next_flat_field_map_index: &mut usize, + ) -> usize { + if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) { + return *index; + } + + let result = *next_flat_field_map_index; + flat_field_maps.push((result, flat_field_map.clone())); + *next_flat_field_map_index += flat_field_map.len(); + result + } + fn get_external_scanner_state_id(&mut self, external_tokens: HashSet) -> usize { self.external_scanner_states .iter() diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index fc1ad642..99adde69 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,8 +1,9 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; use hashbrown::HashMap; +use std::collections::BTreeMap; -pub(crate) type ChildInfoSequenceId = usize; +pub(crate) type ChildInfoId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -21,7 +22,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - child_info_sequence_id: ChildInfoSequenceId, + child_info_id: ChildInfoId, }, } @@ -39,17 +40,23 @@ pub(crate) struct ParseState { pub unfinished_item_signature: u64, } +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldLocation { + pub index: usize, + pub inherited: bool, +} + #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct ChildInfo { - pub alias: Option, - pub field_name: Option, + pub alias_sequence: Vec>, + pub field_map: BTreeMap>, } #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub child_info_sequences: Vec>, + pub child_infos: Vec, pub max_production_length_with_child_info: usize, } diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index dc4bb7a2..06a6fd8f 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -343,7 +343,7 @@ fn test_node_field_names() { let (parser_name, parser_code) = generate_parser_for_grammar( r#" { - "name": "test_grammar_with_refs", + "name": "test_grammar_with_fields", "extras": [ {"type": "PATTERN", "value": "\\s+"} ], @@ -354,30 +354,54 @@ fn test_node_field_names() { { "type": "FIELD", "name": "field_1", - "content": { - "type": "STRING", - "value": "child-1" - } + "content": {"type": "STRING", "value": "child-0"} }, { "type": "CHOICE", "members": [ + {"type": "STRING", "value": "child-1"}, + {"type": "BLANK"}, + + // This isn't used in the test, but prevents `_hidden_rule1` + // from being eliminated as a unit reduction. { - "type": "STRING", - "value": "child-2" - }, - { - "type": "BLANK" + "type": "ALIAS", + "value": "x", + "named": true, + "content": { + "type": "SYMBOL", + "name": "_hidden_rule1" + } } ] }, { "type": "FIELD", "name": "field_2", - "content": { - "type": "STRING", - "value": "child-3" - } + "content": {"type": "SYMBOL", "name": "_hidden_rule1"} + }, + {"type": "SYMBOL", "name": "_hidden_rule2"} + ] + }, + + // Fields pointing to hidden nodes with a single child resolve to the child. + "_hidden_rule1": { + "type": "CHOICE", + "members": [ + {"type": "STRING", "value": "child-2"}, + {"type": "STRING", "value": "child-2.5"} + ] + }, + + // Fields within hidden nodes can be referenced through the parent node. + "_hidden_rule2": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "child-3"}, + { + "type": "FIELD", + "name": "field_3", + "content": {"type": "STRING", "value": "child-4"} } ] } @@ -391,10 +415,30 @@ fn test_node_field_names() { let language = get_test_language(&parser_name, &parser_code, None); parser.set_language(language).unwrap(); - let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); + let tree = parser.parse("child-0 child-1 child-2 child-3 child-4", None).unwrap(); let root_node = tree.root_node(); + assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); + assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4)); + + let mut cursor = root_node.walk(); + assert_eq!(cursor.field_name(), None); + cursor.goto_first_child(); + assert_eq!(cursor.node().kind(), "child-0"); + assert_eq!(cursor.field_name(), Some("field_1")); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-1"); + assert_eq!(cursor.field_name(), None); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-2"); + assert_eq!(cursor.field_name(), Some("field_2")); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-3"); + assert_eq!(cursor.field_name(), None); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-4"); + assert_eq!(cursor.field_name(), Some("field_3")); } fn get_all_nodes(tree: &Tree) -> Vec { diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index d9d63614..d81d0271 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -19,6 +19,12 @@ typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; #endif +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapping; + typedef uint16_t TSStateId; typedef struct { @@ -55,7 +61,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t child_info_sequence_id; + uint8_t child_info_id; }; } params; TSParseActionType type : 4; @@ -100,7 +106,7 @@ struct TSLanguage { void (*deserialize)(void *, const char *, unsigned); } external_scanner; uint32_t field_count; - const TSFieldId *field_sequences; + const TSFieldMapping *field_map; const char **field_names; }; diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index fad30e84..83331cce 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -108,7 +108,7 @@ static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLa .subtree = tree, .position = length_zero(), .child_index = 0, - .child_info_offset = 0, + .structural_child_index = 0, })); return (Iterator) { .cursor = *cursor, @@ -144,11 +144,15 @@ Length iterator_end_position(Iterator *self) { static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (ts_subtree_visible(*entry.subtree)) return true; - if (entry.child_info_offset) { - return self->language->alias_sequences[entry.child_info_offset] != 0; - } else { - return false; + if (self->cursor.stack.size > 1) { + Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->language, + parent.ptr->child_info_id + ); + return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } + return false; } static void iterator_get_visible_state(const Iterator *self, Subtree *tree, @@ -163,8 +167,15 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, for (; i + 1 > 0; i--) { TreeCursorEntry entry = self->cursor.stack.contents[i]; - if (entry.child_info_offset) { - *alias_symbol = self->language->alias_sequences[entry.child_info_offset]; + if (i > 0) { + const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->language, + parent->ptr->child_info_id + ); + if (alias_sequence) { + *alias_symbol = alias_sequence[entry.structural_child_index]; + } } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { @@ -190,9 +201,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; - uint32_t child_info_offset = - self->language->max_child_info_production_length * - ts_subtree_child_info_sequence_id(*entry.subtree); + uint32_t structural_child_index = 0; for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { const Subtree *child = &entry.subtree->ptr->children[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); @@ -203,7 +212,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { .subtree = child, .position = position, .child_index = i, - .child_info_offset = child_info_offset, + .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { @@ -220,9 +229,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { } position = child_right; - if (!ts_subtree_extra(*child) && child_info_offset) { - child_info_offset++; - } + if (!ts_subtree_extra(*child)) structural_child_index++; } } while (did_descend); @@ -249,17 +256,15 @@ static void iterator_advance(Iterator *self) { uint32_t child_index = entry.child_index + 1; if (ts_subtree_child_count(*parent) > child_index) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t child_info_offset = entry.child_info_offset; - if (child_info_offset && !ts_subtree_extra(*entry.subtree)) { - child_info_offset++; - } + uint32_t structural_child_index = entry.structural_child_index; + if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; const Subtree *next_child = &parent->ptr->children[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = next_child, .position = position, .child_index = child_index, - .child_info_offset = child_info_offset, + .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { diff --git a/lib/src/language.h b/lib/src/language.h index 43a5eaa5..09adca62 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -81,12 +81,29 @@ ts_language_enabled_external_tokens(const TSLanguage *self, } static inline const TSSymbol * -ts_language_alias_sequence(const TSLanguage *self, unsigned id) { - return id > 0 ? - self->alias_sequences + id * self->max_child_info_production_length : +ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { + return child_info_id > 0 ? + self->alias_sequences + child_info_id * self->max_child_info_production_length : NULL; } +static inline void ts_language_field_map( + const TSLanguage *self, + uint32_t child_info_id, + const TSFieldMapping **start, + const TSFieldMapping **end +) { + // To find the field mappings for a given child info id, first index + // into the field map using the child info id directly. This 'header' + // row contains two values: + // * the index where the field mappings start + // * the number of field mappings. + const TSFieldMapping *field_map = self->field_map; + TSFieldMapping header = field_map[child_info_id]; + *start = &field_map[header.field_id]; + *end = &field_map[header.field_id] + header.child_index; +} + #ifdef __cplusplus } #endif diff --git a/lib/src/node.c b/lib/src/node.c index 8ed8355e..7ae8f115 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -8,8 +8,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t child_info_offset; - TSFieldId last_field_id; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; } NodeChildIterator; // TSNode - constructors @@ -49,29 +49,35 @@ static inline Subtree ts_node__subtree(TSNode self) { static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, 0}; + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; } - uint32_t child_info_offset = - subtree.ptr->child_info_sequence_id * - node->tree->language->max_child_info_production_length; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, + subtree.ptr->child_info_id + ); return (NodeChildIterator) { .tree = node->tree, .parent = subtree, .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, .child_index = 0, - .child_info_offset = child_info_offset, - .last_field_id = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, }; } +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { + return self->child_index == self->parent.ptr->child_count; +} + static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) { - if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; + if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; const Subtree *child = &self->parent.ptr->children[self->child_index]; TSSymbol alias_symbol = 0; - if (!ts_subtree_extra(*child) && self->child_info_offset) { - alias_symbol = self->tree->language->alias_sequences[self->child_info_offset]; - self->last_field_id = self->tree->language->field_sequences[self->child_info_offset]; - self->child_info_offset++; + if (!ts_subtree_extra(*child)) { + if (self->alias_sequence) { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; } if (self->child_index > 0) { self->position = length_add(self->position, ts_subtree_padding(*child)); @@ -452,15 +458,68 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { } TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { - if (field_id) { - TSNode child; - NodeChildIterator iterator = ts_node_iterate_children(&self); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (iterator.last_field_id == field_id) { +recur: + if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); + + const TSFieldMapping *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->child_info_id, + &field_map, + &field_map_end + ); + if (field_map == field_map_end) return ts_node__null(); + + // The field mappings are sorted by their field id. Scan all + // the mappings to find the ones for the given field id. + while (field_map->field_id < field_id) { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + while (field_map_end[-1].field_id > field_id) { + field_map_end--; + if (field_map == field_map_end) return ts_node__null(); + } + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (!ts_subtree_extra(ts_node__subtree(child))) { + uint32_t index = iterator.structural_child_index - 1; + if (index < field_map->child_index) continue; + + // Hidden nodes' fields are "inherited" by their visible parent. + if (field_map->inherited) { + + // If this is the *last* possible child node for this field, + // then perform a tail call to avoid recursion. + if (field_map + 1 == field_map_end) { + self = child; + goto recur; + } + + // Otherwise, descend into this child, but if that child doesn't + // contain the field, continue searching subsequent children. + else { + TSNode result = ts_node_child_by_field_id(child, field_id); + if (result.id) return result; + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } + + else if (ts_node__is_relevant(child, true)) { return child; } + + // If the field refers to a hidden node, return its first visible + // child. + else { + return ts_node_child(child, 0); + } } } + return ts_node__null(); } diff --git a/lib/src/parser.c b/lib/src/parser.c index 0c4453e9..5fd75cd8 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t child_info_sequence_id, bool fragile) { + uint16_t child_info_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); @@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, child_info_sequence_id, self->language + symbol, &children, child_info_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->child_info_sequence_id = child_info_sequence_id; + parent.ptr->child_info_id = child_info_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->child_info_sequence_id, + child.ptr->child_info_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .child_info_sequence_id = action.params.child_info_sequence_id, + .child_info_id = action.params.child_info_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.child_info_sequence_id, + action.dynamic_precedence, action.child_info_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, - action.params.dynamic_precedence, action.params.child_info_sequence_id, + action.params.dynamic_precedence, action.params.child_info_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 557e92d7..9eca0327 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short child_info_sequence_id; + unsigned short child_info_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index ec1c11ee..4c93d6d0 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned child_info_sequence_id, + SubtreeArray *children, unsigned child_info_id, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .child_info_sequence_id = child_info_sequence_id, + .child_info_id = child_info_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -913,20 +913,17 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, fprintf(f, "\"]\n"); uint32_t child_start_offset = start_offset; - uint32_t structural_child_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - language, - ts_subtree_child_info_sequence_id(*self) - ); + uint32_t child_info_offset = + language->max_child_info_production_length * + ts_subtree_child_info_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; - if (ts_subtree_extra(*child)) { - ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f); - } else { - TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; - ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); - structural_child_index++; + TSSymbol alias_symbol = 0; + if (!ts_subtree_extra(*child) && child_info_offset) { + alias_symbol = language->alias_sequences[child_info_offset]; + child_info_offset++; } + ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); child_start_offset += ts_subtree_total_bytes(*child); } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 6226e4f6..f32edfc2 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t child_info_sequence_id; + uint16_t child_info_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_child_info_sequence_id(Subtree self) { +static inline uint16_t ts_subtree_child_info_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->child_info_sequence_id; + return self.ptr->child_info_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index f6cb00b4..099992b9 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -9,7 +9,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t child_info_offset; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; } CursorChildIterator; // CursorChildIterator @@ -17,17 +18,19 @@ typedef struct { static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; } - uint32_t child_info_offset = - last_entry->subtree->ptr->child_info_sequence_id * - self->tree->language->max_child_info_production_length; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + last_entry->subtree->ptr->child_info_id + ); return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, .child_index = 0, - .child_info_offset = child_info_offset, + .structural_child_index = 0, + .alias_sequence = alias_sequence, }; } @@ -40,13 +43,13 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, .subtree = child, .position = self->position, .child_index = self->child_index, - .child_info_offset = self->child_info_offset, + .structural_child_index = self->structural_child_index, }; *visible = ts_subtree_visible(*child); bool extra = ts_subtree_extra(*child); - if (!extra && self->child_info_offset) { - *visible |= self->tree->language->alias_sequences[self->child_info_offset]; - self->child_info_offset++; + if (!extra && self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + self->structural_child_index++; } self->position = length_add(self->position, ts_subtree_size(*child)); @@ -82,7 +85,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { ts_node_start_point(node) }, .child_index = 0, - .child_info_offset = 0, + .structural_child_index = 0, })); } @@ -173,7 +176,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { TreeCursorEntry entry = array_pop(&self->stack); CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); iterator.child_index = entry.child_index; - iterator.child_info_offset = entry.child_info_offset; + iterator.structural_child_index = entry.structural_child_index; iterator.position = entry.position; bool visible = false; @@ -204,9 +207,12 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_aliased = false; if (i > 0) { - is_aliased = - entry->child_info_offset && - self->tree->language->alias_sequences[entry->child_info_offset]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; } if (ts_subtree_visible(*entry->subtree) || is_aliased) { self->stack.size = i + 1; @@ -220,8 +226,15 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); TSSymbol alias_symbol = 0; - if (last_entry->child_info_offset) { - alias_symbol = self->tree->language->alias_sequences[last_entry->child_info_offset]; + if (self->stack.size > 1) { + TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { + alias_symbol = alias_sequence[last_entry->structural_child_index]; + } } return ts_node_new( self->tree, @@ -233,12 +246,40 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *entry = array_back(&self->stack); - if (entry->child_info_offset) { - return self->tree->language->field_sequences[entry->child_info_offset]; - } else { - return 0; + + // Walk up the tree, visiting the current node and its invisible ancestors. + for (unsigned i = self->stack.size - 1; i > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + // Stop walking up when another visible node is found. + if (i != self->stack.size - 1) { + if (ts_subtree_visible(*entry->subtree)) break; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + if (alias_sequence && alias_sequence[entry->structural_child_index]) { + break; + } + } + + const TSFieldMapping *field_map, *field_map_end; + ts_language_field_map( + self->tree->language, + parent_entry->subtree->ptr->child_info_id, + &field_map, &field_map_end + ); + + while (field_map < field_map_end) { + if ( + !field_map->inherited && + field_map->child_index == entry->structural_child_index + ) return field_map->field_id; + field_map++; + } } + return 0; } const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index f50bdb63..55bdad86 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -7,7 +7,7 @@ typedef struct { const Subtree *subtree; Length position; uint32_t child_index; - uint32_t child_info_offset; + uint32_t structural_child_index; } TreeCursorEntry; typedef struct {