Fully implement ts_node_child_by_field_id

This commit is contained in:
Max Brunsfeld 2019-02-07 17:18:33 -08:00
parent bef80c162e
commit 1d1674811c
15 changed files with 455 additions and 181 deletions

View file

@ -6,14 +6,14 @@ use crate::generate::grammars::{
};
use crate::generate::rules::{Associativity, Symbol, SymbolType};
use crate::generate::tables::{
ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable,
ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable,
ParseTableEntry,
};
use core::ops::Range;
use hashbrown::hash_map::Entry;
use hashbrown::{HashMap, HashSet};
use std::collections::hash_map::DefaultHasher;
use std::collections::VecDeque;
use std::collections::{BTreeMap, VecDeque};
use std::u32;
use std::fmt::Write;
@ -36,6 +36,7 @@ struct ParseStateQueueEntry {
struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder<'a>,
field_names_by_hidden_symbol: HashMap<Symbol, Vec<String>>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
@ -48,7 +49,7 @@ struct ParseTableBuilder<'a> {
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<ParseTable> {
// Ensure that the empty child info has index 0.
self.parse_table.child_info_sequences.push(Vec::new());
self.parse_table.child_infos.push(ChildInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
@ -177,7 +178,7 @@ impl<'a> ParseTableBuilder<'a> {
precedence: item.precedence(),
associativity: item.associativity(),
dynamic_precedence: item.production.dynamic_precedence,
child_info_sequence_id: self.get_child_info_sequence_id(item),
child_info_id: self.get_child_info_id(item),
}
};
@ -646,34 +647,56 @@ impl<'a> ParseTableBuilder<'a> {
}
}
fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId {
let mut child_info_sequence: Vec<ChildInfo> = item
.production
.steps
.iter()
.map(|s| ChildInfo {
alias: s.alias.clone(),
field_name: s.field_name.clone(),
})
.collect();
while child_info_sequence.last() == Some(&ChildInfo::default()) {
child_info_sequence.pop();
fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId {
let mut child_info = ChildInfo {
alias_sequence: Vec::new(),
field_map: BTreeMap::new(),
};
for (i, step) in item.production.steps.iter().enumerate() {
child_info.alias_sequence.push(step.alias.clone());
if let Some(field_name) = &step.field_name {
child_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: false,
});
}
if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) {
for field_name in field_names {
child_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: true,
});
}
}
}
while child_info.alias_sequence.last() == Some(&None) {
child_info.alias_sequence.pop();
}
if item.production.steps.len() > self.parse_table.max_production_length_with_child_info {
self.parse_table.max_production_length_with_child_info = item.production.steps.len()
}
if let Some(index) = self
.parse_table
.child_info_sequences
.child_infos
.iter()
.position(|seq| *seq == child_info_sequence)
.position(|seq| *seq == child_info)
{
index
} else {
self.parse_table
.child_info_sequences
.push(child_info_sequence);
self.parse_table.child_info_sequences.len() - 1
self.parse_table.child_infos.push(child_info);
self.parse_table.child_infos.len() - 1
}
}
@ -720,6 +743,26 @@ fn populate_following_tokens(
}
}
/// Builds a lookup from every non-terminal symbol of `grammar` to the field
/// names used by the steps of its productions.
///
/// Only variables of kind `VariableType::Hidden` contribute names; every other
/// variable is still present in the map, paired with an empty list. Each list
/// is kept sorted and free of duplicates.
fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap<Symbol, Vec<String>> {
    let mut names_by_symbol = HashMap::new();
    for (variable_index, variable) in grammar.variables.iter().enumerate() {
        let mut sorted_names = Vec::new();
        if variable.kind == VariableType::Hidden {
            let named_steps = variable
                .productions
                .iter()
                .flat_map(|production| production.steps.iter())
                .filter_map(|step| step.field_name.as_ref());
            for name in named_steps {
                // A failed binary search reports the slot that keeps the list
                // sorted; a successful one means the name is already recorded.
                if let Err(position) = sorted_names.binary_search(name) {
                    sorted_names.insert(position, name.clone());
                }
            }
        }
        names_by_symbol.insert(Symbol::non_terminal(variable_index), sorted_names);
    }
    names_by_symbol
}
pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -746,9 +789,10 @@ pub(crate) fn build_parse_table(
parse_table: ParseTable {
states: Vec::new(),
symbols: Vec::new(),
child_info_sequences: Vec::new(),
child_infos: Vec::new(),
max_production_length_with_child_info: 0,
},
field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar),
}
.build()?;

View file

@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> {
ParseAction::ShiftExtra => continue,
ParseAction::Reduce {
child_count: 1,
child_info_sequence_id: 0,
child_info_id: 0,
symbol,
..
} => {

View file

@ -1,7 +1,9 @@
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use super::nfa::CharacterSet;
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use super::tables::{
AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry,
};
use core::ops::Range;
use hashbrown::{HashMap, HashSet};
use std::fmt::Write;
@ -70,12 +72,14 @@ impl Generator {
self.add_symbol_names_list();
self.add_symbol_metadata_list();
if self.parse_table.child_info_sequences.len() > 1 {
if !self.field_names.is_empty() {
self.add_field_name_enum();
}
if !self.field_names.is_empty() {
self.add_field_name_enum();
self.add_field_name_names_list();
self.add_child_info_sequences();
self.add_field_sequences();
}
if !self.alias_ids.is_empty() {
self.add_alias_sequences();
}
let mut main_lex_table = LexTable::default();
@ -109,13 +113,13 @@ impl Generator {
}
let mut field_names = Vec::new();
for child_info_sequence in &self.parse_table.child_info_sequences {
for entry in child_info_sequence {
if let Some(field_name) = &entry.field_name {
field_names.push(field_name);
}
for child_info in &self.parse_table.child_infos {
for field_name in child_info.field_map.keys() {
field_names.push(field_name);
}
if let Some(alias) = &entry.alias {
for alias in &child_info.alias_sequence {
if let Some(alias) = &alias {
let alias_kind = if alias.is_named {
VariableType::Named
} else {
@ -350,22 +354,22 @@ impl Generator {
add_line!(self, "");
}
fn add_child_info_sequences(&mut self) {
fn add_alias_sequences(&mut self) {
add_line!(
self,
"static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
self.parse_table.child_info_sequences.len()
self.parse_table.child_infos.len()
);
indent!(self);
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
if sequence.iter().all(|i| i.alias.is_none()) {
for (i, child_info) in self.parse_table.child_infos.iter().enumerate() {
if child_info.alias_sequence.is_empty() {
continue;
}
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, child_info) in sequence.iter().enumerate() {
if let Some(alias) = &child_info.alias {
for (j, alias) in child_info.alias_sequence.iter().enumerate() {
if let Some(alias) = alias {
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
}
}
@ -375,28 +379,66 @@ impl Generator {
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
add_line!(
self,
"static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
self.parse_table.child_info_sequences.len()
fn add_field_sequences(&mut self) {
let mut flat_field_maps = vec![];
let mut next_flat_field_map_index = self.parse_table.child_infos.len();
self.get_field_map_id(
&Vec::new(),
&mut flat_field_maps,
&mut next_flat_field_map_index,
);
indent!(self);
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
if sequence.iter().all(|i| i.field_name.is_none()) {
continue;
}
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, child_info) in sequence.iter().enumerate() {
if let Some(field_name) = &child_info.field_name {
add_line!(self, "[{}] = {},", j, self.field_id(&field_name));
let mut field_map_ids = Vec::new();
for child_info in &self.parse_table.child_infos {
if !child_info.field_map.is_empty() {
let mut flat_field_map = Vec::new();
for (field_name, locations) in &child_info.field_map {
for location in locations {
flat_field_map.push((field_name.clone(), *location));
}
}
field_map_ids.push((
self.get_field_map_id(
&flat_field_map,
&mut flat_field_maps,
&mut next_flat_field_map_index,
),
flat_field_map.len(),
));
} else {
field_map_ids.push((0, 0));
}
}
add_line!(self, "static const TSFieldMapping ts_field_map[] = {{",);
indent!(self);
add_line!(self, "/* child info id -> (field map index, count) */");
for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
if length > 0 {
add_line!(self, "[{}] = {{{}, {}, 0}},", child_info_id, row_id, length);
}
}
add!(self, "\n");
add_line!(self, "/* field id -> child index */");
for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
add_line!(self, "[{}] =", row_index);
indent!(self);
for (field_name, location) in field_pairs {
add_line!(
self,
"{{{}, {}, {}}},",
self.field_id(&field_name),
location.index,
location.inherited
);
}
dedent!(self);
add_line!(self, "}},");
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
@ -762,19 +804,15 @@ impl Generator {
symbol,
child_count,
dynamic_precedence,
child_info_sequence_id,
child_info_id,
..
} => {
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
}
if child_info_sequence_id != 0 {
add!(
self,
", .child_info_sequence_id = {}",
child_info_sequence_id
);
if child_info_id != 0 {
add!(self, ", .child_info_id = {}", child_info_id);
}
add!(self, ")");
}
@ -839,17 +877,17 @@ impl Generator {
add_line!(self, ".lex_modes = ts_lex_modes,");
add_line!(self, ".symbol_names = ts_symbol_names,");
if self.parse_table.child_info_sequences.len() > 1 {
if !self.alias_ids.is_empty() {
add_line!(
self,
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
);
}
add_line!(self, ".field_count = FIELD_COUNT,");
add_line!(
self,
".field_sequences = (const TSFieldId *)ts_field_sequences,"
);
add_line!(self, ".field_count = FIELD_COUNT,");
if !self.field_names.is_empty() {
add_line!(self, ".field_map = (const TSFieldMapping *)ts_field_map,");
add_line!(self, ".field_names = ts_field_names,");
}
@ -907,6 +945,22 @@ impl Generator {
result
}
/// Returns the starting row index assigned to `flat_field_map`.
///
/// If an identical flattened map was registered before, its existing index is
/// reused. Otherwise the map is appended to `flat_field_maps` at the index
/// currently held in `next_flat_field_map_index`, and that counter is advanced
/// by the number of entries in the map.
fn get_field_map_id(
    &self,
    flat_field_map: &Vec<(String, FieldLocation)>,
    flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
    next_flat_field_map_index: &mut usize,
) -> usize {
    let already_registered = flat_field_maps
        .iter()
        .find(|(_, entries)| *entries == *flat_field_map)
        .map(|(start, _)| *start);

    match already_registered {
        Some(start) => start,
        None => {
            let start = *next_flat_field_map_index;
            flat_field_maps.push((start, flat_field_map.clone()));
            *next_flat_field_map_index = start + flat_field_map.len();
            start
        }
    }
}
fn get_external_scanner_state_id(&mut self, external_tokens: HashSet<usize>) -> usize {
self.external_scanner_states
.iter()

View file

@ -1,8 +1,9 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Associativity, Symbol};
use hashbrown::HashMap;
use std::collections::BTreeMap;
pub(crate) type ChildInfoSequenceId = usize;
pub(crate) type ChildInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
@ -21,7 +22,7 @@ pub(crate) enum ParseAction {
precedence: i32,
dynamic_precedence: i32,
associativity: Option<Associativity>,
child_info_sequence_id: ChildInfoSequenceId,
child_info_id: ChildInfoId,
},
}
@ -39,17 +40,23 @@ pub(crate) struct ParseState {
pub unfinished_item_signature: u64,
}
/// One place within a production where a field name applies.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
/// Index of the production step that the field is attached to.
pub index: usize,
/// `false` when the step itself carries the field name; `true` when the name
/// comes from the hidden symbol found at that step, so the field has to be
/// looked up inside that child.
pub inherited: bool,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ChildInfo {
pub alias: Option<Alias>,
pub field_name: Option<String>,
pub alias_sequence: Vec<Option<Alias>>,
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub child_info_sequences: Vec<Vec<ChildInfo>>,
pub child_infos: Vec<ChildInfo>,
pub max_production_length_with_child_info: usize,
}

View file

@ -343,7 +343,7 @@ fn test_node_field_names() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_grammar_with_refs",
"name": "test_grammar_with_fields",
"extras": [
{"type": "PATTERN", "value": "\\s+"}
],
@ -354,30 +354,54 @@ fn test_node_field_names() {
{
"type": "FIELD",
"name": "field_1",
"content": {
"type": "STRING",
"value": "child-1"
}
"content": {"type": "STRING", "value": "child-0"}
},
{
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "child-1"},
{"type": "BLANK"},
// This isn't used in the test, but prevents `_hidden_rule1`
// from being eliminated as a unit reduction.
{
"type": "STRING",
"value": "child-2"
},
{
"type": "BLANK"
"type": "ALIAS",
"value": "x",
"named": true,
"content": {
"type": "SYMBOL",
"name": "_hidden_rule1"
}
}
]
},
{
"type": "FIELD",
"name": "field_2",
"content": {
"type": "STRING",
"value": "child-3"
}
"content": {"type": "SYMBOL", "name": "_hidden_rule1"}
},
{"type": "SYMBOL", "name": "_hidden_rule2"}
]
},
// Fields pointing to hidden nodes with a single child resolve to the child.
"_hidden_rule1": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "child-2"},
{"type": "STRING", "value": "child-2.5"}
]
},
// Fields within hidden nodes can be referenced through the parent node.
"_hidden_rule2": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "child-3"},
{
"type": "FIELD",
"name": "field_3",
"content": {"type": "STRING", "value": "child-4"}
}
]
}
@ -391,10 +415,30 @@ fn test_node_field_names() {
let language = get_test_language(&parser_name, &parser_code, None);
parser.set_language(language).unwrap();
let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
let tree = parser.parse("child-0 child-1 child-2 child-3 child-4", None).unwrap();
let root_node = tree.root_node();
assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0));
assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2));
assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4));
let mut cursor = root_node.walk();
assert_eq!(cursor.field_name(), None);
cursor.goto_first_child();
assert_eq!(cursor.node().kind(), "child-0");
assert_eq!(cursor.field_name(), Some("field_1"));
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-1");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-2");
assert_eq!(cursor.field_name(), Some("field_2"));
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-3");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-4");
assert_eq!(cursor.field_name(), Some("field_3"));
}
fn get_all_nodes(tree: &Tree) -> Vec<Node> {

View file

@ -19,6 +19,12 @@ typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
// One row of the generated `ts_field_map` table. Rows are used in two ways:
//
// * Header rows, indexed directly by child info id, where `field_id` holds the
//   index of the first mapping row and `child_index` holds the number of
//   mapping rows.
// * Mapping rows, which associate a field id with the index of a child.
//
// NOTE(review): header rows store a table index in a `TSFieldId` and a count
// in a `uint8_t` - confirm that generated tables cannot exceed those ranges.
typedef struct {
// Mapping row: the field's id. Header row: index where the mappings start.
TSFieldId field_id;
// Mapping row: index of the child within the production. Header row: the
// number of mappings.
uint8_t child_index;
// Mapping row: whether the field belongs to a hidden child at `child_index`
// rather than to that child directly. Emitted as 0 in header rows.
bool inherited;
} TSFieldMapping;
typedef uint16_t TSStateId;
typedef struct {
@ -55,7 +61,7 @@ typedef struct {
TSSymbol symbol;
int16_t dynamic_precedence;
uint8_t child_count;
uint8_t child_info_sequence_id;
uint8_t child_info_id;
};
} params;
TSParseActionType type : 4;
@ -100,7 +106,7 @@ struct TSLanguage {
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
uint32_t field_count;
const TSFieldId *field_sequences;
const TSFieldMapping *field_map;
const char **field_names;
};

View file

@ -108,7 +108,7 @@ static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLa
.subtree = tree,
.position = length_zero(),
.child_index = 0,
.child_info_offset = 0,
.structural_child_index = 0,
}));
return (Iterator) {
.cursor = *cursor,
@ -144,11 +144,15 @@ Length iterator_end_position(Iterator *self) {
static bool iterator_tree_is_visible(const Iterator *self) {
TreeCursorEntry entry = *array_back(&self->cursor.stack);
if (ts_subtree_visible(*entry.subtree)) return true;
if (entry.child_info_offset) {
return self->language->alias_sequences[entry.child_info_offset] != 0;
} else {
return false;
if (self->cursor.stack.size > 1) {
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent.ptr->child_info_id
);
return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
}
return false;
}
static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
@ -163,8 +167,15 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
for (; i + 1 > 0; i--) {
TreeCursorEntry entry = self->cursor.stack.contents[i];
if (entry.child_info_offset) {
*alias_symbol = self->language->alias_sequences[entry.child_info_offset];
if (i > 0) {
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent->ptr->child_info_id
);
if (alias_sequence) {
*alias_symbol = alias_sequence[entry.structural_child_index];
}
}
if (ts_subtree_visible(*entry.subtree) || *alias_symbol) {
@ -190,9 +201,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
did_descend = false;
TreeCursorEntry entry = *array_back(&self->cursor.stack);
Length position = entry.position;
uint32_t child_info_offset =
self->language->max_child_info_production_length *
ts_subtree_child_info_sequence_id(*entry.subtree);
uint32_t structural_child_index = 0;
for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) {
const Subtree *child = &entry.subtree->ptr->children[i];
Length child_left = length_add(position, ts_subtree_padding(*child));
@ -203,7 +212,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
.subtree = child,
.position = position,
.child_index = i,
.child_info_offset = child_info_offset,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {
@ -220,9 +229,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) {
}
position = child_right;
if (!ts_subtree_extra(*child) && child_info_offset) {
child_info_offset++;
}
if (!ts_subtree_extra(*child)) structural_child_index++;
}
} while (did_descend);
@ -249,17 +256,15 @@ static void iterator_advance(Iterator *self) {
uint32_t child_index = entry.child_index + 1;
if (ts_subtree_child_count(*parent) > child_index) {
Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree));
uint32_t child_info_offset = entry.child_info_offset;
if (child_info_offset && !ts_subtree_extra(*entry.subtree)) {
child_info_offset++;
}
uint32_t structural_child_index = entry.structural_child_index;
if (!ts_subtree_extra(*entry.subtree)) structural_child_index++;
const Subtree *next_child = &parent->ptr->children[child_index];
array_push(&self->cursor.stack, ((TreeCursorEntry){
.subtree = next_child,
.position = position,
.child_index = child_index,
.child_info_offset = child_info_offset,
.structural_child_index = structural_child_index,
}));
if (iterator_tree_is_visible(self)) {

View file

@ -81,12 +81,29 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
}
static inline const TSSymbol *
ts_language_alias_sequence(const TSLanguage *self, unsigned id) {
return id > 0 ?
self->alias_sequences + id * self->max_child_info_production_length :
ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) {
return child_info_id > 0 ?
self->alias_sequences + child_info_id * self->max_child_info_production_length :
NULL;
}
static inline void ts_language_field_map(
const TSLanguage *self,
uint32_t child_info_id,
const TSFieldMapping **start,
const TSFieldMapping **end
) {
// To find the field mappings for a given child info id, first index
// into the field map using the child info id directly. This 'header'
// row contains two values:
// * the index where the field mappings start
// * the number of field mappings.
const TSFieldMapping *field_map = self->field_map;
TSFieldMapping header = field_map[child_info_id];
*start = &field_map[header.field_id];
*end = &field_map[header.field_id] + header.child_index;
}
#ifdef __cplusplus
}
#endif

View file

@ -8,8 +8,8 @@ typedef struct {
const TSTree *tree;
Length position;
uint32_t child_index;
uint32_t child_info_offset;
TSFieldId last_field_id;
uint32_t structural_child_index;
const TSSymbol *alias_sequence;
} NodeChildIterator;
// TSNode - constructors
@ -49,29 +49,35 @@ static inline Subtree ts_node__subtree(TSNode self) {
static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
Subtree subtree = ts_node__subtree(*node);
if (ts_subtree_child_count(subtree) == 0) {
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, 0};
return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL};
}
uint32_t child_info_offset =
subtree.ptr->child_info_sequence_id *
node->tree->language->max_child_info_production_length;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->tree->language,
subtree.ptr->child_info_id
);
return (NodeChildIterator) {
.tree = node->tree,
.parent = subtree,
.position = {ts_node_start_byte(*node), ts_node_start_point(*node)},
.child_index = 0,
.child_info_offset = child_info_offset,
.last_field_id = 0,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
// Report whether the iterator's position has reached the parent's child
// count. The parent pointer is dereferenced without a check, so the iterator
// must refer to a real parent subtree.
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
  Subtree parent = self->parent;
  return parent.ptr->child_count == self->child_index;
}
static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) {
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
const Subtree *child = &self->parent.ptr->children[self->child_index];
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child) && self->child_info_offset) {
alias_symbol = self->tree->language->alias_sequences[self->child_info_offset];
self->last_field_id = self->tree->language->field_sequences[self->child_info_offset];
self->child_info_offset++;
if (!ts_subtree_extra(*child)) {
if (self->alias_sequence) {
alias_symbol = self->alias_sequence[self->structural_child_index];
}
self->structural_child_index++;
}
if (self->child_index > 0) {
self->position = length_add(self->position, ts_subtree_padding(*child));
@ -452,15 +458,68 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
}
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
if (field_id) {
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&self);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (iterator.last_field_id == field_id) {
recur:
if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
const TSFieldMapping *field_map, *field_map_end;
ts_language_field_map(
self.tree->language,
ts_node__subtree(self).ptr->child_info_id,
&field_map,
&field_map_end
);
if (field_map == field_map_end) return ts_node__null();
// The field mappings are sorted by their field id. Scan all
// the mappings to find the ones for the given field id.
while (field_map->field_id < field_id) {
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
while (field_map_end[-1].field_id > field_id) {
field_map_end--;
if (field_map == field_map_end) return ts_node__null();
}
TSNode child;
NodeChildIterator iterator = ts_node_iterate_children(&self);
while (ts_node_child_iterator_next(&iterator, &child)) {
if (!ts_subtree_extra(ts_node__subtree(child))) {
uint32_t index = iterator.structural_child_index - 1;
if (index < field_map->child_index) continue;
// Hidden nodes' fields are "inherited" by their visible parent.
if (field_map->inherited) {
// If this is the *last* possible child node for this field,
// then perform a tail call to avoid recursion.
if (field_map + 1 == field_map_end) {
self = child;
goto recur;
}
// Otherwise, descend into this child, but if that child doesn't
// contain the field, continue searching subsequent children.
else {
TSNode result = ts_node_child_by_field_id(child, field_id);
if (result.id) return result;
field_map++;
if (field_map == field_map_end) return ts_node__null();
}
}
else if (ts_node__is_relevant(child, true)) {
return child;
}
// If the field refers to a hidden node, return its first visible
// child.
else {
return ts_node_child(child, 0);
}
}
}
return ts_node__null();
}

View file

@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su
static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol,
uint32_t count, int dynamic_precedence,
uint16_t child_info_sequence_id, bool fragile) {
uint16_t child_info_id, bool fragile) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
uint32_t removed_version_count = 0;
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
}
MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
symbol, &children, child_info_sequence_id, self->language
symbol, &children, child_info_id, self->language
);
// This pop operation may have caused multiple stack versions to collapse
@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
}
parent.ptr->dynamic_precedence += dynamic_precedence;
parent.ptr->child_info_sequence_id = child_info_sequence_id;
parent.ptr->child_info_id = child_info_id;
TSStateId state = ts_stack_state(self->stack, slice_version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look
&self->tree_pool,
ts_subtree_symbol(child),
&trees,
child.ptr->child_info_sequence_id,
child.ptr->child_info_id,
self->language
));
ts_subtree_release(&self->tree_pool, child);
@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence,
.child_info_sequence_id = action.params.child_info_sequence_id,
.child_info_id = action.params.child_info_id,
});
default:
break;
@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
reduction_version = ts_parser__reduce(
self, version, action.symbol, action.count,
action.dynamic_precedence, action.child_info_sequence_id,
action.dynamic_precedence, action.child_info_id,
true
);
}
@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
StackVersion reduction_version = ts_parser__reduce(
self, version, action.params.symbol, action.params.child_count,
action.params.dynamic_precedence, action.params.child_info_sequence_id,
action.params.dynamic_precedence, action.params.child_info_id,
is_fragile
);
if (reduction_version != STACK_VERSION_NONE) {

View file

@ -12,7 +12,7 @@ typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short child_info_sequence_id;
unsigned short child_info_id;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -379,7 +379,7 @@ void ts_subtree_set_children(
self.ptr->dynamic_precedence = 0;
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id);
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id);
uint32_t lookahead_end_byte = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
@ -474,7 +474,7 @@ void ts_subtree_set_children(
}
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
SubtreeArray *children, unsigned child_info_sequence_id,
SubtreeArray *children, unsigned child_info_id,
const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
*data = (SubtreeHeapData) {
.ref_count = 1,
.symbol = symbol,
.child_info_sequence_id = child_info_sequence_id,
.child_info_id = child_info_id,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim
}
if (ts_subtree_child_count(self)) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id);
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
@ -913,20 +913,17 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
fprintf(f, "\"]\n");
uint32_t child_start_offset = start_offset;
uint32_t structural_child_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
language,
ts_subtree_child_info_sequence_id(*self)
);
uint32_t child_info_offset =
language->max_child_info_production_length *
ts_subtree_child_info_id(*self);
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
const Subtree *child = &self->ptr->children[i];
if (ts_subtree_extra(*child)) {
ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
structural_child_index++;
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child) && child_info_offset) {
alias_symbol = language->alias_sequences[child_info_offset];
child_info_offset++;
}
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
child_start_offset += ts_subtree_total_bytes(*child);
}

View file

@ -73,7 +73,7 @@ typedef struct {
uint32_t node_count;
uint32_t repeat_depth;
int32_t dynamic_precedence;
uint16_t child_info_sequence_id;
uint16_t child_info_id;
struct {
TSSymbol symbol;
TSStateId parse_state;
@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
}
static inline uint16_t ts_subtree_child_info_sequence_id(Subtree self) {
static inline uint16_t ts_subtree_child_info_id(Subtree self) {
if (ts_subtree_child_count(self) > 0) {
return self.ptr->child_info_sequence_id;
return self.ptr->child_info_id;
} else {
return 0;
}

View file

@ -9,7 +9,8 @@ typedef struct {
const TSTree *tree;
Length position;
uint32_t child_index;
uint32_t child_info_offset;
uint32_t structural_child_index;
const TSSymbol *alias_sequence;
} CursorChildIterator;
// CursorChildIterator
@ -17,17 +18,19 @@ typedef struct {
// Builds a CursorChildIterator for the subtree referenced by the last entry
// on the cursor's stack. The iterator starts at that entry's position with
// its child index set to zero.
//
// NOTE(review): this text appears to be a diff rendered without +/- markers,
// so adjacent near-duplicate lines (the two returns in the empty case, the
// child_info_offset / alias_sequence lookups, and the matching initializer
// fields) look like the removed and added versions of the same statement.
// Confirm against the real file before relying on either variant.
static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) {
TreeCursorEntry *last_entry = array_back(&self->stack);
// A subtree with no children yields an iterator whose parent is NULL_SUBTREE
// and whose position and indices are all zero.
if (ts_subtree_child_count(*last_entry->subtree) == 0) {
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0};
return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL};
}
uint32_t child_info_offset =
last_entry->subtree->ptr->child_info_sequence_id *
self->tree->language->max_child_info_production_length;
// Look up the alias sequence through the subtree's child_info_id; the result
// is stored on the iterator so it can be indexed by structural child index.
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->ptr->child_info_id
);
return (CursorChildIterator) {
.tree = self->tree,
.parent = *last_entry->subtree,
.position = last_entry->position,
.child_index = 0,
.child_info_offset = child_info_offset,
.structural_child_index = 0,
.alias_sequence = alias_sequence,
};
}
@ -40,13 +43,13 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
.subtree = child,
.position = self->position,
.child_index = self->child_index,
.child_info_offset = self->child_info_offset,
.structural_child_index = self->structural_child_index,
};
*visible = ts_subtree_visible(*child);
bool extra = ts_subtree_extra(*child);
if (!extra && self->child_info_offset) {
*visible |= self->tree->language->alias_sequences[self->child_info_offset];
self->child_info_offset++;
if (!extra && self->alias_sequence) {
*visible |= self->alias_sequence[self->structural_child_index];
self->structural_child_index++;
}
self->position = length_add(self->position, ts_subtree_size(*child));
@ -82,7 +85,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) {
ts_node_start_point(node)
},
.child_index = 0,
.child_info_offset = 0,
.structural_child_index = 0,
}));
}
@ -173,7 +176,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) {
TreeCursorEntry entry = array_pop(&self->stack);
CursorChildIterator iterator = ts_tree_cursor_iterate_children(self);
iterator.child_index = entry.child_index;
iterator.child_info_offset = entry.child_info_offset;
iterator.structural_child_index = entry.structural_child_index;
iterator.position = entry.position;
bool visible = false;
@ -204,9 +207,12 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
TreeCursorEntry *entry = &self->stack.contents[i];
bool is_aliased = false;
if (i > 0) {
is_aliased =
entry->child_info_offset &&
self->tree->language->alias_sequences[entry->child_info_offset];
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->child_info_id
);
is_aliased = alias_sequence && alias_sequence[entry->structural_child_index];
}
if (ts_subtree_visible(*entry->subtree) || is_aliased) {
self->stack.size = i + 1;
@ -220,8 +226,15 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *last_entry = array_back(&self->stack);
TSSymbol alias_symbol = 0;
if (last_entry->child_info_offset) {
alias_symbol = self->tree->language->alias_sequences[last_entry->child_info_offset];
if (self->stack.size > 1) {
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->child_info_id
);
if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) {
alias_symbol = alias_sequence[last_entry->structural_child_index];
}
}
return ts_node_new(
self->tree,
@ -233,12 +246,40 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
// Returns the field id that applies to the cursor's current node, or 0 when
// no field mapping matches.
//
// NOTE(review): this text appears to be a diff rendered without +/- markers.
// The array_back lookup and the if/else on child_info_offset directly below
// look like the removed implementation, and the loop after them looks like
// its replacement; confirm against the real file.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
const TreeCursor *self = (const TreeCursor *)_self;
TreeCursorEntry *entry = array_back(&self->stack);
if (entry->child_info_offset) {
return self->tree->language->field_sequences[entry->child_info_offset];
} else {
return 0;
// Walk up the tree, visiting the current node and its invisible ancestors.
// The loop stops before index 0 because every visited entry needs a parent
// entry at index i - 1.
for (unsigned i = self->stack.size - 1; i > 0; i--) {
TreeCursorEntry *entry = &self->stack.contents[i];
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
// Stop walking up when another visible node is found.
// The check is skipped for the current node itself (the top of the stack).
if (i != self->stack.size - 1) {
if (ts_subtree_visible(*entry->subtree)) break;
// An ancestor with a nonzero alias in its parent's alias sequence also
// ends the walk.
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->child_info_id
);
if (alias_sequence && alias_sequence[entry->structural_child_index]) {
break;
}
}
// Fetch the [field_map, field_map_end) range for the parent's child_info_id
// and return the first non-inherited mapping whose child index equals this
// entry's structural child index.
const TSFieldMapping *field_map, *field_map_end;
ts_language_field_map(
self->tree->language,
parent_entry->subtree->ptr->child_info_id,
&field_map, &field_map_end
);
while (field_map < field_map_end) {
if (
!field_map->inherited &&
field_map->child_index == entry->structural_child_index
) return field_map->field_id;
field_map++;
}
}
// No mapping matched at any visited level.
return 0;
}
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {

View file

@ -7,7 +7,7 @@ typedef struct {
const Subtree *subtree;
Length position;
uint32_t child_index;
uint32_t child_info_offset;
uint32_t structural_child_index;
} TreeCursorEntry;
typedef struct {