From 56309a1c284f200bc1278fae2830c5014f1619a5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Feb 2019 11:06:18 -0800 Subject: [PATCH] Generate node-fields.json file --- .../build_tables/build_parse_table.rs | 364 ++++++++++++++++-- .../build_tables/minimize_parse_table.rs | 2 +- cli/src/generate/grammars.rs | 6 + cli/src/generate/mod.rs | 128 +++++- cli/src/generate/render.rs | 30 +- cli/src/generate/tables.rs | 30 +- lib/include/tree_sitter/parser.h | 2 +- lib/src/get_changed_ranges.c | 4 +- lib/src/language.h | 10 +- lib/src/node.c | 8 +- lib/src/parser.c | 14 +- lib/src/reduce_action.h | 2 +- lib/src/subtree.c | 10 +- lib/src/subtree.h | 6 +- lib/src/tree_cursor.c | 10 +- 15 files changed, 535 insertions(+), 91 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index d37fce33..417d5d3a 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,18 +6,17 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, - ParseTableEntry, + ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, + ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo, }; use core::ops::Range; use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeMap, VecDeque}; -use std::u32; - use std::fmt::Write; use std::hash::Hasher; +use std::{mem, u32}; #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -36,7 +35,6 @@ struct ParseStateQueueEntry { struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, - field_names_by_hidden_symbol: HashMap>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, 
state_ids_by_item_set: HashMap, ParseStateId>, @@ -49,7 +47,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.child_infos.push(ChildInfo::default()); + self.parse_table.production_infos.push(ProductionInfo::default()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -178,7 +176,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - child_info_id: self.get_child_info_id(item), + production_id: self.get_production_id(item), } }; @@ -646,16 +644,16 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId { - let mut child_info = ChildInfo { + fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId { + let mut production_info = ProductionInfo { alias_sequence: Vec::new(), field_map: BTreeMap::new(), }; for (i, step) in item.production.steps.iter().enumerate() { - child_info.alias_sequence.push(step.alias.clone()); + production_info.alias_sequence.push(step.alias.clone()); if let Some(field_name) = &step.field_name { - child_info + production_info .field_map .entry(field_name.clone()) .or_insert(Vec::new()) @@ -664,9 +662,15 @@ impl<'a> ParseTableBuilder<'a> { inherited: false, }); } - if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) { - for field_name in field_names { - child_info + + if step.symbol.kind == SymbolType::NonTerminal + && !self.syntax_grammar.variables[step.symbol.index] + .kind + .is_visible() + { + let info = &self.parse_table.variable_info[step.symbol.index]; + for (field_name, _) in &info.fields { + production_info .field_map .entry(field_name.clone()) .or_insert(Vec::new()) @@ -678,8 +682,8 @@ impl<'a> ParseTableBuilder<'a> { } } - while child_info.alias_sequence.last() 
== Some(&None) { - child_info.alias_sequence.pop(); + while production_info.alias_sequence.last() == Some(&None) { + production_info.alias_sequence.pop(); } if item.production.steps.len() > self.parse_table.max_aliased_production_length { @@ -688,14 +692,14 @@ impl<'a> ParseTableBuilder<'a> { if let Some(index) = self .parse_table - .child_infos + .production_infos .iter() - .position(|seq| *seq == child_info) + .position(|seq| *seq == production_info) { index } else { - self.parse_table.child_infos.push(child_info); - self.parse_table.child_infos.len() - 1 + self.parse_table.production_infos.push(production_info); + self.parse_table.production_infos.len() - 1 } } @@ -742,23 +746,155 @@ fn populate_following_tokens( } } -fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap> { - let mut result = HashMap::new(); - for (i, variable) in grammar.variables.iter().enumerate() { - let mut field_names = Vec::new(); - if variable.kind == VariableType::Hidden { +pub(crate) fn get_variable_info( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> Vec { + let mut result = Vec::new(); + + // Determine which field names and child node types can appear directly + // within each type of node. 
+ for (i, variable) in syntax_grammar.variables.iter().enumerate() { + let mut info = VariableInfo { + fields: HashMap::new(), + child_types: HashSet::new(), + }; + let is_recursive = variable + .productions + .iter() + .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); + + for production in &variable.productions { + for step in &production.steps { + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(step.symbol) + }; + + if let Some(field_name) = &step.field_name { + let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { + multiple: false, + required: true, + types: HashSet::new(), + }); + field_info.multiple |= is_recursive; + field_info.types.insert(child_type.clone()); + } + + info.child_types.insert(child_type); + } + } + + for production in &variable.productions { + let production_fields: Vec<&String> = production + .steps + .iter() + .filter_map(|s| s.field_name.as_ref()) + .collect(); + for (field_name, field_info) in info.fields.iter_mut() { + if !production_fields.contains(&field_name) { + field_info.required = false; + } + } + } + + result.push(info); + } + + // Expand each node type's information recursively to inherit the properties of + // hidden children. + let mut done = false; + while !done { + done = true; + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + // Move this variable's info out of the vector so it can be modified + // while reading from other entries of the vector. 
+ let mut variable_info = VariableInfo { + fields: HashMap::new(), + child_types: HashSet::new(), + }; + mem::swap(&mut variable_info, &mut result[i]); + for production in &variable.productions { for step in &production.steps { - if let Some(field_name) = &step.field_name { - if let Err(i) = field_names.binary_search(field_name) { - field_names.insert(i, field_name.clone()); + if step.symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[step.symbol.index] + .kind + .is_visible() + { + let production_info = &result[step.symbol.index]; + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &production_info.fields { + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert_with(|| { + done = false; + child_field_info.clone() + }); + if child_field_info.multiple && !field_info.multiple { + field_info.multiple = child_field_info.multiple; + done = false; + } + if !child_field_info.required && field_info.required { + field_info.required = child_field_info.required; + done = false; + } + for child_type in &child_field_info.types { + if field_info.types.insert(child_type.clone()) { + done = false; + } + } + } + + // Inherit child types from this hidden child + for child_type in &production_info.child_types { + if variable_info.child_types.insert(child_type.clone()) { + done = false; + } + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &production_info.child_types { + if field_info.types.insert(child_type.clone()) { + done = false; + } + } } } } } + + // Move this variable's info back into the vector. 
+ result[i] = variable_info; } - result.insert(Symbol::non_terminal(i), field_names); } + + let child_type_is_visible = |child_type: &ChildType| match child_type { + ChildType::Aliased(_) => true, + ChildType::Normal(symbol) => { + let step_kind = match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + }; + step_kind.is_visible() + } + }; + + for variable_info in result.iter_mut() { + variable_info.child_types.retain(&child_type_is_visible); + for (_, field_info) in variable_info.fields.iter_mut() { + field_info.types.retain(&child_type_is_visible); + } + } + result } @@ -788,12 +924,178 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - child_infos: Vec::new(), + production_infos: Vec::new(), max_aliased_production_length: 0, + variable_info: get_variable_info(syntax_grammar, lexical_grammar), }, - field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar), } .build()?; Ok((table, following_tokens)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + + #[test] + fn test_get_variable_info() { + let variable_info = get_variable_info( + &build_syntax_grammar(vec![ + // Required field `field1` has only one node type. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"), + ], + }, + ], + }, + ]), + &build_lexical_grammar(), + ); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(1))] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + + assert_eq!( + variable_info[2].fields, + vec![( + "field2".to_string(), + FieldInfo { + required: false, + multiple: false, + types: vec![ + ChildType::Normal(Symbol::terminal(2)), + ChildType::Normal(Symbol::terminal(3)), + ] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_inherited_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar(vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + 
dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ]), + &build_lexical_grammar(), + ); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(3))] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + } + + fn build_syntax_grammar(variables: Vec) -> SyntaxGrammar { + let mut syntax_grammar = SyntaxGrammar::default(); + syntax_grammar.variables = variables; + syntax_grammar + } + + fn build_lexical_grammar() -> LexicalGrammar { + let mut lexical_grammar = LexicalGrammar::default(); + for i in 0..10 { + lexical_grammar.variables.push(LexicalVariable { + name: format!("token_{}", i), + kind: VariableType::Named, + implicit_precedence: 0, + start_state: 0, + }); + } + lexical_grammar + } +} diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index f3862732..a9d26124 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - child_info_id: 0, + production_id: 0, symbol, .. 
} => { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 7f9e09d6..6cc1a5f7 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -187,6 +187,12 @@ impl Variable { } } +impl VariableType { + pub fn is_visible(&self) -> bool { + *self == VariableType::Named || *self == VariableType::Anonymous + } +} + impl LexicalGrammar { pub fn variable_indices_for_nfa_states<'a>( &'a self, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index b13dfbbd..0983dfc3 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -1,10 +1,15 @@ use self::build_tables::build_tables; +use self::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; +use self::rules::{AliasMap, Symbol, SymbolType}; +use self::tables::{ChildType, VariableInfo}; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; +use serde_derive::Serialize; +use std::collections::BTreeMap; use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; @@ -27,6 +32,12 @@ lazy_static! 
{ .unwrap(); } +struct GeneratedParser { + name: String, + c_code: String, + fields_json: String, +} + pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, @@ -47,13 +58,18 @@ pub fn generate_parser_in_directory( } } - let (language_name, c_code) = - generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; + let GeneratedParser { + name: language_name, + c_code, + fields_json, + } = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; let repo_header_path = repo_src_path.join("tree_sitter"); fs::create_dir_all(&repo_src_path)?; fs::create_dir_all(&repo_header_path)?; fs::write(&repo_src_path.join("parser.c"), c_code) .map_err(|e| format!("Failed to write parser.c: {}", e))?; + fs::write(&repo_src_path.join("node-fields.json"), fields_json) + .map_err(|e| format!("Failed to write node-fields.json: {}", e))?; fs::write( &repo_header_path.join("parser.h"), tree_sitter::PARSER_HEADER, @@ -73,14 +89,15 @@ pub fn generate_parser_in_directory( pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); - generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) + let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())?; + Ok((parser.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( grammar_json: &str, minimize: bool, state_ids_to_log: Vec, -) -> Result<(String, String)> { +) -> Result { let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; @@ -92,8 +109,15 @@ fn generate_parser_for_grammar_with_opts( minimize, state_ids_to_log, )?; + let name = input_grammar.name; + let fields_json = generate_field_info_json( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &parse_table.variable_info, + ); let c_code = render_c_code( - 
&input_grammar.name, + &name, parse_table, main_lex_table, keyword_lex_table, @@ -102,7 +126,11 @@ fn generate_parser_for_grammar_with_opts( lexical_grammar, simple_aliases, ); - Ok((input_grammar.name, c_code)) + Ok(GeneratedParser { + name, + c_code, + fields_json, + }) } fn load_grammar_file(grammar_path: &Path) -> Result { @@ -153,3 +181,91 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> .map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } + +#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] +struct FieldTypeJSON { + kind: String, + named: bool, +} + +#[derive(Debug, Serialize)] +struct FieldInfoJSON { + multiple: bool, + required: bool, + types: Vec, +} + +fn generate_field_info_json( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + simple_aliases: &AliasMap, + variable_info: &Vec, +) -> String { + let mut map = BTreeMap::new(); + for (i, info) in variable_info.iter().enumerate() { + let variable = &syntax_grammar.variables[i]; + if !variable.kind.is_visible() || info.fields.is_empty() { + continue; + } + + let name = simple_aliases + .get(&Symbol::non_terminal(i)) + .map_or(&variable.name, |alias| &alias.value); + + let fields = map.entry(name.clone()).or_insert_with(|| BTreeMap::new()); + for (field, field_info) in info.fields.iter() { + let field_info_json = fields.entry(field.clone()).or_insert(FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + }); + + field_info_json.multiple |= field_info.multiple; + field_info_json.required &= field_info.required; + field_info_json.types.extend(field_info.types.iter().map( + |child_type| match child_type { + ChildType::Aliased(alias) => FieldTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + FieldTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + } + } else { + match symbol.kind { + 
SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), + } + } + } + }, + )); + field_info_json.types.sort_unstable(); + field_info_json.types.dedup(); + } + } + serde_json::to_string_pretty(&map).unwrap() +} diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 22bf655c..f2d84bf7 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -113,12 +113,12 @@ impl Generator { } let mut field_names = Vec::new(); - for child_info in &self.parse_table.child_infos { - for field_name in child_info.field_map.keys() { + for production_info in &self.parse_table.production_infos { + for field_name in production_info.field_map.keys() { field_names.push(field_name); } - for alias in &child_info.alias_sequence { + for alias in &production_info.alias_sequence { if let Some(alias) = &alias { let alias_kind = if alias.is_named { VariableType::Named @@ -358,17 +358,17 @@ impl Generator { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - self.parse_table.child_infos.len() + self.parse_table.production_infos.len() ); indent!(self); - for (i, child_info) in self.parse_table.child_infos.iter().enumerate() { - if child_info.alias_sequence.is_empty() { + for (i, production_info) in self.parse_table.production_infos.iter().enumerate() { + if production_info.alias_sequence.is_empty() { continue; } add_line!(self, "[{}] = {{", i); indent!(self); - for (j, 
alias) in child_info.alias_sequence.iter().enumerate() { + for (j, alias) in production_info.alias_sequence.iter().enumerate() { if let Some(alias) = alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } @@ -391,10 +391,10 @@ impl Generator { ); let mut field_map_ids = Vec::new(); - for child_info in &self.parse_table.child_infos { - if !child_info.field_map.is_empty() { + for production_info in &self.parse_table.production_infos { + if !production_info.field_map.is_empty() { let mut flat_field_map = Vec::new(); - for (field_name, locations) in &child_info.field_map { + for (field_name, locations) in &production_info.field_map { for location in locations { flat_field_map.push((field_name.clone(), *location)); } @@ -417,12 +417,12 @@ impl Generator { "static const TSFieldMapSlice ts_field_map_slices[] = {{", ); indent!(self); - for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { + for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { if length > 0 { add_line!( self, "[{}] = {{.index = {}, .length = {}}},", - child_info_id, + production_id, row_id, length ); @@ -816,15 +816,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - child_info_id, + production_id, .. 
} => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if child_info_id != 0 { - add!(self, ", .child_info_id = {}", child_info_id); + if production_id != 0 { + add!(self, ", .production_id = {}", production_id); } add!(self, ")"); } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index a39ae099..e358f4fa 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,9 +1,9 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; -use hashbrown::HashMap; +use hashbrown::{HashMap, HashSet}; use std::collections::BTreeMap; -pub(crate) type ChildInfoId = usize; +pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -22,7 +22,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - child_info_id: ChildInfoId, + production_id: ProductionInfoId, }, } @@ -47,16 +47,36 @@ pub(crate) struct FieldLocation { } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct ChildInfo { +pub(crate) struct ProductionInfo { pub alias_sequence: Vec>, pub field_map: BTreeMap>, } +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) enum ChildType { + Normal(Symbol), + Aliased(Alias), +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldInfo { + pub required: bool, + pub multiple: bool, + pub types: HashSet, +} + +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct VariableInfo { + pub fields: HashMap, + pub child_types: HashSet, +} + #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub child_infos: Vec, + pub variable_info: Vec, + pub production_infos: Vec, pub max_aliased_production_length: usize, } diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 90ab6200..6e67d90c 100644 --- 
a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -66,7 +66,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t child_info_id; + uint8_t production_id; }; } params; TSParseActionType type : 4; diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 83331cce..46add435 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -148,7 +148,7 @@ static bool iterator_tree_is_visible(const Iterator *self) { Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, - parent.ptr->child_info_id + parent.ptr->production_id ); return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } @@ -171,7 +171,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, - parent->ptr->child_info_id + parent->ptr->production_id ); if (alias_sequence) { *alias_symbol = alias_sequence[entry.structural_child_index]; diff --git a/lib/src/language.h b/lib/src/language.h index 59c0fadc..84e3fbc4 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -81,19 +81,19 @@ ts_language_enabled_external_tokens(const TSLanguage *self, } static inline const TSSymbol * -ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { - return child_info_id > 0 ? - self->alias_sequences + child_info_id * self->max_alias_sequence_length : +ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) { + return production_id > 0 ? 
+ self->alias_sequences + production_id * self->max_alias_sequence_length : NULL; } static inline void ts_language_field_map( const TSLanguage *self, - uint32_t child_info_id, + uint32_t production_id, const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { - TSFieldMapSlice slice = self->field_map_slices[child_info_id]; + TSFieldMapSlice slice = self->field_map_slices[production_id]; *start = &self->field_map_entries[slice.index]; *end = &self->field_map_entries[slice.index] + slice.length; } diff --git a/lib/src/node.c b/lib/src/node.c index 6ac4636d..96f934a7 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -53,7 +53,7 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { } const TSSymbol *alias_sequence = ts_language_alias_sequence( node->tree->language, - subtree.ptr->child_info_id + subtree.ptr->production_id ); return (NodeChildIterator) { .tree = node->tree, @@ -464,7 +464,7 @@ recur: const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self.tree->language, - ts_node__subtree(self).ptr->child_info_id, + ts_node__subtree(self).ptr->production_id, &field_map, &field_map_end ); @@ -498,8 +498,8 @@ recur: goto recur; } - // Otherwise, descend into this child, but if that child doesn't - // contain the field, continue searching subsequent children. + // Otherwise, descend into this child, but if it doesn't contain + // the field, continue searching subsequent children. 
else { TSNode result = ts_node_child_by_field_id(child, field_id); if (result.id) return result; diff --git a/lib/src/parser.c b/lib/src/parser.c index 5fd75cd8..4e5727f6 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t child_info_id, bool fragile) { + uint16_t production_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); @@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, child_info_id, self->language + symbol, &children, production_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->child_info_id = child_info_id; + parent.ptr->production_id = production_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->child_info_id, + child.ptr->production_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .child_info_id = action.params.child_info_id, + .production_id 
= action.params.production_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.child_info_id, + action.dynamic_precedence, action.production_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, - action.params.dynamic_precedence, action.params.child_info_id, + action.params.dynamic_precedence, action.params.production_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 9eca0327..72aff08d 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short child_info_id; + unsigned short production_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 3d588890..776a86fc 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned child_info_id, + SubtreeArray *children, unsigned production_id, const TSLanguage *language) { 
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .child_info_id = child_info_id, + .production_id = production_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -915,7 +915,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t child_start_offset = start_offset; uint32_t child_info_offset = language->max_alias_sequence_length * - ts_subtree_child_info_id(*self); + ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; TSSymbol alias_symbol = 0; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index f32edfc2..611996d5 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t child_info_id; + uint16_t production_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 
0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_child_info_id(Subtree self) { +static inline uint16_t ts_subtree_production_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->child_info_id; + return self.ptr->production_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 4f3f9ae7..c3ba54c5 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -22,7 +22,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs } const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - last_entry->subtree->ptr->child_info_id + last_entry->subtree->ptr->production_id ); return (CursorChildIterator) { .tree = self->tree, @@ -210,7 +210,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; } @@ -230,7 +230,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { alias_symbol = alias_sequence[last_entry->structural_child_index]; @@ -257,7 +257,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { if (ts_subtree_visible(*entry->subtree)) break; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); if (alias_sequence && 
alias_sequence[entry->structural_child_index]) { break; @@ -267,7 +267,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self->tree->language, - parent_entry->subtree->ptr->child_info_id, + parent_entry->subtree->ptr->production_id, &field_map, &field_map_end );