diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 463dca97..7a111622 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -4,9 +4,10 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; -use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; +use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, + ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -652,14 +653,14 @@ impl<'a> ParseTableBuilder<'a> { .iter() .map(|s| ChildInfo { alias: s.alias.clone(), - child_ref: s.child_ref.clone(), + field_name: s.field_name.clone(), }) .collect(); while child_info_sequence.last() == Some(&ChildInfo::default()) { child_info_sequence.pop(); } - if item.production.steps.len() > self.parse_table.max_aliased_production_length { - self.parse_table.max_aliased_production_length = item.production.steps.len() + if item.production.steps.len() > self.parse_table.max_production_length_with_child_info { + self.parse_table.max_production_length_with_child_info = item.production.steps.len() } if let Some(index) = self .parse_table @@ -669,7 +670,9 @@ impl<'a> ParseTableBuilder<'a> { { index } else { - self.parse_table.child_info_sequences.push(child_info_sequence); + self.parse_table + .child_info_sequences + .push(child_info_sequence); self.parse_table.child_info_sequences.len() - 1 } } @@ -744,7 +747,7 @@ pub(crate) fn build_parse_table( states: Vec::new(), symbols: Vec::new(), child_info_sequences: Vec::new(), - max_aliased_production_length: 0, + max_production_length_with_child_info: 0, }, } .build()?; diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 0fc9c5f8..0d7a4e29 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -20,7 +20,7 @@ lazy_static! { precedence: 0, associativity: None, alias: None, - child_ref: None, + field_name: None, }], }; } diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index c18ac530..428fc604 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -34,6 +34,14 @@ function blank() { }; } +function field(name, rule) { + return { + type: "FIELD", + name: name, + content: rule + } +} + function choice(...elements) { return { type: "CHOICE", @@ -363,6 +371,7 @@ global.seq = seq; global.sym = sym; global.token = token; global.grammar = grammar; +global.field = field; const result = require(process.env.TREE_SITTER_GRAMMAR_PATH); console.log(JSON.stringify(result, null, 2)); diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index f4862449..7f9e09d6 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -54,7 +54,7 @@ pub(crate) struct ProductionStep { pub precedence: i32, pub associativity: Option, pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -100,7 +100,7 @@ impl ProductionStep { precedence: 0, associativity: None, alias: None, - child_ref: None, + field_name: None, } } @@ -110,7 +110,7 @@ impl ProductionStep { precedence, associativity, alias: self.alias, - child_ref: self.child_ref, + field_name: self.field_name, } } @@ -123,16 +123,16 @@ impl ProductionStep { value: value.to_string(), is_named, }), - child_ref: self.child_ref, + field_name: self.field_name, } } - pub(crate) fn with_child_ref(self, name: &str) -> Self { + pub(crate) fn with_field_name(self, name: &str) -> Self { Self { symbol: self.symbol, precedence: self.precedence, associativity: self.associativity, alias: self.alias, - child_ref: Some(name.to_string()), + field_name: Some(name.to_string()), } } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index a11140ac..5b244c87 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -26,8 +26,8 @@ enum RuleJSON { CHOICE { members: Vec, }, - REF { - value: String, + FIELD { + name: String, content: Box, }, SEQ { @@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), - RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)), + RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), RuleJSON::REPEAT { content } => { diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 95071937..1c050a6b 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -11,7 +11,7 @@ struct RuleFlattener { precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, - child_ref_stack: Vec, + field_name_stack: Vec, } impl RuleFlattener { @@ -24,7 +24,7 @@ impl RuleFlattener { precedence_stack: Vec::new(), associativity_stack: Vec::new(), alias_stack: Vec::new(), - child_ref_stack: Vec::new(), + field_name_stack: Vec::new(), } } @@ -62,10 +62,10 @@ impl RuleFlattener { self.alias_stack.push(alias); } - let mut has_child_ref = false; - if let Some(child_ref) = params.child_ref { - has_child_ref = true; - self.child_ref_stack.push(child_ref); + let mut has_field_name = false; + if let Some(field_name) = params.field_name { + has_field_name = true; + self.field_name_stack.push(field_name); } if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { @@ -94,8 +94,8 @@ impl RuleFlattener { self.alias_stack.pop(); } - if has_child_ref { - self.child_ref_stack.pop(); + if has_field_name { + self.field_name_stack.pop(); } did_push @@ -106,7 +106,7 @@ impl RuleFlattener { precedence: self.precedence_stack.last().cloned().unwrap_or(0), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), - child_ref: self.child_ref_stack.last().cloned(), + field_name: self.field_name_stack.last().cloned(), }); true } @@ -370,16 +370,16 @@ mod tests { } #[test] - fn test_flatten_grammar_with_child_refs() { + fn test_flatten_grammar_with_field_names() { let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, rule: Rule::seq(vec![ - Rule::child_ref("first-thing".to_string(), Rule::terminal(1)), + Rule::field("first-thing".to_string(), Rule::terminal(1)), Rule::terminal(2), Rule::choice(vec![ Rule::Blank, - Rule::child_ref("second-thing".to_string(), Rule::terminal(3)), + Rule::field("second-thing".to_string(), Rule::terminal(3)), ]), ]), }) @@ -391,16 +391,16 @@ mod tests { Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), ProductionStep::new(Symbol::terminal(2)) ] }, Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"), + ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"), ] }, ] diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 05153a0c..089edb79 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -6,6 +6,7 @@ use core::ops::Range; use hashbrown::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; +use tree_sitter::LANGUAGE_VERSION; macro_rules! add { ($this: tt, $($arg: tt)*) => {{ @@ -56,10 +57,12 @@ struct Generator { alias_ids: HashMap, external_scanner_states: Vec>, alias_map: HashMap>, + field_names: Vec, } impl Generator { fn generate(mut self) -> String { + self.init(); self.add_includes(); self.add_pragmas(); self.add_stats(); @@ -68,7 +71,11 @@ impl Generator { self.add_symbol_metadata_list(); if self.parse_table.child_info_sequences.len() > 1 { - self.add_alias_sequences(); + if !self.field_names.is_empty() { + self.add_field_name_enum(); + } + self.add_field_name_names_list(); + self.add_child_info_sequences(); } let mut main_lex_table = LexTable::default(); @@ -95,6 +102,49 @@ impl Generator { self.buffer } + fn init(&mut self) { + let mut symbol_identifiers = HashSet::new(); + for i in 0..self.parse_table.symbols.len() { + self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); + } + + let mut field_names = Vec::new(); + for child_info_sequence in &self.parse_table.child_info_sequences { + for entry in child_info_sequence { + if let Some(field_name) = &entry.field_name { + field_names.push(field_name); + } + + if let Some(alias) = &entry.alias { + let alias_kind = if alias.is_named { + VariableType::Named + } else { + VariableType::Anonymous + }; + let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias_kind + }); + let alias_id = if let Some(symbol) = matching_symbol { + self.symbol_ids[&symbol].clone() + } else if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + self.alias_map + .entry(alias.clone()) + .or_insert(matching_symbol); + } + } + } + + field_names.sort_unstable(); + field_names.dedup(); + self.field_names = field_names.into_iter().cloned().collect(); + } + fn add_includes(&mut self) { add_line!(self, "#include "); add_line!(self, ""); @@ -143,39 +193,7 @@ impl Generator { }) .count(); - let mut symbol_identifiers = HashSet::new(); - for i in 0..self.parse_table.symbols.len() { - self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); - } - - for child_info_sequence in &self.parse_table.child_info_sequences { - for entry in child_info_sequence { - if let Some(alias) = &entry.alias { - let alias_kind = if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - }; - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind - }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); - } - } - } - - add_line!(self, "#define LANGUAGE_VERSION {}", 9); + add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION); add_line!( self, "#define STATE_COUNT {}", @@ -197,10 +215,11 @@ impl Generator { "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len() ); + add_line!(self, "#define FIELD_COUNT {}", self.field_names.len()); add_line!( self, - "#define MAX_ALIAS_SEQUENCE_LENGTH {}", - self.parse_table.max_aliased_production_length + "#define MAX_CHILD_INFO_PRODUCTION_LENGTH {}", + self.parse_table.max_production_length_with_child_info ); add_line!(self, ""); } @@ -253,6 +272,34 @@ impl Generator { add_line!(self, ""); } + fn add_field_name_enum(&mut self) { + add_line!(self, "enum {{"); + indent!(self); + for (i, field_name) in self.field_names.iter().enumerate() { + add_line!(self, "{} = {},", self.field_id(field_name), i + 1); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_field_name_names_list(&mut self) { + add_line!(self, "static const char *ts_field_names[] = {{"); + indent!(self); + add_line!(self, "[0] = NULL,"); + for field_name in &self.field_names { + add_line!( + self, + "[{}] = \"{}\",", + self.field_id(field_name), + field_name + ); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_symbol_metadata_list(&mut self) { add_line!( self, @@ -303,14 +350,18 @@ impl Generator { add_line!(self, ""); } - fn add_alias_sequences(&mut self) { + fn add_child_info_sequences(&mut self) { add_line!( self, - "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", + "static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", self.parse_table.child_info_sequences.len() ); indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) { + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { + if sequence.iter().all(|i| i.alias.is_none()) { + continue; + } + add_line!(self, "[{}] = {{", i); indent!(self); for (j, child_info) in sequence.iter().enumerate() { @@ -324,6 +375,31 @@ impl Generator { dedent!(self); add_line!(self, "}};"); add_line!(self, ""); + + add_line!( + self, + "static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", + self.parse_table.child_info_sequences.len() + ); + indent!(self); + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { + if sequence.iter().all(|i| i.field_name.is_none()) { + continue; + } + + add_line!(self, "[{}] = {{", i); + indent!(self); + for (j, child_info) in sequence.iter().enumerate() { + if let Some(field_name) = &child_info.field_name { + add_line!(self, "[{}] = {},", j, self.field_id(&field_name)); + } + } + dedent!(self); + add_line!(self, "}},"); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); } fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { @@ -694,7 +770,11 @@ impl Generator { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } if child_info_sequence_id != 0 { - add!(self, ", .alias_sequence_id = {}", child_info_sequence_id); + add!( + self, + ", .child_info_sequence_id = {}", + child_info_sequence_id + ); } add!(self, ")"); } @@ -764,11 +844,18 @@ impl Generator { self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); + + add_line!(self, ".field_count = FIELD_COUNT,"); + add_line!( + self, + ".field_sequences = (const TSFieldId *)ts_field_sequences," + ); + add_line!(self, ".field_names = ts_field_names,"); } add_line!( self, - ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," + ".max_child_info_production_length = MAX_CHILD_INFO_PRODUCTION_LENGTH," ); add_line!(self, ".lex_fn = ts_lex,"); @@ -865,6 +952,10 @@ impl Generator { self.symbol_ids.insert(symbol, id); } + fn field_id(&self, field_name: &String) -> String { + format!("field_id_{}", field_name) + } + fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { match symbol.kind { SymbolType::End => ("end", VariableType::Hidden), @@ -996,6 +1087,7 @@ pub(crate) fn render_c_code( alias_ids: HashMap::new(), external_scanner_states: Vec::new(), alias_map: HashMap::new(), + field_names: Vec::new(), } .generate() } diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index f1939cb1..174e06e5 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -32,7 +32,7 @@ pub(crate) struct MetadataParams { pub is_active: bool, pub is_main_token: bool, pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -58,9 +58,9 @@ pub(crate) enum Rule { } impl Rule { - pub fn child_ref(name: String, content: Rule) -> Self { + pub fn field(name: String, content: Rule) -> Self { add_metadata(content, move |params| { - params.child_ref = Some(name); + params.field_name = Some(name); }) } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index f798544b..fc1ad642 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -42,7 +42,7 @@ pub(crate) struct ParseState { #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct ChildInfo { pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Debug, PartialEq, Eq)] @@ -50,7 +50,7 @@ pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, pub child_info_sequences: Vec>, - pub max_aliased_production_length: usize, + pub max_production_length_with_child_info: usize, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/cli/src/parse.rs b/cli/src/parse.rs index bd134457..f7961754 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -49,9 +49,8 @@ pub fn parse_file_at_path( let mut did_visit_children = false; loop { let node = cursor.node(); - let is_named = node.is_named(); if did_visit_children { - if is_named { + if node.is_named() { stdout.write(b")")?; needs_newline = true; } @@ -64,13 +63,16 @@ pub fn parse_file_at_path( break; } } else { - if is_named { + if node.is_named() { if needs_newline { stdout.write(b"\n")?; } for _ in 0..indent_level { stdout.write(b" ")?; } + if let Some(field_name) = cursor.field_name() { + write!(&mut stdout, "{}: ", field_name)?; + } let start = node.start_position(); let end = node.end_position(); write!( diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 157f09a8..af2b4582 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,6 +1,5 @@ mod corpus_test; mod helpers; -mod node_refs; mod node_test; mod parser_test; mod properties_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index e7501569..dc4bb7a2 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -338,6 +338,65 @@ fn test_node_edit() { } } +#[test] +fn test_node_field_names() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_grammar_with_refs", + "extras": [ + {"type": "PATTERN", "value": "\\s+"} + ], + "rules": { + "rule_a": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field_1", + "content": { + "type": "STRING", + "value": "child-1" + } + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "child-2" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "FIELD", + "name": "field_2", + "content": { + "type": "STRING", + "value": "child-3" + } + } + ] + } + } + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + let language = get_test_language(&parser_name, &parser_code, None); + parser.set_language(language).unwrap(); + + let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); + let root_node = tree.root_node(); + assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); + assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); +} + fn get_all_nodes(tree: &Tree) -> Vec { let mut result = Vec::new(); let mut visited_children = false; diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 3d71f804..3e12619b 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -3,6 +3,7 @@ pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; +pub type TSFieldId = u16; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLanguage { @@ -228,7 +229,14 @@ extern "C" { pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - pub fn ts_node_child_by_ref(arg1: TSNode, arg2: *const ::std::os::raw::c_char) -> TSNode; + pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; +} +extern "C" { + pub fn ts_node_child_by_field_name( + arg1: TSNode, + arg2: *const ::std::os::raw::c_char, + arg3: u32, + ) -> TSNode; } extern "C" { pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; @@ -289,6 +297,14 @@ extern "C" { extern "C" { pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } +extern "C" { + pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; +} +extern "C" { + pub fn ts_tree_cursor_current_field_name( + arg1: *const TSTreeCursor, + ) -> *const ::std::os::raw::c_char; +} extern "C" { pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } @@ -316,6 +332,22 @@ extern "C" { arg2: *const ::std::os::raw::c_char, ) -> TSSymbol; } +extern "C" { + pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + pub fn ts_language_field_name_for_id( + arg1: *const TSLanguage, + arg2: TSFieldId, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_language_field_id_for_name( + arg1: *const TSLanguage, + arg2: *const ::std::os::raw::c_char, + arg3: u32, + ) -> TSFieldId; +} extern "C" { pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } @@ -323,4 +355,5 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 10; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9; diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 841f5895..c5738608 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -12,15 +12,12 @@ use std::os::unix::io::AsRawFd; use regex::Regex; use serde::de::DeserializeOwned; use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::fmt; +use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; -use std::ptr; -use std::slice; -use std::str; -use std::u16; +use std::{fmt, ptr, slice, str, u16}; +pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); #[derive(Clone, Copy)] @@ -157,15 +154,21 @@ impl Parser { pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { let version = ffi::ts_language_version(language.0) as usize; - if version == ffi::TREE_SITTER_LANGUAGE_VERSION { - ffi::ts_parser_set_language(self.0, language.0); - Ok(()) - } else { + if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION { + Err(format!( + "Incompatible language version {}. Expected {} or greater.", + version, + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + )) + } else if version > ffi::TREE_SITTER_LANGUAGE_VERSION { Err(format!( "Incompatible language version {}. Expected {}.", version, ffi::TREE_SITTER_LANGUAGE_VERSION )) + } else { + ffi::ts_parser_set_language(self.0, language.0); + Ok(()) } } } @@ -463,12 +466,15 @@ impl<'tree> Node<'tree> { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } - pub fn child_by_ref(&self, ref_name: &str) -> Option { - if let Ok(c_ref_name) = CString::new(ref_name) { - Self::new(unsafe { ffi::ts_node_child_by_ref(self.0, c_ref_name.as_ptr()) }) - } else { - None - } + pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { + let field_name = field_name.as_ref(); + Self::new(unsafe { + ffi::ts_node_child_by_field_name( + self.0, + field_name.as_ptr() as *const c_char, + field_name.len() as u32, + ) + }) } pub fn child_count(&self) -> usize { @@ -587,6 +593,28 @@ impl<'a> TreeCursor<'a> { ) } + pub fn field_id(&self) -> Option { + unsafe { + let id = ffi::ts_tree_cursor_current_field_id(&self.0); + if id == 0 { + None + } else { + Some(id) + } + } + } + + pub fn field_name(&self) -> Option<&str> { + unsafe { + let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); + if ptr.is_null() { + None + } else { + Some(CStr::from_ptr(ptr).to_str().unwrap()) + } + } + } + pub fn goto_first_child(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1fa105cd..dddf7c09 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -10,9 +10,11 @@ extern "C" { #include #include -#define TREE_SITTER_LANGUAGE_VERSION 9 +#define TREE_SITTER_LANGUAGE_VERSION 10 +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9 typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; typedef struct TSParser TSParser; typedef struct TSTree TSTree; @@ -119,7 +121,8 @@ bool ts_node_has_changes(TSNode); bool ts_node_has_error(TSNode); TSNode ts_node_parent(TSNode); TSNode ts_node_child(TSNode, uint32_t); -TSNode ts_node_child_by_ref(TSNode, const char *); +TSNode ts_node_child_by_field_id(TSNode, TSFieldId); +TSNode ts_node_child_by_field_name(TSNode, const char *, uint32_t); TSNode ts_node_named_child(TSNode, uint32_t); uint32_t ts_node_child_count(TSNode); uint32_t ts_node_named_child_count(TSNode); @@ -139,6 +142,8 @@ TSTreeCursor ts_tree_cursor_new(TSNode); void ts_tree_cursor_delete(TSTreeCursor *); void ts_tree_cursor_reset(TSTreeCursor *, TSNode); TSNode ts_tree_cursor_current_node(const TSTreeCursor *); +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); bool ts_tree_cursor_goto_parent(TSTreeCursor *); bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); bool ts_tree_cursor_goto_first_child(TSTreeCursor *); @@ -147,6 +152,9 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); uint32_t ts_language_symbol_count(const TSLanguage *); const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *); +uint32_t ts_language_field_count(const TSLanguage *); +const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId); +TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t); TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol); uint32_t ts_language_version(const TSLanguage *); diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index e5037062..d9d63614 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -15,6 +15,7 @@ extern "C" { #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; #endif @@ -54,7 +55,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t alias_sequence_id; + uint8_t child_info_sequence_id; }; } params; TSParseActionType type : 4; @@ -85,7 +86,7 @@ struct TSLanguage { const TSParseActionEntry *parse_actions; const TSLexMode *lex_modes; const TSSymbol *alias_sequences; - uint16_t max_alias_sequence_length; + uint16_t max_child_info_production_length; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -98,6 +99,9 @@ struct TSLanguage { unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; + uint32_t field_count; + const TSFieldId *field_sequences; + const char **field_names; }; /* diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 8eb89d46..fad30e84 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -108,7 +108,7 @@ static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLa .subtree = tree, .position = length_zero(), .child_index = 0, - .structural_child_index = 0, + .child_info_offset = 0, })); return (Iterator) { .cursor = *cursor, @@ -144,15 +144,11 @@ Length iterator_end_position(Iterator *self) { static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (ts_subtree_visible(*entry.subtree)) return true; - if (self->cursor.stack.size > 1) { - Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->language, - parent.ptr->alias_sequence_id - ); - return alias_sequence && alias_sequence[entry.structural_child_index] != 0; + if (entry.child_info_offset) { + return self->language->alias_sequences[entry.child_info_offset] != 0; + } else { + return false; } - return false; } static void iterator_get_visible_state(const Iterator *self, Subtree *tree, @@ -167,15 +163,8 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, for (; i + 1 > 0; i--) { TreeCursorEntry entry = self->cursor.stack.contents[i]; - if (i > 0) { - const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->language, - parent->ptr->alias_sequence_id - ); - if (alias_sequence) { - *alias_symbol = alias_sequence[entry.structural_child_index]; - } + if (entry.child_info_offset) { + *alias_symbol = self->language->alias_sequences[entry.child_info_offset]; } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { @@ -201,7 +190,9 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; - uint32_t structural_child_index = 0; + uint32_t child_info_offset = + self->language->max_child_info_production_length * + ts_subtree_child_info_sequence_id(*entry.subtree); for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { const Subtree *child = &entry.subtree->ptr->children[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); @@ -212,7 +203,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { .subtree = child, .position = position, .child_index = i, - .structural_child_index = structural_child_index, + .child_info_offset = child_info_offset, })); if (iterator_tree_is_visible(self)) { @@ -229,7 +220,9 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { } position = child_right; - if (!ts_subtree_extra(*child)) structural_child_index++; + if (!ts_subtree_extra(*child) && child_info_offset) { + child_info_offset++; + } } } while (did_descend); @@ -256,15 +249,17 @@ static void iterator_advance(Iterator *self) { uint32_t child_index = entry.child_index + 1; if (ts_subtree_child_count(*parent) > child_index) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t structural_child_index = entry.structural_child_index; - if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; + uint32_t child_info_offset = entry.child_info_offset; + if (child_info_offset && !ts_subtree_extra(*entry.subtree)) { + child_info_offset++; + } const Subtree *next_child = &parent->ptr->children[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = next_child, .position = position, .child_index = child_index, - .structural_child_index = structural_child_index, + .child_info_offset = child_info_offset, })); if (iterator_tree_is_visible(self)) { diff --git a/lib/src/language.c b/lib/src/language.c index 9541bba2..74a7b58d 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -3,6 +3,8 @@ #include "./error_costs.h" #include +#define LANGUAGE_VERSION_WITH_FIELDS 10 + void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { @@ -69,3 +71,39 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol return TSSymbolTypeAuxiliary; } } + +uint32_t ts_language_field_count(const TSLanguage *self) { + if (self->version >= LANGUAGE_VERSION_WITH_FIELDS) { + return self->field_count; + } else { + return 0; + } +} + +const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) { + uint32_t count = ts_language_field_count(self); + if (count) { + return self->field_names[id]; + } else { + return NULL; + } +} + +TSFieldId ts_language_field_id_for_name( + const TSLanguage *self, + const char *name, + uint32_t name_length +) { + uint32_t count = ts_language_field_count(self); + for (TSSymbol i = 1; i < count + 1; i++) { + switch (strncmp(name, self->field_names[i], name_length)) { + case 0: + return i; + case -1: + return 0; + default: + break; + } + } + return 0; +} diff --git a/lib/src/language.h b/lib/src/language.h index 0a0f108f..43a5eaa5 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -83,7 +83,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self, static inline const TSSymbol * ts_language_alias_sequence(const TSLanguage *self, unsigned id) { return id > 0 ? - self->alias_sequences + id * self->max_alias_sequence_length : + self->alias_sequences + id * self->max_child_info_production_length : NULL; } diff --git a/lib/src/node.c b/lib/src/node.c index 081ac803..8ed8355e 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -8,8 +8,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t structural_child_index; - const TSSymbol *alias_sequence; + uint32_t child_info_offset; + TSFieldId last_field_id; } NodeChildIterator; // TSNode - constructors @@ -49,19 +49,18 @@ static inline Subtree ts_node__subtree(TSNode self) { static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, 0}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( - node->tree->language, - subtree.ptr->alias_sequence_id - ); + uint32_t child_info_offset = + subtree.ptr->child_info_sequence_id * + node->tree->language->max_child_info_production_length; return (NodeChildIterator) { .tree = node->tree, .parent = subtree, .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, + .child_info_offset = child_info_offset, + .last_field_id = 0, }; } @@ -69,11 +68,10 @@ static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode * if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; const Subtree *child = &self->parent.ptr->children[self->child_index]; TSSymbol alias_symbol = 0; - if (!ts_subtree_extra(*child)) { - if (self->alias_sequence) { - alias_symbol = self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; + if (!ts_subtree_extra(*child) && self->child_info_offset) { + alias_symbol = self->tree->language->alias_sequences[self->child_info_offset]; + self->last_field_id = self->tree->language->field_sequences[self->child_info_offset]; + self->child_info_offset++; } if (self->child_index > 0) { self->position = length_add(self->position, ts_subtree_padding(*child)); @@ -453,10 +451,32 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { return ts_node__child(self, child_index, false); } -TSNode ts_node_child_by_ref(TSNode self, const char *ref_name) { +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { + if (field_id) { + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.last_field_id == field_id) { + return child; + } + } + } return ts_node__null(); } +TSNode ts_node_child_by_field_name( + TSNode self, + const char *name, + uint32_t name_length +) { + TSFieldId field_id = ts_language_field_id_for_name( + self.tree->language, + name, + name_length + ); + return ts_node_child_by_field_id(self, field_id); +} + uint32_t ts_node_child_count(TSNode self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) { diff --git a/lib/src/parser.c b/lib/src/parser.c index 85452f8d..0c4453e9 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t alias_sequence_id, bool fragile) { + uint16_t child_info_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); @@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, alias_sequence_id, self->language + symbol, &children, child_info_sequence_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->alias_sequence_id = alias_sequence_id; + parent.ptr->child_info_sequence_id = child_info_sequence_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->alias_sequence_id, + child.ptr->child_info_sequence_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .alias_sequence_id = action.params.alias_sequence_id, + .child_info_sequence_id = action.params.child_info_sequence_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.alias_sequence_id, + action.dynamic_precedence, action.child_info_sequence_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, - action.params.dynamic_precedence, action.params.alias_sequence_id, + action.params.dynamic_precedence, action.params.child_info_sequence_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { @@ -1526,7 +1526,10 @@ const TSLanguage *ts_parser_language(const TSParser *self) { } bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { - if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false; + if (language) { + if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false; + if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false; + } if (self->external_scanner_payload && self->language->external_scanner.destroy) { self->language->external_scanner.destroy(self->external_scanner_payload); diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 5956fb5d..557e92d7 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short alias_sequence_id; + unsigned short child_info_sequence_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 6ca00792..ec1c11ee 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned alias_sequence_id, + SubtreeArray *children, unsigned child_info_sequence_id, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .alias_sequence_id = alias_sequence_id, + .child_info_sequence_id = child_info_sequence_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -916,7 +916,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t structural_child_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence( language, - ts_subtree_alias_sequence_id(*self) + ts_subtree_child_info_sequence_id(*self) ); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index b0423afb..6226e4f6 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t alias_sequence_id; + uint16_t child_info_sequence_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_alias_sequence_id(Subtree self) { +static inline uint16_t ts_subtree_child_info_sequence_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->alias_sequence_id; + return self.ptr->child_info_sequence_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 5ccf4501..f6cb00b4 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -9,8 +9,7 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t structural_child_index; - const TSSymbol *alias_sequence; + uint32_t child_info_offset; } CursorChildIterator; // CursorChildIterator @@ -18,19 +17,17 @@ typedef struct { static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - last_entry->subtree->ptr->alias_sequence_id - ); + uint32_t child_info_offset = + last_entry->subtree->ptr->child_info_sequence_id * + self->tree->language->max_child_info_production_length; return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, + .child_info_offset = child_info_offset, }; } @@ -43,17 +40,17 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, .subtree = child, .position = self->position, .child_index = self->child_index, - .structural_child_index = self->structural_child_index, + .child_info_offset = self->child_info_offset, }; *visible = ts_subtree_visible(*child); bool extra = ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; + if (!extra && self->child_info_offset) { + *visible |= self->tree->language->alias_sequences[self->child_info_offset]; + self->child_info_offset++; } self->position = length_add(self->position, ts_subtree_size(*child)); self->child_index++; - if (!extra) self->structural_child_index++; if (self->child_index < self->parent.ptr->child_count) { Subtree next_child = self->parent.ptr->children[self->child_index]; @@ -85,7 +82,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { ts_node_start_point(node) }, .child_index = 0, - .structural_child_index = 0, + .child_info_offset = 0, })); } @@ -176,7 +173,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { TreeCursorEntry entry = array_pop(&self->stack); CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); iterator.child_index = entry.child_index; - iterator.structural_child_index = entry.structural_child_index; + iterator.child_info_offset = entry.child_info_offset; iterator.position = entry.position; bool visible = false; @@ -207,12 +204,9 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_aliased = false; if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->alias_sequence_id - ); - is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; + is_aliased = + entry->child_info_offset && + self->tree->language->alias_sequences[entry->child_info_offset]; } if (ts_subtree_visible(*entry->subtree) || is_aliased) { self->stack.size = i + 1; @@ -226,15 +220,8 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); TSSymbol alias_symbol = 0; - if (self->stack.size > 1) { - TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->alias_sequence_id - ); - if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { - alias_symbol = alias_sequence[last_entry->structural_child_index]; - } + if (last_entry->child_info_offset) { + alias_symbol = self->tree->language->alias_sequences[last_entry->child_info_offset]; } return ts_node_new( self->tree, @@ -243,3 +230,19 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { alias_symbol ); } + +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *entry = array_back(&self->stack); + if (entry->child_info_offset) { + return self->tree->language->field_sequences[entry->child_info_offset]; + } else { + return 0; + } +} + +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { + TSFieldId id = ts_tree_cursor_current_field_id(_self); + const TreeCursor *self = (const TreeCursor *)_self; + return self->tree->language->field_names[id]; +} diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 55bdad86..f50bdb63 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -7,7 +7,7 @@ typedef struct { const Subtree *subtree; Length position; uint32_t child_index; - uint32_t structural_child_index; + uint32_t child_info_offset; } TreeCursorEntry; typedef struct { diff --git a/script/generate-bindings b/script/generate-bindings index f9299095..802f1ccf 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -12,6 +12,13 @@ bindgen \ $header_path > $output_path echo "" >> $output_path -version_constant='TREE_SITTER_LANGUAGE_VERSION' -version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3) -echo "pub const $version_constant: usize = $version_number;" >> $output_path + +defines=( + TREE_SITTER_LANGUAGE_VERSION + TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION +) + +for define in ${defines[@]}; do + define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3) + echo "pub const $define: usize = $define_value;" >> $output_path +done