From bf4e1304f87e5769b9fefa5bfd6c47bf851cd4fc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Jan 2019 12:02:17 -0800 Subject: [PATCH 01/25] Start work on new `ref` API, for giving names to nodes' children Co-Authored-By: Ayman Nadeem --- cli/src/generate/parse_grammar.rs | 5 +++ cli/src/tests/mod.rs | 1 + cli/src/tests/node_refs.rs | 62 +++++++++++++++++++++++++++++++ lib/binding/bindings.rs | 3 ++ lib/binding/lib.rs | 10 ++++- lib/include/tree_sitter/api.h | 1 + lib/src/node.c | 4 ++ 7 files changed, 85 insertions(+), 1 deletion(-) create mode 100644 cli/src/tests/node_refs.rs diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index cf2005ad..4f049572 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -26,6 +26,10 @@ enum RuleJSON { CHOICE { members: Vec, }, + REF { + value: String, + content: Box, + }, SEQ { members: Vec, }, @@ -120,6 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), + RuleJSON::REF { content, value } => parse_rule(*content), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), RuleJSON::REPEAT { content } => { diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index af2b4582..157f09a8 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,5 +1,6 @@ mod corpus_test; mod helpers; +mod node_refs; mod node_test; mod parser_test; mod properties_test; diff --git a/cli/src/tests/node_refs.rs b/cli/src/tests/node_refs.rs new file mode 100644 index 00000000..143ae7f6 --- /dev/null +++ b/cli/src/tests/node_refs.rs @@ -0,0 +1,62 @@ +use super::helpers::fixtures::get_test_language; +use crate::generate::generate_parser_for_grammar; +use tree_sitter::Parser; + 
+#[test] +fn test_basic_node_refs() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_grammar_with_refs", + "extras": [ + {"type": "PATTERN", "value": "\\s+"} + ], + "rules": { + "rule_a": { + "type": "SEQ", + "members": [ + { + "type": "REF", + "value": "ref_1", + "content": { + "type": "STRING", + "value": "child-1" + } + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "child-2" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "REF", + "value": "ref_2", + "content": { + "type": "STRING", + "value": "child-3" + } + } + ] + } + } + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + let language = get_test_language(&parser_name, &parser_code, None); + parser.set_language(language).unwrap(); + + let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); + let root_node = tree.root_node(); + assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0)); + assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2)); +} diff --git a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 9d1f3490..3d71f804 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -227,6 +227,9 @@ extern "C" { extern "C" { pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } +extern "C" { + pub fn ts_node_child_by_ref(arg1: TSNode, arg2: *const ::std::os::raw::c_char) -> TSNode; +} extern "C" { pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; } diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 66444a55..841f5895 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -12,7 +12,7 @@ use std::os::unix::io::AsRawFd; use regex::Regex; use serde::de::DeserializeOwned; use std::collections::HashMap; -use std::ffi::CStr; +use std::ffi::{CStr, CString}; use std::fmt; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; @@ -463,6 +463,14 @@ impl<'tree> Node<'tree> { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } + pub fn 
child_by_ref(&self, ref_name: &str) -> Option { + if let Ok(c_ref_name) = CString::new(ref_name) { + Self::new(unsafe { ffi::ts_node_child_by_ref(self.0, c_ref_name.as_ptr()) }) + } else { + None + } + } + pub fn child_count(&self) -> usize { unsafe { ffi::ts_node_child_count(self.0) as usize } } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 16841c8e..1fa105cd 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -119,6 +119,7 @@ bool ts_node_has_changes(TSNode); bool ts_node_has_error(TSNode); TSNode ts_node_parent(TSNode); TSNode ts_node_child(TSNode, uint32_t); +TSNode ts_node_child_by_ref(TSNode, const char *); TSNode ts_node_named_child(TSNode, uint32_t); uint32_t ts_node_child_count(TSNode); uint32_t ts_node_named_child_count(TSNode); diff --git a/lib/src/node.c b/lib/src/node.c index eb4a3121..081ac803 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -453,6 +453,10 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { return ts_node__child(self, child_index, false); } +TSNode ts_node_child_by_ref(TSNode self, const char *ref_name) { + return ts_node__null(); +} + uint32_t ts_node_child_count(TSNode self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) { From 108ca989ea372464426999ba2aae3f33a706b87d Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 23 Jan 2019 15:13:06 -0800 Subject: [PATCH 02/25] Start work on including child refs in generated parsers --- .../build_tables/build_parse_table.rs | 29 ++++++----- cli/src/generate/build_tables/item.rs | 1 + .../build_tables/minimize_parse_table.rs | 2 +- cli/src/generate/grammars.rs | 13 +++++ cli/src/generate/parse_grammar.rs | 2 +- .../prepare_grammar/flatten_grammar.rs | 51 +++++++++++++++++++ cli/src/generate/render.rs | 24 ++++----- cli/src/generate/rules.rs | 7 +++ cli/src/generate/tables.rs | 12 +++-- 9 files changed, 111 insertions(+), 30 deletions(-) diff --git 
a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 5351f72e..463dca97 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,7 +6,7 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -47,7 +47,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.alias_sequences.push(Vec::new()); + self.parse_table.child_info_sequences.push(Vec::new()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -176,7 +176,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - alias_sequence_id: self.get_alias_sequence_id(item), + child_info_sequence_id: self.get_child_info_sequence_id(item), } }; @@ -645,29 +645,32 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId { - let mut alias_sequence: Vec> = item + fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId { + let mut child_info_sequence: Vec = item .production .steps .iter() - .map(|s| s.alias.clone()) + .map(|s| ChildInfo { + alias: s.alias.clone(), + child_ref: s.child_ref.clone(), + }) .collect(); - while alias_sequence.last() == Some(&None) { - alias_sequence.pop(); + while child_info_sequence.last() == Some(&ChildInfo::default()) { + child_info_sequence.pop(); } if item.production.steps.len() > 
self.parse_table.max_aliased_production_length { self.parse_table.max_aliased_production_length = item.production.steps.len() } if let Some(index) = self .parse_table - .alias_sequences + .child_info_sequences .iter() - .position(|seq| *seq == alias_sequence) + .position(|seq| *seq == child_info_sequence) { index } else { - self.parse_table.alias_sequences.push(alias_sequence); - self.parse_table.alias_sequences.len() - 1 + self.parse_table.child_info_sequences.push(child_info_sequence); + self.parse_table.child_info_sequences.len() - 1 } } @@ -740,7 +743,7 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - alias_sequences: Vec::new(), + child_info_sequences: Vec::new(), max_aliased_production_length: 0, }, } diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index b450bb75..0fc9c5f8 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -20,6 +20,7 @@ lazy_static! { precedence: 0, associativity: None, alias: None, + child_ref: None, }], }; } diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 9b012afe..81a153d3 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - alias_sequence_id: 0, + child_info_sequence_id: 0, symbol, .. 
} => { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index c9282da3..f4862449 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -54,6 +54,7 @@ pub(crate) struct ProductionStep { pub precedence: i32, pub associativity: Option, pub alias: Option, + pub child_ref: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -99,6 +100,7 @@ impl ProductionStep { precedence: 0, associativity: None, alias: None, + child_ref: None, } } @@ -108,6 +110,7 @@ impl ProductionStep { precedence, associativity, alias: self.alias, + child_ref: self.child_ref, } } @@ -120,6 +123,16 @@ impl ProductionStep { value: value.to_string(), is_named, }), + child_ref: self.child_ref, + } + } + pub(crate) fn with_child_ref(self, name: &str) -> Self { + Self { + symbol: self.symbol, + precedence: self.precedence, + associativity: self.associativity, + alias: self.alias, + child_ref: Some(name.to_string()), } } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 4f049572..a11140ac 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), - RuleJSON::REF { content, value } => parse_rule(*content), + RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), RuleJSON::REPEAT { content } => { diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 98276b7e..95071937 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ 
b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -11,6 +11,7 @@ struct RuleFlattener { precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, + child_ref_stack: Vec, } impl RuleFlattener { @@ -23,6 +24,7 @@ impl RuleFlattener { precedence_stack: Vec::new(), associativity_stack: Vec::new(), alias_stack: Vec::new(), + child_ref_stack: Vec::new(), } } @@ -60,6 +62,12 @@ impl RuleFlattener { self.alias_stack.push(alias); } + let mut has_child_ref = false; + if let Some(child_ref) = params.child_ref { + has_child_ref = true; + self.child_ref_stack.push(child_ref); + } + if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { self.production.dynamic_precedence = params.dynamic_precedence; } @@ -86,6 +94,10 @@ impl RuleFlattener { self.alias_stack.pop(); } + if has_child_ref { + self.child_ref_stack.pop(); + } + did_push } Rule::Symbol(symbol) => { @@ -94,6 +106,7 @@ impl RuleFlattener { precedence: self.precedence_stack.last().cloned().unwrap_or(0), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), + child_ref: self.child_ref_stack.last().cloned(), }); true } @@ -355,4 +368,42 @@ mod tests { }] ); } + + #[test] + fn test_flatten_grammar_with_child_refs() { + let result = flatten_variable(Variable { + name: "test".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::child_ref("first-thing".to_string(), Rule::terminal(1)), + Rule::terminal(2), + Rule::choice(vec![ + Rule::Blank, + Rule::child_ref("second-thing".to_string(), Rule::terminal(3)), + ]), + ]), + }) + .unwrap(); + + assert_eq!( + result.productions, + vec![ + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(2)) + ] + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + 
ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"), + ] + }, + ] + ); + } } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 86ed3dc7..05153a0c 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -67,7 +67,7 @@ impl Generator { self.add_symbol_names_list(); self.add_symbol_metadata_list(); - if self.parse_table.alias_sequences.len() > 1 { + if self.parse_table.child_info_sequences.len() > 1 { self.add_alias_sequences(); } @@ -148,9 +148,9 @@ impl Generator { self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); } - for alias_sequence in &self.parse_table.alias_sequences { - for entry in alias_sequence { - if let Some(alias) = entry { + for child_info_sequence in &self.parse_table.child_info_sequences { + for entry in child_info_sequence { + if let Some(alias) = &entry.alias { let alias_kind = if alias.is_named { VariableType::Named } else { @@ -307,14 +307,14 @@ impl Generator { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - self.parse_table.alias_sequences.len() + self.parse_table.child_info_sequences.len() ); indent!(self); - for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) { + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) { add_line!(self, "[{}] = {{", i); indent!(self); - for (j, alias) in sequence.iter().enumerate() { - if let Some(alias) = alias { + for (j, child_info) in sequence.iter().enumerate() { + if let Some(alias) = &child_info.alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } } @@ -686,15 +686,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - alias_sequence_id, + child_info_sequence_id, .. 
} => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if alias_sequence_id != 0 { - add!(self, ", .alias_sequence_id = {}", alias_sequence_id); + if child_info_sequence_id != 0 { + add!(self, ", .alias_sequence_id = {}", child_info_sequence_id); } add!(self, ")"); } @@ -759,7 +759,7 @@ impl Generator { add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - if self.parse_table.alias_sequences.len() > 1 { + if self.parse_table.child_info_sequences.len() > 1 { add_line!( self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index 09a20294..f1939cb1 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -32,6 +32,7 @@ pub(crate) struct MetadataParams { pub is_active: bool, pub is_main_token: bool, pub alias: Option, + pub child_ref: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -57,6 +58,12 @@ pub(crate) enum Rule { } impl Rule { + pub fn child_ref(name: String, content: Rule) -> Self { + add_metadata(content, move |params| { + params.child_ref = Some(name); + }) + } + pub fn alias(content: Rule, value: String, is_named: bool) -> Self { add_metadata(content, move |params| { params.alias = Some(Alias { is_named, value }); diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 6c3da68e..f798544b 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -2,7 +2,7 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; use hashbrown::HashMap; -pub(crate) type AliasSequenceId = usize; +pub(crate) type ChildInfoSequenceId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -21,7 +21,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - 
alias_sequence_id: AliasSequenceId, + child_info_sequence_id: ChildInfoSequenceId, }, } @@ -39,11 +39,17 @@ pub(crate) struct ParseState { pub unfinished_item_signature: u64, } +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct ChildInfo { + pub alias: Option, + pub child_ref: Option, +} + #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub alias_sequences: Vec>>, + pub child_info_sequences: Vec>, pub max_aliased_production_length: usize, } From 7f66d2406fc68358c8e3cff62a86ae983f676fe4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Feb 2019 12:28:48 -0800 Subject: [PATCH 03/25] test script: Tell cargo which package has the tests --- script/test | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/script/test b/script/test index 09cf9f83..066778b7 100755 --- a/script/test +++ b/script/test @@ -76,8 +76,8 @@ else fi if [[ "${mode}" == "debug" ]]; then - test_binary=$(cargo test --no-run --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]') - lldb "${test_binary}" -- $top_level_filter + test_binary=$(cargo test --no-run --package=tree-sitter-cli --lib --message-format=json 2> /dev/null | jq -rs '.[-1].filenames[0]') + lldb "${test_binary}" -- "${top_level_filter}" else - cargo test --jobs 1 $top_level_filter -- --nocapture + cargo test --package=tree-sitter-cli --lib --jobs 1 $top_level_filter -- --nocapture fi From 18a13b457de3a7a258445affd4c24ad227e30a21 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Feb 2019 12:29:20 -0800 Subject: [PATCH 04/25] Get basic field API working --- .../build_tables/build_parse_table.rs | 17 +- cli/src/generate/build_tables/item.rs | 2 +- cli/src/generate/dsl.js | 9 + cli/src/generate/grammars.rs | 12 +- cli/src/generate/parse_grammar.rs | 6 +- .../prepare_grammar/flatten_grammar.rs | 30 +-- cli/src/generate/render.rs | 174 +++++++++++++----- cli/src/generate/rules.rs | 6 +- cli/src/generate/tables.rs | 4 +- cli/src/parse.rs 
| 8 +- cli/src/tests/mod.rs | 1 - cli/src/tests/node_test.rs | 59 ++++++ lib/binding/bindings.rs | 37 +++- lib/binding/lib.rs | 60 ++++-- lib/include/tree_sitter/api.h | 12 +- lib/include/tree_sitter/parser.h | 8 +- lib/src/get_changed_ranges.c | 43 ++--- lib/src/language.c | 38 ++++ lib/src/language.h | 2 +- lib/src/node.c | 50 +++-- lib/src/parser.c | 19 +- lib/src/reduce_action.h | 2 +- lib/src/subtree.c | 10 +- lib/src/subtree.h | 6 +- lib/src/tree_cursor.c | 63 ++++--- lib/src/tree_cursor.h | 2 +- script/generate-bindings | 13 +- 27 files changed, 498 insertions(+), 195 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 463dca97..7a111622 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -4,9 +4,10 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; -use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType}; +use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfoSequenceId, ChildInfo, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, + ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -652,14 +653,14 @@ impl<'a> ParseTableBuilder<'a> { .iter() .map(|s| ChildInfo { alias: s.alias.clone(), - child_ref: s.child_ref.clone(), + field_name: s.field_name.clone(), }) .collect(); while child_info_sequence.last() == Some(&ChildInfo::default()) { child_info_sequence.pop(); } - if item.production.steps.len() > self.parse_table.max_aliased_production_length { - self.parse_table.max_aliased_production_length = item.production.steps.len() + if item.production.steps.len() > self.parse_table.max_production_length_with_child_info { + 
self.parse_table.max_production_length_with_child_info = item.production.steps.len() } if let Some(index) = self .parse_table @@ -669,7 +670,9 @@ impl<'a> ParseTableBuilder<'a> { { index } else { - self.parse_table.child_info_sequences.push(child_info_sequence); + self.parse_table + .child_info_sequences + .push(child_info_sequence); self.parse_table.child_info_sequences.len() - 1 } } @@ -744,7 +747,7 @@ pub(crate) fn build_parse_table( states: Vec::new(), symbols: Vec::new(), child_info_sequences: Vec::new(), - max_aliased_production_length: 0, + max_production_length_with_child_info: 0, }, } .build()?; diff --git a/cli/src/generate/build_tables/item.rs b/cli/src/generate/build_tables/item.rs index 0fc9c5f8..0d7a4e29 100644 --- a/cli/src/generate/build_tables/item.rs +++ b/cli/src/generate/build_tables/item.rs @@ -20,7 +20,7 @@ lazy_static! { precedence: 0, associativity: None, alias: None, - child_ref: None, + field_name: None, }], }; } diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index c18ac530..428fc604 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -34,6 +34,14 @@ function blank() { }; } +function field(name, rule) { + return { + type: "FIELD", + name: name, + content: rule + } +} + function choice(...elements) { return { type: "CHOICE", @@ -363,6 +371,7 @@ global.seq = seq; global.sym = sym; global.token = token; global.grammar = grammar; +global.field = field; const result = require(process.env.TREE_SITTER_GRAMMAR_PATH); console.log(JSON.stringify(result, null, 2)); diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index f4862449..7f9e09d6 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -54,7 +54,7 @@ pub(crate) struct ProductionStep { pub precedence: i32, pub associativity: Option, pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Clone, Debug, PartialEq, Eq)] @@ -100,7 +100,7 @@ impl ProductionStep { precedence: 0, associativity: 
None, alias: None, - child_ref: None, + field_name: None, } } @@ -110,7 +110,7 @@ impl ProductionStep { precedence, associativity, alias: self.alias, - child_ref: self.child_ref, + field_name: self.field_name, } } @@ -123,16 +123,16 @@ impl ProductionStep { value: value.to_string(), is_named, }), - child_ref: self.child_ref, + field_name: self.field_name, } } - pub(crate) fn with_child_ref(self, name: &str) -> Self { + pub(crate) fn with_field_name(self, name: &str) -> Self { Self { symbol: self.symbol, precedence: self.precedence, associativity: self.associativity, alias: self.alias, - child_ref: Some(name.to_string()), + field_name: Some(name.to_string()), } } } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index a11140ac..5b244c87 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -26,8 +26,8 @@ enum RuleJSON { CHOICE { members: Vec, }, - REF { - value: String, + FIELD { + name: String, content: Box, }, SEQ { @@ -124,7 +124,7 @@ fn parse_rule(json: RuleJSON) -> Rule { RuleJSON::PATTERN { value } => Rule::Pattern(value), RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name), RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()), - RuleJSON::REF { content, value } => Rule::child_ref(value, parse_rule(*content)), + RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)), RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()), RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)), RuleJSON::REPEAT { content } => { diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 95071937..1c050a6b 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -11,7 +11,7 @@ struct RuleFlattener { precedence_stack: Vec, associativity_stack: Vec, alias_stack: Vec, - 
child_ref_stack: Vec, + field_name_stack: Vec, } impl RuleFlattener { @@ -24,7 +24,7 @@ impl RuleFlattener { precedence_stack: Vec::new(), associativity_stack: Vec::new(), alias_stack: Vec::new(), - child_ref_stack: Vec::new(), + field_name_stack: Vec::new(), } } @@ -62,10 +62,10 @@ impl RuleFlattener { self.alias_stack.push(alias); } - let mut has_child_ref = false; - if let Some(child_ref) = params.child_ref { - has_child_ref = true; - self.child_ref_stack.push(child_ref); + let mut has_field_name = false; + if let Some(field_name) = params.field_name { + has_field_name = true; + self.field_name_stack.push(field_name); } if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() { @@ -94,8 +94,8 @@ impl RuleFlattener { self.alias_stack.pop(); } - if has_child_ref { - self.child_ref_stack.pop(); + if has_field_name { + self.field_name_stack.pop(); } did_push @@ -106,7 +106,7 @@ impl RuleFlattener { precedence: self.precedence_stack.last().cloned().unwrap_or(0), associativity: self.associativity_stack.last().cloned(), alias: self.alias_stack.last().cloned(), - child_ref: self.child_ref_stack.last().cloned(), + field_name: self.field_name_stack.last().cloned(), }); true } @@ -370,16 +370,16 @@ mod tests { } #[test] - fn test_flatten_grammar_with_child_refs() { + fn test_flatten_grammar_with_field_names() { let result = flatten_variable(Variable { name: "test".to_string(), kind: VariableType::Named, rule: Rule::seq(vec![ - Rule::child_ref("first-thing".to_string(), Rule::terminal(1)), + Rule::field("first-thing".to_string(), Rule::terminal(1)), Rule::terminal(2), Rule::choice(vec![ Rule::Blank, - Rule::child_ref("second-thing".to_string(), Rule::terminal(3)), + Rule::field("second-thing".to_string(), Rule::terminal(3)), ]), ]), }) @@ -391,16 +391,16 @@ mod tests { Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + 
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), ProductionStep::new(Symbol::terminal(2)) ] }, Production { dynamic_precedence: 0, steps: vec![ - ProductionStep::new(Symbol::terminal(1)).with_child_ref("first-thing"), + ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"), ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_child_ref("second-thing"), + ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"), ] }, ] diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 05153a0c..089edb79 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -6,6 +6,7 @@ use core::ops::Range; use hashbrown::{HashMap, HashSet}; use std::fmt::Write; use std::mem::swap; +use tree_sitter::LANGUAGE_VERSION; macro_rules! add { ($this: tt, $($arg: tt)*) => {{ @@ -56,10 +57,12 @@ struct Generator { alias_ids: HashMap, external_scanner_states: Vec>, alias_map: HashMap>, + field_names: Vec, } impl Generator { fn generate(mut self) -> String { + self.init(); self.add_includes(); self.add_pragmas(); self.add_stats(); @@ -68,7 +71,11 @@ impl Generator { self.add_symbol_metadata_list(); if self.parse_table.child_info_sequences.len() > 1 { - self.add_alias_sequences(); + if !self.field_names.is_empty() { + self.add_field_name_enum(); + } + self.add_field_name_names_list(); + self.add_child_info_sequences(); } let mut main_lex_table = LexTable::default(); @@ -95,6 +102,49 @@ impl Generator { self.buffer } + fn init(&mut self) { + let mut symbol_identifiers = HashSet::new(); + for i in 0..self.parse_table.symbols.len() { + self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); + } + + let mut field_names = Vec::new(); + for child_info_sequence in &self.parse_table.child_info_sequences { + for entry in child_info_sequence { + if let Some(field_name) = &entry.field_name { + field_names.push(field_name); + } + + if let Some(alias) = 
&entry.alias { + let alias_kind = if alias.is_named { + VariableType::Named + } else { + VariableType::Anonymous + }; + let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { + let (name, kind) = self.metadata_for_symbol(*symbol); + name == alias.value && kind == alias_kind + }); + let alias_id = if let Some(symbol) = matching_symbol { + self.symbol_ids[&symbol].clone() + } else if alias.is_named { + format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) + } else { + format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) + }; + self.alias_ids.entry(alias.clone()).or_insert(alias_id); + self.alias_map + .entry(alias.clone()) + .or_insert(matching_symbol); + } + } + } + + field_names.sort_unstable(); + field_names.dedup(); + self.field_names = field_names.into_iter().cloned().collect(); + } + fn add_includes(&mut self) { add_line!(self, "#include "); add_line!(self, ""); @@ -143,39 +193,7 @@ impl Generator { }) .count(); - let mut symbol_identifiers = HashSet::new(); - for i in 0..self.parse_table.symbols.len() { - self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers); - } - - for child_info_sequence in &self.parse_table.child_info_sequences { - for entry in child_info_sequence { - if let Some(alias) = &entry.alias { - let alias_kind = if alias.is_named { - VariableType::Named - } else { - VariableType::Anonymous - }; - let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| { - let (name, kind) = self.metadata_for_symbol(*symbol); - name == alias.value && kind == alias_kind - }); - let alias_id = if let Some(symbol) = matching_symbol { - self.symbol_ids[&symbol].clone() - } else if alias.is_named { - format!("alias_sym_{}", self.sanitize_identifier(&alias.value)) - } else { - format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value)) - }; - self.alias_ids.entry(alias.clone()).or_insert(alias_id); - self.alias_map - .entry(alias.clone()) - .or_insert(matching_symbol); 
- } - } - } - - add_line!(self, "#define LANGUAGE_VERSION {}", 9); + add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION); add_line!( self, "#define STATE_COUNT {}", @@ -197,10 +215,11 @@ impl Generator { "#define EXTERNAL_TOKEN_COUNT {}", self.syntax_grammar.external_tokens.len() ); + add_line!(self, "#define FIELD_COUNT {}", self.field_names.len()); add_line!( self, - "#define MAX_ALIAS_SEQUENCE_LENGTH {}", - self.parse_table.max_aliased_production_length + "#define MAX_CHILD_INFO_PRODUCTION_LENGTH {}", + self.parse_table.max_production_length_with_child_info ); add_line!(self, ""); } @@ -253,6 +272,34 @@ impl Generator { add_line!(self, ""); } + fn add_field_name_enum(&mut self) { + add_line!(self, "enum {{"); + indent!(self); + for (i, field_name) in self.field_names.iter().enumerate() { + add_line!(self, "{} = {},", self.field_id(field_name), i + 1); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + + fn add_field_name_names_list(&mut self) { + add_line!(self, "static const char *ts_field_names[] = {{"); + indent!(self); + add_line!(self, "[0] = NULL,"); + for field_name in &self.field_names { + add_line!( + self, + "[{}] = \"{}\",", + self.field_id(field_name), + field_name + ); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); + } + fn add_symbol_metadata_list(&mut self) { add_line!( self, @@ -303,14 +350,18 @@ impl Generator { add_line!(self, ""); } - fn add_alias_sequences(&mut self) { + fn add_child_info_sequences(&mut self) { add_line!( self, - "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", + "static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", self.parse_table.child_info_sequences.len() ); indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate().skip(1) { + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { + if sequence.iter().all(|i| i.alias.is_none()) { + continue; + } + 
add_line!(self, "[{}] = {{", i); indent!(self); for (j, child_info) in sequence.iter().enumerate() { @@ -324,6 +375,31 @@ impl Generator { dedent!(self); add_line!(self, "}};"); add_line!(self, ""); + + add_line!( + self, + "static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", + self.parse_table.child_info_sequences.len() + ); + indent!(self); + for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { + if sequence.iter().all(|i| i.field_name.is_none()) { + continue; + } + + add_line!(self, "[{}] = {{", i); + indent!(self); + for (j, child_info) in sequence.iter().enumerate() { + if let Some(field_name) = &child_info.field_name { + add_line!(self, "[{}] = {},", j, self.field_id(&field_name)); + } + } + dedent!(self); + add_line!(self, "}},"); + } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); } fn add_lex_function(&mut self, name: &str, lex_table: LexTable) { @@ -694,7 +770,11 @@ impl Generator { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } if child_info_sequence_id != 0 { - add!(self, ", .alias_sequence_id = {}", child_info_sequence_id); + add!( + self, + ", .child_info_sequence_id = {}", + child_info_sequence_id + ); } add!(self, ")"); } @@ -764,11 +844,18 @@ impl Generator { self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); + + add_line!(self, ".field_count = FIELD_COUNT,"); + add_line!( + self, + ".field_sequences = (const TSFieldId *)ts_field_sequences," + ); + add_line!(self, ".field_names = ts_field_names,"); } add_line!( self, - ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," + ".max_child_info_production_length = MAX_CHILD_INFO_PRODUCTION_LENGTH," ); add_line!(self, ".lex_fn = ts_lex,"); @@ -865,6 +952,10 @@ impl Generator { self.symbol_ids.insert(symbol, id); } + fn field_id(&self, field_name: &String) -> String { + format!("field_id_{}", field_name) + } + fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { match 
symbol.kind { SymbolType::End => ("end", VariableType::Hidden), @@ -996,6 +1087,7 @@ pub(crate) fn render_c_code( alias_ids: HashMap::new(), external_scanner_states: Vec::new(), alias_map: HashMap::new(), + field_names: Vec::new(), } .generate() } diff --git a/cli/src/generate/rules.rs b/cli/src/generate/rules.rs index f1939cb1..174e06e5 100644 --- a/cli/src/generate/rules.rs +++ b/cli/src/generate/rules.rs @@ -32,7 +32,7 @@ pub(crate) struct MetadataParams { pub is_active: bool, pub is_main_token: bool, pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)] @@ -58,9 +58,9 @@ pub(crate) enum Rule { } impl Rule { - pub fn child_ref(name: String, content: Rule) -> Self { + pub fn field(name: String, content: Rule) -> Self { add_metadata(content, move |params| { - params.child_ref = Some(name); + params.field_name = Some(name); }) } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index f798544b..fc1ad642 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -42,7 +42,7 @@ pub(crate) struct ParseState { #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct ChildInfo { pub alias: Option, - pub child_ref: Option, + pub field_name: Option, } #[derive(Debug, PartialEq, Eq)] @@ -50,7 +50,7 @@ pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, pub child_info_sequences: Vec>, - pub max_aliased_production_length: usize, + pub max_production_length_with_child_info: usize, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/cli/src/parse.rs b/cli/src/parse.rs index bd134457..f7961754 100644 --- a/cli/src/parse.rs +++ b/cli/src/parse.rs @@ -49,9 +49,8 @@ pub fn parse_file_at_path( let mut did_visit_children = false; loop { let node = cursor.node(); - let is_named = node.is_named(); if did_visit_children { - if is_named { + if node.is_named() { stdout.write(b")")?; needs_newline = true; } @@ -64,13 +63,16 @@ pub fn 
parse_file_at_path( break; } } else { - if is_named { + if node.is_named() { if needs_newline { stdout.write(b"\n")?; } for _ in 0..indent_level { stdout.write(b" ")?; } + if let Some(field_name) = cursor.field_name() { + write!(&mut stdout, "{}: ", field_name)?; + } let start = node.start_position(); let end = node.end_position(); write!( diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs index 157f09a8..af2b4582 100644 --- a/cli/src/tests/mod.rs +++ b/cli/src/tests/mod.rs @@ -1,6 +1,5 @@ mod corpus_test; mod helpers; -mod node_refs; mod node_test; mod parser_test; mod properties_test; diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index e7501569..dc4bb7a2 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -338,6 +338,65 @@ fn test_node_edit() { } } +#[test] +fn test_node_field_names() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_grammar_with_refs", + "extras": [ + {"type": "PATTERN", "value": "\\s+"} + ], + "rules": { + "rule_a": { + "type": "SEQ", + "members": [ + { + "type": "FIELD", + "name": "field_1", + "content": { + "type": "STRING", + "value": "child-1" + } + }, + { + "type": "CHOICE", + "members": [ + { + "type": "STRING", + "value": "child-2" + }, + { + "type": "BLANK" + } + ] + }, + { + "type": "FIELD", + "name": "field_2", + "content": { + "type": "STRING", + "value": "child-3" + } + } + ] + } + } + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + let language = get_test_language(&parser_name, &parser_code, None); + parser.set_language(language).unwrap(); + + let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); + let root_node = tree.root_node(); + assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); + assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); +} + fn get_all_nodes(tree: &Tree) -> Vec { let mut result = Vec::new(); let mut visited_children = false; diff --git 
a/lib/binding/bindings.rs b/lib/binding/bindings.rs index 3d71f804..3e12619b 100644 --- a/lib/binding/bindings.rs +++ b/lib/binding/bindings.rs @@ -3,6 +3,7 @@ pub type __darwin_size_t = ::std::os::raw::c_ulong; pub type FILE = [u64; 19usize]; pub type TSSymbol = u16; +pub type TSFieldId = u16; #[repr(C)] #[derive(Debug, Copy, Clone)] pub struct TSLanguage { @@ -228,7 +229,14 @@ extern "C" { pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode; } extern "C" { - pub fn ts_node_child_by_ref(arg1: TSNode, arg2: *const ::std::os::raw::c_char) -> TSNode; + pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode; +} +extern "C" { + pub fn ts_node_child_by_field_name( + arg1: TSNode, + arg2: *const ::std::os::raw::c_char, + arg3: u32, + ) -> TSNode; } extern "C" { pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode; @@ -289,6 +297,14 @@ extern "C" { extern "C" { pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; } +extern "C" { + pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId; +} +extern "C" { + pub fn ts_tree_cursor_current_field_name( + arg1: *const TSTreeCursor, + ) -> *const ::std::os::raw::c_char; +} extern "C" { pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool; } @@ -316,6 +332,22 @@ extern "C" { arg2: *const ::std::os::raw::c_char, ) -> TSSymbol; } +extern "C" { + pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32; +} +extern "C" { + pub fn ts_language_field_name_for_id( + arg1: *const TSLanguage, + arg2: TSFieldId, + ) -> *const ::std::os::raw::c_char; +} +extern "C" { + pub fn ts_language_field_id_for_name( + arg1: *const TSLanguage, + arg2: *const ::std::os::raw::c_char, + arg3: u32, + ) -> TSFieldId; +} extern "C" { pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType; } @@ -323,4 +355,5 @@ extern "C" { pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } -pub const TREE_SITTER_LANGUAGE_VERSION: usize = 
9; +pub const TREE_SITTER_LANGUAGE_VERSION: usize = 10; +pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9; diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 841f5895..c5738608 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -12,15 +12,12 @@ use std::os::unix::io::AsRawFd; use regex::Regex; use serde::de::DeserializeOwned; use std::collections::HashMap; -use std::ffi::{CStr, CString}; -use std::fmt; +use std::ffi::CStr; use std::marker::PhantomData; use std::os::raw::{c_char, c_void}; -use std::ptr; -use std::slice; -use std::str; -use std::u16; +use std::{fmt, ptr, slice, str, u16}; +pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION; pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h"); #[derive(Clone, Copy)] @@ -157,15 +154,21 @@ impl Parser { pub fn set_language(&mut self, language: Language) -> Result<(), String> { unsafe { let version = ffi::ts_language_version(language.0) as usize; - if version == ffi::TREE_SITTER_LANGUAGE_VERSION { - ffi::ts_parser_set_language(self.0, language.0); - Ok(()) - } else { + if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION { + Err(format!( + "Incompatible language version {}. Expected {} or greater.", + version, + ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION + )) + } else if version > ffi::TREE_SITTER_LANGUAGE_VERSION { Err(format!( "Incompatible language version {}. 
Expected {}.", version, ffi::TREE_SITTER_LANGUAGE_VERSION )) + } else { + ffi::ts_parser_set_language(self.0, language.0); + Ok(()) } } } @@ -463,12 +466,15 @@ impl<'tree> Node<'tree> { Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) }) } - pub fn child_by_ref(&self, ref_name: &str) -> Option { - if let Ok(c_ref_name) = CString::new(ref_name) { - Self::new(unsafe { ffi::ts_node_child_by_ref(self.0, c_ref_name.as_ptr()) }) - } else { - None - } + pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option { + let field_name = field_name.as_ref(); + Self::new(unsafe { + ffi::ts_node_child_by_field_name( + self.0, + field_name.as_ptr() as *const c_char, + field_name.len() as u32, + ) + }) } pub fn child_count(&self) -> usize { @@ -587,6 +593,28 @@ impl<'a> TreeCursor<'a> { ) } + pub fn field_id(&self) -> Option { + unsafe { + let id = ffi::ts_tree_cursor_current_field_id(&self.0); + if id == 0 { + None + } else { + Some(id) + } + } + } + + pub fn field_name(&self) -> Option<&str> { + unsafe { + let ptr = ffi::ts_tree_cursor_current_field_name(&self.0); + if ptr.is_null() { + None + } else { + Some(CStr::from_ptr(ptr).to_str().unwrap()) + } + } + } + pub fn goto_first_child(&mut self) -> bool { return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) }; } diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1fa105cd..dddf7c09 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -10,9 +10,11 @@ extern "C" { #include #include -#define TREE_SITTER_LANGUAGE_VERSION 9 +#define TREE_SITTER_LANGUAGE_VERSION 10 +#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9 typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; typedef struct TSParser TSParser; typedef struct TSTree TSTree; @@ -119,7 +121,8 @@ bool ts_node_has_changes(TSNode); bool ts_node_has_error(TSNode); TSNode ts_node_parent(TSNode); TSNode ts_node_child(TSNode, uint32_t); -TSNode 
ts_node_child_by_ref(TSNode, const char *); +TSNode ts_node_child_by_field_id(TSNode, TSFieldId); +TSNode ts_node_child_by_field_name(TSNode, const char *, uint32_t); TSNode ts_node_named_child(TSNode, uint32_t); uint32_t ts_node_child_count(TSNode); uint32_t ts_node_named_child_count(TSNode); @@ -139,6 +142,8 @@ TSTreeCursor ts_tree_cursor_new(TSNode); void ts_tree_cursor_delete(TSTreeCursor *); void ts_tree_cursor_reset(TSTreeCursor *, TSNode); TSNode ts_tree_cursor_current_node(const TSTreeCursor *); +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *); +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *); bool ts_tree_cursor_goto_parent(TSTreeCursor *); bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); bool ts_tree_cursor_goto_first_child(TSTreeCursor *); @@ -147,6 +152,9 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t); uint32_t ts_language_symbol_count(const TSLanguage *); const char *ts_language_symbol_name(const TSLanguage *, TSSymbol); TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *); +uint32_t ts_language_field_count(const TSLanguage *); +const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId); +TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t); TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol); uint32_t ts_language_version(const TSLanguage *); diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index e5037062..d9d63614 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -15,6 +15,7 @@ extern "C" { #ifndef TREE_SITTER_API_H_ typedef uint16_t TSSymbol; +typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; #endif @@ -54,7 +55,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t alias_sequence_id; + uint8_t child_info_sequence_id; }; } params; TSParseActionType type : 4; @@ -85,7 +86,7 @@ 
struct TSLanguage { const TSParseActionEntry *parse_actions; const TSLexMode *lex_modes; const TSSymbol *alias_sequences; - uint16_t max_alias_sequence_length; + uint16_t max_child_info_production_length; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; @@ -98,6 +99,9 @@ struct TSLanguage { unsigned (*serialize)(void *, char *); void (*deserialize)(void *, const char *, unsigned); } external_scanner; + uint32_t field_count; + const TSFieldId *field_sequences; + const char **field_names; }; /* diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 8eb89d46..fad30e84 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -108,7 +108,7 @@ static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLa .subtree = tree, .position = length_zero(), .child_index = 0, - .structural_child_index = 0, + .child_info_offset = 0, })); return (Iterator) { .cursor = *cursor, @@ -144,15 +144,11 @@ Length iterator_end_position(Iterator *self) { static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (ts_subtree_visible(*entry.subtree)) return true; - if (self->cursor.stack.size > 1) { - Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->language, - parent.ptr->alias_sequence_id - ); - return alias_sequence && alias_sequence[entry.structural_child_index] != 0; + if (entry.child_info_offset) { + return self->language->alias_sequences[entry.child_info_offset] != 0; + } else { + return false; } - return false; } static void iterator_get_visible_state(const Iterator *self, Subtree *tree, @@ -167,15 +163,8 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, for (; i + 1 > 0; i--) { TreeCursorEntry entry = self->cursor.stack.contents[i]; - if (i > 0) { - const 
Subtree *parent = self->cursor.stack.contents[i - 1].subtree; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->language, - parent->ptr->alias_sequence_id - ); - if (alias_sequence) { - *alias_symbol = alias_sequence[entry.structural_child_index]; - } + if (entry.child_info_offset) { + *alias_symbol = self->language->alias_sequences[entry.child_info_offset]; } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { @@ -201,7 +190,9 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; - uint32_t structural_child_index = 0; + uint32_t child_info_offset = + self->language->max_child_info_production_length * + ts_subtree_child_info_sequence_id(*entry.subtree); for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { const Subtree *child = &entry.subtree->ptr->children[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); @@ -212,7 +203,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { .subtree = child, .position = position, .child_index = i, - .structural_child_index = structural_child_index, + .child_info_offset = child_info_offset, })); if (iterator_tree_is_visible(self)) { @@ -229,7 +220,9 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { } position = child_right; - if (!ts_subtree_extra(*child)) structural_child_index++; + if (!ts_subtree_extra(*child) && child_info_offset) { + child_info_offset++; + } } } while (did_descend); @@ -256,15 +249,17 @@ static void iterator_advance(Iterator *self) { uint32_t child_index = entry.child_index + 1; if (ts_subtree_child_count(*parent) > child_index) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t structural_child_index = entry.structural_child_index; - if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; + uint32_t 
child_info_offset = entry.child_info_offset; + if (child_info_offset && !ts_subtree_extra(*entry.subtree)) { + child_info_offset++; + } const Subtree *next_child = &parent->ptr->children[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = next_child, .position = position, .child_index = child_index, - .structural_child_index = structural_child_index, + .child_info_offset = child_info_offset, })); if (iterator_tree_is_visible(self)) { diff --git a/lib/src/language.c b/lib/src/language.c index 9541bba2..74a7b58d 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -3,6 +3,8 @@ #include "./error_costs.h" #include +#define LANGUAGE_VERSION_WITH_FIELDS 10 + void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, TableEntry *result) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { @@ -69,3 +71,39 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol return TSSymbolTypeAuxiliary; } } + +uint32_t ts_language_field_count(const TSLanguage *self) { + if (self->version >= LANGUAGE_VERSION_WITH_FIELDS) { + return self->field_count; + } else { + return 0; + } +} + +const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) { + uint32_t count = ts_language_field_count(self); + if (count) { + return self->field_names[id]; + } else { + return NULL; + } +} + +TSFieldId ts_language_field_id_for_name( + const TSLanguage *self, + const char *name, + uint32_t name_length +) { + uint32_t count = ts_language_field_count(self); + for (TSSymbol i = 1; i < count + 1; i++) { + switch (strncmp(name, self->field_names[i], name_length)) { + case 0: + return i; + case -1: + return 0; + default: + break; + } + } + return 0; +} diff --git a/lib/src/language.h b/lib/src/language.h index 0a0f108f..43a5eaa5 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -83,7 +83,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self, static inline 
const TSSymbol * ts_language_alias_sequence(const TSLanguage *self, unsigned id) { return id > 0 ? - self->alias_sequences + id * self->max_alias_sequence_length : + self->alias_sequences + id * self->max_child_info_production_length : NULL; } diff --git a/lib/src/node.c b/lib/src/node.c index 081ac803..8ed8355e 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -8,8 +8,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t structural_child_index; - const TSSymbol *alias_sequence; + uint32_t child_info_offset; + TSFieldId last_field_id; } NodeChildIterator; // TSNode - constructors @@ -49,19 +49,18 @@ static inline Subtree ts_node__subtree(TSNode self) { static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, 0}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( - node->tree->language, - subtree.ptr->alias_sequence_id - ); + uint32_t child_info_offset = + subtree.ptr->child_info_sequence_id * + node->tree->language->max_child_info_production_length; return (NodeChildIterator) { .tree = node->tree, .parent = subtree, .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, + .child_info_offset = child_info_offset, + .last_field_id = 0, }; } @@ -69,11 +68,10 @@ static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode * if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false; const Subtree *child = &self->parent.ptr->children[self->child_index]; TSSymbol alias_symbol = 0; - if (!ts_subtree_extra(*child)) { - if (self->alias_sequence) { - alias_symbol = 
self->alias_sequence[self->structural_child_index]; - } - self->structural_child_index++; + if (!ts_subtree_extra(*child) && self->child_info_offset) { + alias_symbol = self->tree->language->alias_sequences[self->child_info_offset]; + self->last_field_id = self->tree->language->field_sequences[self->child_info_offset]; + self->child_info_offset++; } if (self->child_index > 0) { self->position = length_add(self->position, ts_subtree_padding(*child)); @@ -453,10 +451,32 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { return ts_node__child(self, child_index, false); } -TSNode ts_node_child_by_ref(TSNode self, const char *ref_name) { +TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { + if (field_id) { + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (iterator.last_field_id == field_id) { + return child; + } + } + } return ts_node__null(); } +TSNode ts_node_child_by_field_name( + TSNode self, + const char *name, + uint32_t name_length +) { + TSFieldId field_id = ts_language_field_id_for_name( + self.tree->language, + name, + name_length + ); + return ts_node_child_by_field_id(self, field_id); +} + uint32_t ts_node_child_count(TSNode self) { Subtree tree = ts_node__subtree(self); if (ts_subtree_child_count(tree) > 0) { diff --git a/lib/src/parser.c b/lib/src/parser.c index 85452f8d..0c4453e9 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t alias_sequence_id, bool fragile) { + uint16_t child_info_sequence_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); 
@@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, alias_sequence_id, self->language + symbol, &children, child_info_sequence_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->alias_sequence_id = alias_sequence_id; + parent.ptr->child_info_sequence_id = child_info_sequence_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->alias_sequence_id, + child.ptr->child_info_sequence_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .alias_sequence_id = action.params.alias_sequence_id, + .child_info_sequence_id = action.params.child_info_sequence_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.alias_sequence_id, + action.dynamic_precedence, action.child_info_sequence_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, 
action.params.child_count, - action.params.dynamic_precedence, action.params.alias_sequence_id, + action.params.dynamic_precedence, action.params.child_info_sequence_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { @@ -1526,7 +1526,10 @@ const TSLanguage *ts_parser_language(const TSParser *self) { } bool ts_parser_set_language(TSParser *self, const TSLanguage *language) { - if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false; + if (language) { + if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false; + if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false; + } if (self->external_scanner_payload && self->language->external_scanner.destroy) { self->language->external_scanner.destroy(self->external_scanner_payload); diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 5956fb5d..557e92d7 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short alias_sequence_id; + unsigned short child_info_sequence_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 6ca00792..ec1c11ee 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned alias_sequence_id, + SubtreeArray *children, unsigned child_info_sequence_id, const TSLanguage *language) { 
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .alias_sequence_id = alias_sequence_id, + .child_info_sequence_id = child_info_sequence_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -916,7 +916,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t structural_child_index = 0; const TSSymbol *alias_sequence = ts_language_alias_sequence( language, - ts_subtree_alias_sequence_id(*self) + ts_subtree_child_info_sequence_id(*self) ); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index b0423afb..6226e4f6 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t alias_sequence_id; + uint16_t child_info_sequence_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 
0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_alias_sequence_id(Subtree self) { +static inline uint16_t ts_subtree_child_info_sequence_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->alias_sequence_id; + return self.ptr->child_info_sequence_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 5ccf4501..f6cb00b4 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -9,8 +9,7 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t structural_child_index; - const TSSymbol *alias_sequence; + uint32_t child_info_offset; } CursorChildIterator; // CursorChildIterator @@ -18,19 +17,17 @@ typedef struct { static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0}; } - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - last_entry->subtree->ptr->alias_sequence_id - ); + uint32_t child_info_offset = + last_entry->subtree->ptr->child_info_sequence_id * + self->tree->language->max_child_info_production_length; return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, .child_index = 0, - .structural_child_index = 0, - .alias_sequence = alias_sequence, + .child_info_offset = child_info_offset, }; } @@ -43,17 +40,17 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, .subtree = child, .position = self->position, .child_index = self->child_index, - .structural_child_index = self->structural_child_index, + .child_info_offset = self->child_info_offset, }; *visible = ts_subtree_visible(*child); bool extra = 
ts_subtree_extra(*child); - if (!extra && self->alias_sequence) { - *visible |= self->alias_sequence[self->structural_child_index]; + if (!extra && self->child_info_offset) { + *visible |= self->tree->language->alias_sequences[self->child_info_offset]; + self->child_info_offset++; } self->position = length_add(self->position, ts_subtree_size(*child)); self->child_index++; - if (!extra) self->structural_child_index++; if (self->child_index < self->parent.ptr->child_count) { Subtree next_child = self->parent.ptr->children[self->child_index]; @@ -85,7 +82,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { ts_node_start_point(node) }, .child_index = 0, - .structural_child_index = 0, + .child_info_offset = 0, })); } @@ -176,7 +173,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { TreeCursorEntry entry = array_pop(&self->stack); CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); iterator.child_index = entry.child_index; - iterator.structural_child_index = entry.structural_child_index; + iterator.child_info_offset = entry.child_info_offset; iterator.position = entry.position; bool visible = false; @@ -207,12 +204,9 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_aliased = false; if (i > 0) { - TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->alias_sequence_id - ); - is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; + is_aliased = + entry->child_info_offset && + self->tree->language->alias_sequences[entry->child_info_offset]; } if (ts_subtree_visible(*entry->subtree) || is_aliased) { self->stack.size = i + 1; @@ -226,15 +220,8 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); 
TSSymbol alias_symbol = 0; - if (self->stack.size > 1) { - TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - self->tree->language, - parent_entry->subtree->ptr->alias_sequence_id - ); - if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { - alias_symbol = alias_sequence[last_entry->structural_child_index]; - } + if (last_entry->child_info_offset) { + alias_symbol = self->tree->language->alias_sequences[last_entry->child_info_offset]; } return ts_node_new( self->tree, @@ -243,3 +230,19 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { alias_symbol ); } + +TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { + const TreeCursor *self = (const TreeCursor *)_self; + TreeCursorEntry *entry = array_back(&self->stack); + if (entry->child_info_offset) { + return self->tree->language->field_sequences[entry->child_info_offset]; + } else { + return 0; + } +} + +const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { + TSFieldId id = ts_tree_cursor_current_field_id(_self); + const TreeCursor *self = (const TreeCursor *)_self; + return self->tree->language->field_names[id]; +} diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index 55bdad86..f50bdb63 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -7,7 +7,7 @@ typedef struct { const Subtree *subtree; Length position; uint32_t child_index; - uint32_t structural_child_index; + uint32_t child_info_offset; } TreeCursorEntry; typedef struct { diff --git a/script/generate-bindings b/script/generate-bindings index f9299095..802f1ccf 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -12,6 +12,13 @@ bindgen \ $header_path > $output_path echo "" >> $output_path -version_constant='TREE_SITTER_LANGUAGE_VERSION' -version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3) -echo "pub const $version_constant: usize 
= $version_number;" >> $output_path + +defines=( + TREE_SITTER_LANGUAGE_VERSION + TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION +) + +for define in ${defines[@]}; do + define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3) + echo "pub const $define: usize = $define_value;" >> $output_path +done From bef80c162e2912159fc4dac0313e116fc8c81ac3 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Feb 2019 14:39:54 -0800 Subject: [PATCH 05/25] benchmarks: Use fixture languages' own copies of parser.h when compiling them --- cli/benches/benchmark.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/benches/benchmark.rs b/cli/benches/benchmark.rs index fa3a5e73..4a10da73 100644 --- a/cli/benches/benchmark.rs +++ b/cli/benches/benchmark.rs @@ -166,7 +166,8 @@ fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> us } fn get_language(name: &str) -> Language { + let src_dir = GRAMMARS_DIR.join(name).join("src"); TEST_LOADER - .load_language_at_path(name, &GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR) + .load_language_at_path(name, &src_dir, &src_dir) .unwrap() } From 1d1674811cb79b50d912f8c994e2db2a8c85ddc4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 7 Feb 2019 17:18:33 -0800 Subject: [PATCH 06/25] Fully implement ts_node_child_by_field_id --- .../build_tables/build_parse_table.rs | 90 ++++++++--- .../build_tables/minimize_parse_table.rs | 2 +- cli/src/generate/render.rs | 146 ++++++++++++------ cli/src/generate/tables.rs | 17 +- cli/src/tests/node_test.rs | 74 +++++++-- lib/include/tree_sitter/parser.h | 10 +- lib/src/get_changed_ranges.c | 43 +++--- lib/src/language.h | 23 ++- lib/src/node.c | 95 +++++++++--- lib/src/parser.c | 14 +- lib/src/reduce_action.h | 2 +- lib/src/subtree.c | 27 ++-- lib/src/subtree.h | 6 +- lib/src/tree_cursor.c | 85 +++++++--- lib/src/tree_cursor.h | 2 +- 15 files changed, 455 insertions(+), 181 deletions(-) diff --git 
a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 7a111622..fc6c6003 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,14 +6,14 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, + ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, }; use core::ops::Range; use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use std::collections::hash_map::DefaultHasher; -use std::collections::VecDeque; +use std::collections::{BTreeMap, VecDeque}; use std::u32; use std::fmt::Write; @@ -36,6 +36,7 @@ struct ParseStateQueueEntry { struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, + field_names_by_hidden_symbol: HashMap>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, state_ids_by_item_set: HashMap, ParseStateId>, @@ -48,7 +49,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.child_info_sequences.push(Vec::new()); + self.parse_table.child_infos.push(ChildInfo::default()); // Add the error state at index 0. 
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -177,7 +178,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - child_info_sequence_id: self.get_child_info_sequence_id(item), + child_info_id: self.get_child_info_id(item), } }; @@ -646,34 +647,56 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId { - let mut child_info_sequence: Vec = item - .production - .steps - .iter() - .map(|s| ChildInfo { - alias: s.alias.clone(), - field_name: s.field_name.clone(), - }) - .collect(); - while child_info_sequence.last() == Some(&ChildInfo::default()) { - child_info_sequence.pop(); + fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId { + let mut child_info = ChildInfo { + alias_sequence: Vec::new(), + field_map: BTreeMap::new(), + }; + + for (i, step) in item.production.steps.iter().enumerate() { + child_info.alias_sequence.push(step.alias.clone()); + if let Some(field_name) = &step.field_name { + child_info + .field_map + .entry(field_name.clone()) + .or_insert(Vec::new()) + .push(FieldLocation { + index: i, + inherited: false, + }); + } + if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) { + for field_name in field_names { + child_info + .field_map + .entry(field_name.clone()) + .or_insert(Vec::new()) + .push(FieldLocation { + index: i, + inherited: true, + }); + } + } } + + while child_info.alias_sequence.last() == Some(&None) { + child_info.alias_sequence.pop(); + } + if item.production.steps.len() > self.parse_table.max_production_length_with_child_info { self.parse_table.max_production_length_with_child_info = item.production.steps.len() } + if let Some(index) = self .parse_table - .child_info_sequences + .child_infos .iter() - .position(|seq| *seq == child_info_sequence) + .position(|seq| *seq == child_info) { index } 
else { - self.parse_table - .child_info_sequences - .push(child_info_sequence); - self.parse_table.child_info_sequences.len() - 1 + self.parse_table.child_infos.push(child_info); + self.parse_table.child_infos.len() - 1 } } @@ -720,6 +743,26 @@ fn populate_following_tokens( } } +fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap> { + let mut result = HashMap::new(); + for (i, variable) in grammar.variables.iter().enumerate() { + let mut field_names = Vec::new(); + if variable.kind == VariableType::Hidden { + for production in &variable.productions { + for step in &production.steps { + if let Some(field_name) = &step.field_name { + if let Err(i) = field_names.binary_search(field_name) { + field_names.insert(i, field_name.clone()); + } + } + } + } + } + result.insert(Symbol::non_terminal(i), field_names); + } + result +} + pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -746,9 +789,10 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - child_info_sequences: Vec::new(), + child_infos: Vec::new(), max_production_length_with_child_info: 0, }, + field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar), } .build()?; diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index 81a153d3..f3862732 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - child_info_sequence_id: 0, + child_info_id: 0, symbol, .. 
} => { diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 089edb79..bc7a6aa0 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -1,7 +1,9 @@ use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType}; use super::nfa::CharacterSet; use super::rules::{Alias, AliasMap, Symbol, SymbolType}; -use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry}; +use super::tables::{ + AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry, +}; use core::ops::Range; use hashbrown::{HashMap, HashSet}; use std::fmt::Write; @@ -70,12 +72,14 @@ impl Generator { self.add_symbol_names_list(); self.add_symbol_metadata_list(); - if self.parse_table.child_info_sequences.len() > 1 { - if !self.field_names.is_empty() { - self.add_field_name_enum(); - } + if !self.field_names.is_empty() { + self.add_field_name_enum(); self.add_field_name_names_list(); - self.add_child_info_sequences(); + self.add_field_sequences(); + } + + if !self.alias_ids.is_empty() { + self.add_alias_sequences(); } let mut main_lex_table = LexTable::default(); @@ -109,13 +113,13 @@ impl Generator { } let mut field_names = Vec::new(); - for child_info_sequence in &self.parse_table.child_info_sequences { - for entry in child_info_sequence { - if let Some(field_name) = &entry.field_name { - field_names.push(field_name); - } + for child_info in &self.parse_table.child_infos { + for field_name in child_info.field_map.keys() { + field_names.push(field_name); + } - if let Some(alias) = &entry.alias { + for alias in &child_info.alias_sequence { + if let Some(alias) = &alias { let alias_kind = if alias.is_named { VariableType::Named } else { @@ -350,22 +354,22 @@ impl Generator { add_line!(self, ""); } - fn add_child_info_sequences(&mut self) { + fn add_alias_sequences(&mut self) { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", - 
self.parse_table.child_info_sequences.len() + self.parse_table.child_infos.len() ); indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { - if sequence.iter().all(|i| i.alias.is_none()) { + for (i, child_info) in self.parse_table.child_infos.iter().enumerate() { + if child_info.alias_sequence.is_empty() { continue; } add_line!(self, "[{}] = {{", i); indent!(self); - for (j, child_info) in sequence.iter().enumerate() { - if let Some(alias) = &child_info.alias { + for (j, alias) in child_info.alias_sequence.iter().enumerate() { + if let Some(alias) = alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } } @@ -375,28 +379,66 @@ impl Generator { dedent!(self); add_line!(self, "}};"); add_line!(self, ""); + } - add_line!( - self, - "static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", - self.parse_table.child_info_sequences.len() + fn add_field_sequences(&mut self) { + let mut flat_field_maps = vec![]; + let mut next_flat_field_map_index = self.parse_table.child_infos.len(); + self.get_field_map_id( + &Vec::new(), + &mut flat_field_maps, + &mut next_flat_field_map_index, ); - indent!(self); - for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() { - if sequence.iter().all(|i| i.field_name.is_none()) { - continue; - } - add_line!(self, "[{}] = {{", i); - indent!(self); - for (j, child_info) in sequence.iter().enumerate() { - if let Some(field_name) = &child_info.field_name { - add_line!(self, "[{}] = {},", j, self.field_id(&field_name)); + let mut field_map_ids = Vec::new(); + for child_info in &self.parse_table.child_infos { + if !child_info.field_map.is_empty() { + let mut flat_field_map = Vec::new(); + for (field_name, locations) in &child_info.field_map { + for location in locations { + flat_field_map.push((field_name.clone(), *location)); + } } + field_map_ids.push(( + self.get_field_map_id( + &flat_field_map, + &mut flat_field_maps, + &mut 
next_flat_field_map_index, + ), + flat_field_map.len(), + )); + } else { + field_map_ids.push((0, 0)); + } + } + + add_line!(self, "static const TSFieldMapping ts_field_map[] = {{",); + indent!(self); + + add_line!(self, "/* child info id -> (field map index, count) */"); + for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { + if length > 0 { + add_line!(self, "[{}] = {{{}, {}, 0}},", child_info_id, row_id, length); + } + } + + add!(self, "\n"); + add_line!(self, "/* field id -> child index */"); + for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { + add_line!(self, "[{}] =", row_index); + indent!(self); + for (field_name, location) in field_pairs { + add_line!( + self, + "{{{}, {}, {}}},", + self.field_id(&field_name), + location.index, + location.inherited + ); } dedent!(self); - add_line!(self, "}},"); } + dedent!(self); add_line!(self, "}};"); add_line!(self, ""); @@ -762,19 +804,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - child_info_sequence_id, + child_info_id, .. 
} => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if child_info_sequence_id != 0 { - add!( - self, - ", .child_info_sequence_id = {}", - child_info_sequence_id - ); + if child_info_id != 0 { + add!(self, ", .child_info_id = {}", child_info_id); } add!(self, ")"); } @@ -839,17 +877,17 @@ impl Generator { add_line!(self, ".lex_modes = ts_lex_modes,"); add_line!(self, ".symbol_names = ts_symbol_names,"); - if self.parse_table.child_info_sequences.len() > 1 { + if !self.alias_ids.is_empty() { add_line!( self, ".alias_sequences = (const TSSymbol *)ts_alias_sequences," ); + } - add_line!(self, ".field_count = FIELD_COUNT,"); - add_line!( - self, - ".field_sequences = (const TSFieldId *)ts_field_sequences," - ); + add_line!(self, ".field_count = FIELD_COUNT,"); + + if !self.field_names.is_empty() { + add_line!(self, ".field_map = (const TSFieldMapping *)ts_field_map,"); add_line!(self, ".field_names = ts_field_names,"); } @@ -907,6 +945,22 @@ impl Generator { result } + fn get_field_map_id( + &self, + flat_field_map: &Vec<(String, FieldLocation)>, + flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>, + next_flat_field_map_index: &mut usize, + ) -> usize { + if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) { + return *index; + } + + let result = *next_flat_field_map_index; + flat_field_maps.push((result, flat_field_map.clone())); + *next_flat_field_map_index += flat_field_map.len(); + result + } + fn get_external_scanner_state_id(&mut self, external_tokens: HashSet) -> usize { self.external_scanner_states .iter() diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index fc1ad642..99adde69 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,8 +1,9 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; use hashbrown::HashMap; +use 
std::collections::BTreeMap; -pub(crate) type ChildInfoSequenceId = usize; +pub(crate) type ChildInfoId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -21,7 +22,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - child_info_sequence_id: ChildInfoSequenceId, + child_info_id: ChildInfoId, }, } @@ -39,17 +40,23 @@ pub(crate) struct ParseState { pub unfinished_item_signature: u64, } +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldLocation { + pub index: usize, + pub inherited: bool, +} + #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct ChildInfo { - pub alias: Option, - pub field_name: Option, + pub alias_sequence: Vec>, + pub field_map: BTreeMap>, } #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub child_info_sequences: Vec>, + pub child_infos: Vec, pub max_production_length_with_child_info: usize, } diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index dc4bb7a2..06a6fd8f 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -343,7 +343,7 @@ fn test_node_field_names() { let (parser_name, parser_code) = generate_parser_for_grammar( r#" { - "name": "test_grammar_with_refs", + "name": "test_grammar_with_fields", "extras": [ {"type": "PATTERN", "value": "\\s+"} ], @@ -354,30 +354,54 @@ fn test_node_field_names() { { "type": "FIELD", "name": "field_1", - "content": { - "type": "STRING", - "value": "child-1" - } + "content": {"type": "STRING", "value": "child-0"} }, { "type": "CHOICE", "members": [ + {"type": "STRING", "value": "child-1"}, + {"type": "BLANK"}, + + // This isn't used in the test, but prevents `_hidden_rule1` + // from being eliminated as a unit reduction. 
{ - "type": "STRING", - "value": "child-2" - }, - { - "type": "BLANK" + "type": "ALIAS", + "value": "x", + "named": true, + "content": { + "type": "SYMBOL", + "name": "_hidden_rule1" + } } ] }, { "type": "FIELD", "name": "field_2", - "content": { - "type": "STRING", - "value": "child-3" - } + "content": {"type": "SYMBOL", "name": "_hidden_rule1"} + }, + {"type": "SYMBOL", "name": "_hidden_rule2"} + ] + }, + + // Fields pointing to hidden nodes with a single child resolve to the child. + "_hidden_rule1": { + "type": "CHOICE", + "members": [ + {"type": "STRING", "value": "child-2"}, + {"type": "STRING", "value": "child-2.5"} + ] + }, + + // Fields within hidden nodes can be referenced through the parent node. + "_hidden_rule2": { + "type": "SEQ", + "members": [ + {"type": "STRING", "value": "child-3"}, + { + "type": "FIELD", + "name": "field_3", + "content": {"type": "STRING", "value": "child-4"} } ] } @@ -391,10 +415,30 @@ fn test_node_field_names() { let language = get_test_language(&parser_name, &parser_code, None); parser.set_language(language).unwrap(); - let tree = parser.parse("child-1 child-2 child-3", None).unwrap(); + let tree = parser.parse("child-0 child-1 child-2 child-3 child-4", None).unwrap(); let root_node = tree.root_node(); + assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); + assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4)); + + let mut cursor = root_node.walk(); + assert_eq!(cursor.field_name(), None); + cursor.goto_first_child(); + assert_eq!(cursor.node().kind(), "child-0"); + assert_eq!(cursor.field_name(), Some("field_1")); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-1"); + assert_eq!(cursor.field_name(), None); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-2"); + assert_eq!(cursor.field_name(), Some("field_2")); + cursor.goto_next_sibling(); + 
assert_eq!(cursor.node().kind(), "child-3"); + assert_eq!(cursor.field_name(), None); + cursor.goto_next_sibling(); + assert_eq!(cursor.node().kind(), "child-4"); + assert_eq!(cursor.field_name(), Some("field_3")); } fn get_all_nodes(tree: &Tree) -> Vec { diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index d9d63614..d81d0271 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -19,6 +19,12 @@ typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; #endif +typedef struct { + TSFieldId field_id; + uint8_t child_index; + bool inherited; +} TSFieldMapping; + typedef uint16_t TSStateId; typedef struct { @@ -55,7 +61,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t child_info_sequence_id; + uint8_t child_info_id; }; } params; TSParseActionType type : 4; @@ -100,7 +106,7 @@ struct TSLanguage { void (*deserialize)(void *, const char *, unsigned); } external_scanner; uint32_t field_count; - const TSFieldId *field_sequences; + const TSFieldMapping *field_map; const char **field_names; }; diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index fad30e84..83331cce 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -108,7 +108,7 @@ static Iterator iterator_new(TreeCursor *cursor, const Subtree *tree, const TSLa .subtree = tree, .position = length_zero(), .child_index = 0, - .child_info_offset = 0, + .structural_child_index = 0, })); return (Iterator) { .cursor = *cursor, @@ -144,11 +144,15 @@ Length iterator_end_position(Iterator *self) { static bool iterator_tree_is_visible(const Iterator *self) { TreeCursorEntry entry = *array_back(&self->cursor.stack); if (ts_subtree_visible(*entry.subtree)) return true; - if (entry.child_info_offset) { - return self->language->alias_sequences[entry.child_info_offset] != 0; - } else { - return false; + if (self->cursor.stack.size > 1) { + Subtree parent = 
*self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->language, + parent.ptr->child_info_id + ); + return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } + return false; } static void iterator_get_visible_state(const Iterator *self, Subtree *tree, @@ -163,8 +167,15 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, for (; i + 1 > 0; i--) { TreeCursorEntry entry = self->cursor.stack.contents[i]; - if (entry.child_info_offset) { - *alias_symbol = self->language->alias_sequences[entry.child_info_offset]; + if (i > 0) { + const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->language, + parent->ptr->child_info_id + ); + if (alias_sequence) { + *alias_symbol = alias_sequence[entry.structural_child_index]; + } } if (ts_subtree_visible(*entry.subtree) || *alias_symbol) { @@ -190,9 +201,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { did_descend = false; TreeCursorEntry entry = *array_back(&self->cursor.stack); Length position = entry.position; - uint32_t child_info_offset = - self->language->max_child_info_production_length * - ts_subtree_child_info_sequence_id(*entry.subtree); + uint32_t structural_child_index = 0; for (uint32_t i = 0, n = ts_subtree_child_count(*entry.subtree); i < n; i++) { const Subtree *child = &entry.subtree->ptr->children[i]; Length child_left = length_add(position, ts_subtree_padding(*child)); @@ -203,7 +212,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { .subtree = child, .position = position, .child_index = i, - .child_info_offset = child_info_offset, + .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { @@ -220,9 +229,7 @@ static bool iterator_descend(Iterator *self, uint32_t goal_position) { } position = child_right; - if 
(!ts_subtree_extra(*child) && child_info_offset) { - child_info_offset++; - } + if (!ts_subtree_extra(*child)) structural_child_index++; } } while (did_descend); @@ -249,17 +256,15 @@ static void iterator_advance(Iterator *self) { uint32_t child_index = entry.child_index + 1; if (ts_subtree_child_count(*parent) > child_index) { Length position = length_add(entry.position, ts_subtree_total_size(*entry.subtree)); - uint32_t child_info_offset = entry.child_info_offset; - if (child_info_offset && !ts_subtree_extra(*entry.subtree)) { - child_info_offset++; - } + uint32_t structural_child_index = entry.structural_child_index; + if (!ts_subtree_extra(*entry.subtree)) structural_child_index++; const Subtree *next_child = &parent->ptr->children[child_index]; array_push(&self->cursor.stack, ((TreeCursorEntry){ .subtree = next_child, .position = position, .child_index = child_index, - .child_info_offset = child_info_offset, + .structural_child_index = structural_child_index, })); if (iterator_tree_is_visible(self)) { diff --git a/lib/src/language.h b/lib/src/language.h index 43a5eaa5..09adca62 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -81,12 +81,29 @@ ts_language_enabled_external_tokens(const TSLanguage *self, } static inline const TSSymbol * -ts_language_alias_sequence(const TSLanguage *self, unsigned id) { - return id > 0 ? - self->alias_sequences + id * self->max_child_info_production_length : +ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { + return child_info_id > 0 ? + self->alias_sequences + child_info_id * self->max_child_info_production_length : NULL; } +static inline void ts_language_field_map( + const TSLanguage *self, + uint32_t child_info_id, + const TSFieldMapping **start, + const TSFieldMapping **end +) { + // To find the field mappings for a given child info id, first index + // into the field map using the child info id directly. 
This 'header' + // row contains two values: + // * the index where the field mappings start + // * the number of field mappings. + const TSFieldMapping *field_map = self->field_map; + TSFieldMapping header = field_map[child_info_id]; + *start = &field_map[header.field_id]; + *end = &field_map[header.field_id] + header.child_index; +} + #ifdef __cplusplus } #endif diff --git a/lib/src/node.c b/lib/src/node.c index 8ed8355e..7ae8f115 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -8,8 +8,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t child_info_offset; - TSFieldId last_field_id; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; } NodeChildIterator; // TSNode - constructors @@ -49,29 +49,35 @@ static inline Subtree ts_node__subtree(TSNode self) { static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { Subtree subtree = ts_node__subtree(*node); if (ts_subtree_child_count(subtree) == 0) { - return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, 0}; + return (NodeChildIterator) {NULL_SUBTREE, node->tree, length_zero(), 0, 0, NULL}; } - uint32_t child_info_offset = - subtree.ptr->child_info_sequence_id * - node->tree->language->max_child_info_production_length; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + node->tree->language, + subtree.ptr->child_info_id + ); return (NodeChildIterator) { .tree = node->tree, .parent = subtree, .position = {ts_node_start_byte(*node), ts_node_start_point(*node)}, .child_index = 0, - .child_info_offset = child_info_offset, - .last_field_id = 0, + .structural_child_index = 0, + .alias_sequence = alias_sequence, }; } +static inline bool ts_node_child_iterator_done(NodeChildIterator *self) { + return self->child_index == self->parent.ptr->child_count; +} + static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) { - if (!self->parent.ptr || self->child_index == 
self->parent.ptr->child_count) return false; + if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false; const Subtree *child = &self->parent.ptr->children[self->child_index]; TSSymbol alias_symbol = 0; - if (!ts_subtree_extra(*child) && self->child_info_offset) { - alias_symbol = self->tree->language->alias_sequences[self->child_info_offset]; - self->last_field_id = self->tree->language->field_sequences[self->child_info_offset]; - self->child_info_offset++; + if (!ts_subtree_extra(*child)) { + if (self->alias_sequence) { + alias_symbol = self->alias_sequence[self->structural_child_index]; + } + self->structural_child_index++; } if (self->child_index > 0) { self->position = length_add(self->position, ts_subtree_padding(*child)); @@ -452,15 +458,68 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) { } TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { - if (field_id) { - TSNode child; - NodeChildIterator iterator = ts_node_iterate_children(&self); - while (ts_node_child_iterator_next(&iterator, &child)) { - if (iterator.last_field_id == field_id) { +recur: + if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); + + const TSFieldMapping *field_map, *field_map_end; + ts_language_field_map( + self.tree->language, + ts_node__subtree(self).ptr->child_info_id, + &field_map, + &field_map_end + ); + if (field_map == field_map_end) return ts_node__null(); + + // The field mappings are sorted by their field id. Scan all + // the mappings to find the ones for the given field id. 
+ while (field_map->field_id < field_id) { + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + while (field_map_end[-1].field_id > field_id) { + field_map_end--; + if (field_map == field_map_end) return ts_node__null(); + } + + TSNode child; + NodeChildIterator iterator = ts_node_iterate_children(&self); + while (ts_node_child_iterator_next(&iterator, &child)) { + if (!ts_subtree_extra(ts_node__subtree(child))) { + uint32_t index = iterator.structural_child_index - 1; + if (index < field_map->child_index) continue; + + // Hidden nodes' fields are "inherited" by their visible parent. + if (field_map->inherited) { + + // If this is the *last* possible child node for this field, + // then perform a tail call to avoid recursion. + if (field_map + 1 == field_map_end) { + self = child; + goto recur; + } + + // Otherwise, descend into this child, but if that child doesn't + // contain the field, continue searching subsequent children. + else { + TSNode result = ts_node_child_by_field_id(child, field_id); + if (result.id) return result; + field_map++; + if (field_map == field_map_end) return ts_node__null(); + } + } + + else if (ts_node__is_relevant(child, true)) { return child; } + + // If the field refers to a hidden node, return its first visible + // child. 
+ else { + return ts_node_child(child, 0); + } } } + return ts_node__null(); } diff --git a/lib/src/parser.c b/lib/src/parser.c index 0c4453e9..5fd75cd8 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t child_info_sequence_id, bool fragile) { + uint16_t child_info_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); @@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, child_info_sequence_id, self->language + symbol, &children, child_info_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->child_info_sequence_id = child_info_sequence_id; + parent.ptr->child_info_id = child_info_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->child_info_sequence_id, + child.ptr->child_info_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .child_info_sequence_id = 
action.params.child_info_sequence_id, + .child_info_id = action.params.child_info_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.child_info_sequence_id, + action.dynamic_precedence, action.child_info_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, - action.params.dynamic_precedence, action.params.child_info_sequence_id, + action.params.dynamic_precedence, action.params.child_info_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 557e92d7..9eca0327 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short child_info_sequence_id; + unsigned short child_info_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index ec1c11ee..4c93d6d0 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned 
child_info_sequence_id, + SubtreeArray *children, unsigned child_info_id, const TSLanguage *language) { TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .child_info_sequence_id = child_info_sequence_id, + .child_info_id = child_info_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_sequence_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -913,20 +913,17 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, fprintf(f, "\"]\n"); uint32_t child_start_offset = start_offset; - uint32_t structural_child_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence( - language, - ts_subtree_child_info_sequence_id(*self) - ); + uint32_t child_info_offset = + language->max_child_info_production_length * + ts_subtree_child_info_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; - if (ts_subtree_extra(*child)) { - ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f); - } else { - TSSymbol alias_symbol = alias_sequence ? 
alias_sequence[structural_child_index] : 0; - ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); - structural_child_index++; + TSSymbol alias_symbol = 0; + if (!ts_subtree_extra(*child) && child_info_offset) { + alias_symbol = language->alias_sequences[child_info_offset]; + child_info_offset++; } + ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f); fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i); child_start_offset += ts_subtree_total_bytes(*child); } diff --git a/lib/src/subtree.h b/lib/src/subtree.h index 6226e4f6..f32edfc2 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t child_info_sequence_id; + uint16_t child_info_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 
0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_child_info_sequence_id(Subtree self) { +static inline uint16_t ts_subtree_child_info_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->child_info_sequence_id; + return self.ptr->child_info_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index f6cb00b4..099992b9 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -9,7 +9,8 @@ typedef struct { const TSTree *tree; Length position; uint32_t child_index; - uint32_t child_info_offset; + uint32_t structural_child_index; + const TSSymbol *alias_sequence; } CursorChildIterator; // CursorChildIterator @@ -17,17 +18,19 @@ typedef struct { static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCursor *self) { TreeCursorEntry *last_entry = array_back(&self->stack); if (ts_subtree_child_count(*last_entry->subtree) == 0) { - return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0}; + return (CursorChildIterator) {NULL_SUBTREE, self->tree, length_zero(), 0, 0, NULL}; } - uint32_t child_info_offset = - last_entry->subtree->ptr->child_info_sequence_id * - self->tree->language->max_child_info_production_length; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + last_entry->subtree->ptr->child_info_id + ); return (CursorChildIterator) { .tree = self->tree, .parent = *last_entry->subtree, .position = last_entry->position, .child_index = 0, - .child_info_offset = child_info_offset, + .structural_child_index = 0, + .alias_sequence = alias_sequence, }; } @@ -40,13 +43,13 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self, .subtree = child, .position = self->position, .child_index = self->child_index, - .child_info_offset = self->child_info_offset, + .structural_child_index = self->structural_child_index, }; *visible = ts_subtree_visible(*child); bool extra = ts_subtree_extra(*child); 
- if (!extra && self->child_info_offset) { - *visible |= self->tree->language->alias_sequences[self->child_info_offset]; - self->child_info_offset++; + if (!extra && self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + self->structural_child_index++; } self->position = length_add(self->position, ts_subtree_size(*child)); @@ -82,7 +85,7 @@ void ts_tree_cursor_init(TreeCursor *self, TSNode node) { ts_node_start_point(node) }, .child_index = 0, - .child_info_offset = 0, + .structural_child_index = 0, })); } @@ -173,7 +176,7 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *_self) { TreeCursorEntry entry = array_pop(&self->stack); CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); iterator.child_index = entry.child_index; - iterator.child_info_offset = entry.child_info_offset; + iterator.structural_child_index = entry.structural_child_index; iterator.position = entry.position; bool visible = false; @@ -204,9 +207,12 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *entry = &self->stack.contents[i]; bool is_aliased = false; if (i > 0) { - is_aliased = - entry->child_info_offset && - self->tree->language->alias_sequences[entry->child_info_offset]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; } if (ts_subtree_visible(*entry->subtree) || is_aliased) { self->stack.size = i + 1; @@ -220,8 +226,15 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; TreeCursorEntry *last_entry = array_back(&self->stack); TSSymbol alias_symbol = 0; - if (last_entry->child_info_offset) { - alias_symbol = self->tree->language->alias_sequences[last_entry->child_info_offset]; + if (self->stack.size > 1) { + 
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { + alias_symbol = alias_sequence[last_entry->structural_child_index]; + } } return ts_node_new( self->tree, @@ -233,12 +246,40 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TreeCursor *self = (const TreeCursor *)_self; - TreeCursorEntry *entry = array_back(&self->stack); - if (entry->child_info_offset) { - return self->tree->language->field_sequences[entry->child_info_offset]; - } else { - return 0; + + // Walk up the tree, visiting the current node and its invisible ancestors. + for (unsigned i = self->stack.size - 1; i > 0; i--) { + TreeCursorEntry *entry = &self->stack.contents[i]; + TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; + + // Stop walking up when another visible node is found. 
+ if (i != self->stack.size - 1) { + if (ts_subtree_visible(*entry->subtree)) break; + const TSSymbol *alias_sequence = ts_language_alias_sequence( + self->tree->language, + parent_entry->subtree->ptr->child_info_id + ); + if (alias_sequence && alias_sequence[entry->structural_child_index]) { + break; + } + } + + const TSFieldMapping *field_map, *field_map_end; + ts_language_field_map( + self->tree->language, + parent_entry->subtree->ptr->child_info_id, + &field_map, &field_map_end + ); + + while (field_map < field_map_end) { + if ( + !field_map->inherited && + field_map->child_index == entry->structural_child_index + ) return field_map->field_id; + field_map++; + } } + return 0; } const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { diff --git a/lib/src/tree_cursor.h b/lib/src/tree_cursor.h index f50bdb63..55bdad86 100644 --- a/lib/src/tree_cursor.h +++ b/lib/src/tree_cursor.h @@ -7,7 +7,7 @@ typedef struct { const Subtree *subtree; Length position; uint32_t child_index; - uint32_t child_info_offset; + uint32_t structural_child_index; } TreeCursorEntry; typedef struct { From 51a9f14f7d2f38a6bfe6ee3008436522fcf2056e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 09:39:00 -0800 Subject: [PATCH 07/25] Ensure symbols are written in a predictable order in conflict messages --- cli/src/generate/build_tables/build_parse_table.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index fc6c6003..0554c06e 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -520,11 +520,12 @@ impl<'a> ParseTableBuilder<'a> { let mut resolution_count = 0; write!(&mut msg, "\nPossible resolutions:\n\n").unwrap(); - let shift_items = conflicting_items + let mut shift_items = conflicting_items .iter() .filter(|i| !i.is_done()) .cloned() .collect::>(); + 
shift_items.sort_unstable(); if actual_conflict.len() > 1 { if shift_items.len() > 0 { resolution_count += 1; From d8a2c0dda23252379952d2c868cea02b47ec2210 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 16:06:29 -0800 Subject: [PATCH 08/25] Use a separate type for storing field map headers --- cli/src/generate/render.rs | 51 ++++++++++++++++++++++---------- lib/include/tree_sitter/parser.h | 10 +++++-- lib/src/language.h | 16 ++++------ lib/src/node.c | 2 +- lib/src/tree_cursor.c | 2 +- 5 files changed, 50 insertions(+), 31 deletions(-) diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index bc7a6aa0..af167a5d 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -383,7 +383,7 @@ impl Generator { fn add_field_sequences(&mut self) { let mut flat_field_maps = vec![]; - let mut next_flat_field_map_index = self.parse_table.child_infos.len(); + let mut next_flat_field_map_index = 0; self.get_field_map_id( &Vec::new(), &mut flat_field_maps, @@ -412,29 +412,41 @@ impl Generator { } } - add_line!(self, "static const TSFieldMapping ts_field_map[] = {{",); + add_line!( + self, + "static const TSFieldMapSlice ts_field_map_slices[] = {{", + ); indent!(self); - - add_line!(self, "/* child info id -> (field map index, count) */"); for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { if length > 0 { - add_line!(self, "[{}] = {{{}, {}, 0}},", child_info_id, row_id, length); + add_line!( + self, + "[{}] = {{.index = {}, .length = {}}},", + child_info_id, + row_id, + length + ); } } + dedent!(self); + add_line!(self, "}};"); + add_line!(self, ""); - add!(self, "\n"); - add_line!(self, "/* field id -> child index */"); + add_line!( + self, + "static const TSFieldMapEntry ts_field_map_entries[] = {{", + ); + indent!(self); for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) { add_line!(self, "[{}] =", row_index); indent!(self); for (field_name, location) in field_pairs { - 
add_line!( - self, - "{{{}, {}, {}}},", - self.field_id(&field_name), - location.index, - location.inherited - ); + add_whitespace!(self); + add!(self, "{{{}, {}", self.field_id(&field_name), location.index); + if location.inherited { + add!(self, ", .inherited = true"); + } + add!(self, "}},\n"); } dedent!(self); } @@ -887,8 +899,15 @@ impl Generator { add_line!(self, ".field_count = FIELD_COUNT,"); if !self.field_names.is_empty() { - add_line!(self, ".field_map = (const TSFieldMapping *)ts_field_map,"); add_line!(self, ".field_names = ts_field_names,"); + add_line!( + self, + ".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices," + ); + add_line!( + self, + ".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries," + ); } add_line!( @@ -1007,7 +1026,7 @@ impl Generator { } fn field_id(&self, field_name: &String) -> String { - format!("field_id_{}", field_name) + format!("field_{}", field_name) } fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) { diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index d81d0271..9d25a40f 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -23,7 +23,12 @@ typedef struct { TSFieldId field_id; uint8_t child_index; bool inherited; -} TSFieldMapping; +} TSFieldMapEntry; + +typedef struct { + uint16_t index; + uint16_t length; +} TSFieldMapSlice; typedef uint16_t TSStateId; @@ -106,7 +111,8 @@ struct TSLanguage { void (*deserialize)(void *, const char *, unsigned); } external_scanner; uint32_t field_count; - const TSFieldMapping *field_map; + const TSFieldMapSlice *field_map_slices; + const TSFieldMapEntry *field_map_entries; const char **field_names; }; diff --git a/lib/src/language.h b/lib/src/language.h index 09adca62..908c0646 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -90,18 +90,12 @@ ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { static inline void ts_language_field_map( 
const TSLanguage *self, uint32_t child_info_id, - const TSFieldMapping **start, - const TSFieldMapping **end + const TSFieldMapEntry **start, + const TSFieldMapEntry **end ) { - // To find the field mappings for a given child info id, first index - // into the field map using the child info id directly. This 'header' - // row contains two values: - // * the index where the field mappings start - // * the number of field mappings. - const TSFieldMapping *field_map = self->field_map; - TSFieldMapping header = field_map[child_info_id]; - *start = &field_map[header.field_id]; - *end = &field_map[header.field_id] + header.child_index; + TSFieldMapSlice slice = self->field_map_slices[child_info_id]; + *start = &self->field_map_entries[slice.index]; + *end = &self->field_map_entries[slice.index] + slice.length; } #ifdef __cplusplus diff --git a/lib/src/node.c b/lib/src/node.c index 7ae8f115..6ac4636d 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -461,7 +461,7 @@ TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) { recur: if (!field_id || ts_node_child_count(self) == 0) return ts_node__null(); - const TSFieldMapping *field_map, *field_map_end; + const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self.tree->language, ts_node__subtree(self).ptr->child_info_id, diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 099992b9..4f3f9ae7 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -264,7 +264,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { } } - const TSFieldMapping *field_map, *field_map_end; + const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self->tree->language, parent_entry->subtree->ptr->child_info_id, From 79d90f0d3e4bf9292a8b4b4a249e2de7789acf9e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 16:14:18 -0800 Subject: [PATCH 09/25] Restore naming of alias sequence lengths Fields aren't stored in sequences now, so the max length is 
back to being just for aliases. --- cli/src/generate/build_tables/build_parse_table.rs | 6 +++--- cli/src/generate/render.rs | 8 ++++---- cli/src/generate/tables.rs | 2 +- lib/include/tree_sitter/parser.h | 2 +- lib/src/language.h | 2 +- lib/src/subtree.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 0554c06e..729e3f3a 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -684,8 +684,8 @@ impl<'a> ParseTableBuilder<'a> { child_info.alias_sequence.pop(); } - if item.production.steps.len() > self.parse_table.max_production_length_with_child_info { - self.parse_table.max_production_length_with_child_info = item.production.steps.len() + if item.production.steps.len() > self.parse_table.max_aliased_production_length { + self.parse_table.max_aliased_production_length = item.production.steps.len() } if let Some(index) = self @@ -791,7 +791,7 @@ pub(crate) fn build_parse_table( states: Vec::new(), symbols: Vec::new(), child_infos: Vec::new(), - max_production_length_with_child_info: 0, + max_aliased_production_length: 0, }, field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar), } diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index af167a5d..22bf655c 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -222,8 +222,8 @@ impl Generator { add_line!(self, "#define FIELD_COUNT {}", self.field_names.len()); add_line!( self, - "#define MAX_CHILD_INFO_PRODUCTION_LENGTH {}", - self.parse_table.max_production_length_with_child_info + "#define MAX_ALIAS_SEQUENCE_LENGTH {}", + self.parse_table.max_aliased_production_length ); add_line!(self, ""); } @@ -357,7 +357,7 @@ impl Generator { fn add_alias_sequences(&mut self) { add_line!( self, - "static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{", 
+ "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", self.parse_table.child_infos.len() ); indent!(self); @@ -912,7 +912,7 @@ impl Generator { add_line!( self, - ".max_child_info_production_length = MAX_CHILD_INFO_PRODUCTION_LENGTH," + ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH," ); add_line!(self, ".lex_fn = ts_lex,"); diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 99adde69..a39ae099 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -57,7 +57,7 @@ pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, pub child_infos: Vec, - pub max_production_length_with_child_info: usize, + pub max_aliased_production_length: usize, } #[derive(Clone, Debug, PartialEq, Eq)] diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 9d25a40f..90ab6200 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -97,7 +97,7 @@ struct TSLanguage { const TSParseActionEntry *parse_actions; const TSLexMode *lex_modes; const TSSymbol *alias_sequences; - uint16_t max_child_info_production_length; + uint16_t max_alias_sequence_length; bool (*lex_fn)(TSLexer *, TSStateId); bool (*keyword_lex_fn)(TSLexer *, TSStateId); TSSymbol keyword_capture_token; diff --git a/lib/src/language.h b/lib/src/language.h index 908c0646..59c0fadc 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -83,7 +83,7 @@ ts_language_enabled_external_tokens(const TSLanguage *self, static inline const TSSymbol * ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { return child_info_id > 0 ? 
- self->alias_sequences + child_info_id * self->max_child_info_production_length : + self->alias_sequences + child_info_id * self->max_alias_sequence_length : NULL; } diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 4c93d6d0..3d588890 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -914,7 +914,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t child_start_offset = start_offset; uint32_t child_info_offset = - language->max_child_info_production_length * + language->max_alias_sequence_length * ts_subtree_child_info_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; From a7206b1b8b326f2a18ccd24603c2b1f0ed7fcb91 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 16:25:27 -0800 Subject: [PATCH 10/25] Add some assertions to node field test --- cli/src/tests/node_test.rs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 06a6fd8f..08080798 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -421,6 +421,8 @@ fn test_node_field_names() { assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4)); + assert_eq!(root_node.child(0).unwrap().child_by_field_name("field_1"), None); + assert_eq!(root_node.child_by_field_name("not_a_real_field"), None); let mut cursor = root_node.walk(); assert_eq!(cursor.field_name(), None); From e579e09569c909fde223b14504a087e1f022a6ee Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 17:11:44 -0800 Subject: [PATCH 11/25] Ensure interpretations are in a predictable order in conflict messages --- cli/src/generate/build_tables/build_parse_table.rs | 12 +++++------- .../associativity_missing/expected_error.txt | 4 ++-- 
.../conflict_in_repeat_rule/expected_error.txt | 4 ++-- .../expected_error.txt | 4 ++-- .../conflicting_precedence/expected_error.txt | 4 ++-- .../partially_resolved_conflict/expected_error.txt | 4 ++-- .../expected_error.txt | 4 ++-- 7 files changed, 17 insertions(+), 19 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 729e3f3a..d37fce33 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -443,13 +443,10 @@ impl<'a> ParseTableBuilder<'a> { .unwrap(); write!(&mut msg, "Possible interpretations:\n\n").unwrap(); - let interpretions = conflicting_items + let mut interpretions = conflicting_items .iter() - .enumerate() - .map(|(i, item)| { + .map(|item| { let mut line = String::new(); - write!(&mut line, " {}:", i + 1).unwrap(); - for preceding_symbol in preceding_symbols .iter() .take(preceding_symbols.len() - item.step_index as usize) @@ -505,8 +502,9 @@ impl<'a> ParseTableBuilder<'a> { .map(|i| i.0.chars().count()) .max() .unwrap(); - - for (line, prec_suffix) in interpretions { + interpretions.sort_unstable(); + for (i, (line, prec_suffix)) in interpretions.into_iter().enumerate() { + write!(&mut msg, " {}:", i + 1).unwrap(); msg += &line; if let Some(prec_suffix) = prec_suffix { for _ in line.chars().count()..max_interpretation_length { diff --git a/test/fixtures/test_grammars/associativity_missing/expected_error.txt b/test/fixtures/test_grammars/associativity_missing/expected_error.txt index 6f9b5824..f9cc955d 100644 --- a/test/fixtures/test_grammars/associativity_missing/expected_error.txt +++ b/test/fixtures/test_grammars/associativity_missing/expected_error.txt @@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: expression '+' (math_operation expression • '+' expression) - 2: (math_operation expression '+' expression) • '+' … + 1: (math_operation expression '+' 
expression) • '+' … + 2: expression '+' (math_operation expression • '+' expression) Possible resolutions: diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt index 94d1caa4..4357c5cf 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule/expected_error.txt @@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: '[' (array_type_repeat1 identifier) • identifier … - 2: '[' (array_repeat1 identifier) • identifier … + 1: '[' (array_repeat1 identifier) • identifier … + 2: '[' (array_type_repeat1 identifier) • identifier … Possible resolutions: diff --git a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt index 4a81f0ef..4d0bdebc 100644 --- a/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt +++ b/test/fixtures/test_grammars/conflict_in_repeat_rule_after_external_token/expected_error.txt @@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: _program_start '[' (array_type_repeat1 identifier) • identifier … - 2: _program_start '[' (array_repeat1 identifier) • identifier … + 1: _program_start '[' (array_repeat1 identifier) • identifier … + 2: _program_start '[' (array_type_repeat1 identifier) • identifier … Possible resolutions: diff --git a/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt b/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt index ea23b072..92ae2157 100644 --- a/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt +++ b/test/fixtures/test_grammars/conflicting_precedence/expected_error.txt @@ -4,9 +4,9 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 
1: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left) + 1: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left) 2: expression '+' (other_thing expression • '*' '*') (precedence: -1, associativity: Left) - 3: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left) + 3: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left) Possible resolutions: diff --git a/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt b/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt index a8699897..d5d2c1bf 100644 --- a/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt +++ b/test/fixtures/test_grammars/partially_resolved_conflict/expected_error.txt @@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: (unary_b '!' expression) • '<' … (precedence: 2) - 2: (unary_a '!' expression) • '<' … (precedence: 2) + 1: (unary_a '!' expression) • '<' … (precedence: 2) + 2: (unary_b '!' 
expression) • '<' … (precedence: 2) Possible resolutions: diff --git a/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt b/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt index 557f1837..281439ea 100644 --- a/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt +++ b/test/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt @@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence: Possible interpretations: - 1: identifier (function_call identifier • block) (precedence: 0, associativity: Right) - 2: identifier (expression identifier) • '{' … + 1: identifier (expression identifier) • '{' … + 2: identifier (function_call identifier • block) (precedence: 0, associativity: Right) Possible resolutions: From eb1e7af5ece7a174930a2433ed0dd26ab4a162df Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 17:11:58 -0800 Subject: [PATCH 12/25] Normalize rule argument to `field` function --- cli/src/generate/dsl.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index 428fc604..db84274f 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -38,7 +38,7 @@ function field(name, rule) { return { type: "FIELD", name: name, - content: rule + content: normalize(rule) } } From b7e38ccc96138c80353a2035126677266f21dff4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Feb 2019 17:12:08 -0800 Subject: [PATCH 13/25] Allow using fields in inlined rules --- cli/src/generate/prepare_grammar/process_inlines.rs | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index f58de63d..e067cd9e 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -114,6 +114,11 @@ impl InlinedProductionMapBuilder { 
inserted_step.alias = Some(alias.clone()); } } + if let Some(field_name) = removed_step.field_name { + for inserted_step in inserted_steps.iter_mut() { + inserted_step.field_name = Some(field_name.clone()); + } + } if let Some(last_inserted_step) = inserted_steps.last_mut() { if last_inserted_step.precedence == 0 { last_inserted_step.precedence = removed_step.precedence; From 56309a1c284f200bc1278fae2830c5014f1619a5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Feb 2019 11:06:18 -0800 Subject: [PATCH 14/25] Generate node-fields.json file --- .../build_tables/build_parse_table.rs | 364 ++++++++++++++++-- .../build_tables/minimize_parse_table.rs | 2 +- cli/src/generate/grammars.rs | 6 + cli/src/generate/mod.rs | 128 +++++- cli/src/generate/render.rs | 30 +- cli/src/generate/tables.rs | 30 +- lib/include/tree_sitter/parser.h | 2 +- lib/src/get_changed_ranges.c | 4 +- lib/src/language.h | 10 +- lib/src/node.c | 8 +- lib/src/parser.c | 14 +- lib/src/reduce_action.h | 2 +- lib/src/subtree.c | 10 +- lib/src/subtree.h | 6 +- lib/src/tree_cursor.c | 10 +- 15 files changed, 535 insertions(+), 91 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index d37fce33..417d5d3a 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -6,18 +6,17 @@ use crate::generate::grammars::{ }; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, - ParseTableEntry, + ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, + ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo, }; use core::ops::Range; use hashbrown::hash_map::Entry; use hashbrown::{HashMap, HashSet}; use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeMap, VecDeque}; -use 
std::u32; - use std::fmt::Write; use std::hash::Hasher; +use std::{mem, u32}; #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -36,7 +35,6 @@ struct ParseStateQueueEntry { struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, - field_names_by_hidden_symbol: HashMap>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, state_ids_by_item_set: HashMap, ParseStateId>, @@ -49,7 +47,7 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.child_infos.push(ChildInfo::default()); + self.parse_table.production_infos.push(ProductionInfo::default()); // Add the error state at index 0. self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -178,7 +176,7 @@ impl<'a> ParseTableBuilder<'a> { precedence: item.precedence(), associativity: item.associativity(), dynamic_precedence: item.production.dynamic_precedence, - child_info_id: self.get_child_info_id(item), + production_id: self.get_production_id(item), } }; @@ -646,16 +644,16 @@ impl<'a> ParseTableBuilder<'a> { } } - fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId { - let mut child_info = ChildInfo { + fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId { + let mut production_info = ProductionInfo { alias_sequence: Vec::new(), field_map: BTreeMap::new(), }; for (i, step) in item.production.steps.iter().enumerate() { - child_info.alias_sequence.push(step.alias.clone()); + production_info.alias_sequence.push(step.alias.clone()); if let Some(field_name) = &step.field_name { - child_info + production_info .field_map .entry(field_name.clone()) .or_insert(Vec::new()) @@ -664,9 +662,15 @@ impl<'a> ParseTableBuilder<'a> { inherited: false, }); } - if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) { - for field_name in field_names { - child_info + + if step.symbol.kind == SymbolType::NonTerminal + && 
!self.syntax_grammar.variables[step.symbol.index] + .kind + .is_visible() + { + let info = &self.parse_table.variable_info[step.symbol.index]; + for (field_name, _) in &info.fields { + production_info .field_map .entry(field_name.clone()) .or_insert(Vec::new()) @@ -678,8 +682,8 @@ impl<'a> ParseTableBuilder<'a> { } } - while child_info.alias_sequence.last() == Some(&None) { - child_info.alias_sequence.pop(); + while production_info.alias_sequence.last() == Some(&None) { + production_info.alias_sequence.pop(); } if item.production.steps.len() > self.parse_table.max_aliased_production_length { @@ -688,14 +692,14 @@ impl<'a> ParseTableBuilder<'a> { if let Some(index) = self .parse_table - .child_infos + .production_infos .iter() - .position(|seq| *seq == child_info) + .position(|seq| *seq == production_info) { index } else { - self.parse_table.child_infos.push(child_info); - self.parse_table.child_infos.len() - 1 + self.parse_table.production_infos.push(production_info); + self.parse_table.production_infos.len() - 1 } } @@ -742,23 +746,155 @@ fn populate_following_tokens( } } -fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap> { - let mut result = HashMap::new(); - for (i, variable) in grammar.variables.iter().enumerate() { - let mut field_names = Vec::new(); - if variable.kind == VariableType::Hidden { +pub(crate) fn get_variable_info( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> Vec { + let mut result = Vec::new(); + + // Determine which field names and child node types can appear directly + // within each type of node. 
+ for (i, variable) in syntax_grammar.variables.iter().enumerate() { + let mut info = VariableInfo { + fields: HashMap::new(), + child_types: HashSet::new(), + }; + let is_recursive = variable + .productions + .iter() + .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); + + for production in &variable.productions { + for step in &production.steps { + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(step.symbol) + }; + + if let Some(field_name) = &step.field_name { + let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { + multiple: false, + required: true, + types: HashSet::new(), + }); + field_info.multiple |= is_recursive; + field_info.types.insert(child_type.clone()); + } + + info.child_types.insert(child_type); + } + } + + for production in &variable.productions { + let production_fields: Vec<&String> = production + .steps + .iter() + .filter_map(|s| s.field_name.as_ref()) + .collect(); + for (field_name, field_info) in info.fields.iter_mut() { + if !production_fields.contains(&field_name) { + field_info.required = false; + } + } + } + + result.push(info); + } + + // Expand each node type's information recursively to inherit the properties of + // hidden children. + let mut done = false; + while !done { + done = true; + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + // Move this variable's info out of the vector so it can be modified + // while reading from other entries of the vector. 
+ let mut variable_info = VariableInfo { + fields: HashMap::new(), + child_types: HashSet::new(), + }; + mem::swap(&mut variable_info, &mut result[i]); + for production in &variable.productions { for step in &production.steps { - if let Some(field_name) = &step.field_name { - if let Err(i) = field_names.binary_search(field_name) { - field_names.insert(i, field_name.clone()); + if step.symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[step.symbol.index] + .kind + .is_visible() + { + let production_info = &result[step.symbol.index]; + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &production_info.fields { + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert_with(|| { + done = false; + child_field_info.clone() + }); + if child_field_info.multiple && !field_info.multiple { + field_info.multiple = child_field_info.multiple; + done = false; + } + if !child_field_info.required && field_info.required { + field_info.required = child_field_info.required; + done = false; + } + for child_type in &child_field_info.types { + if field_info.types.insert(child_type.clone()) { + done = false; + } + } + } + + // Inherit child types from this hidden child + for child_type in &production_info.child_types { + if variable_info.child_types.insert(child_type.clone()) { + done = false; + } + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &production_info.child_types { + if field_info.types.insert(child_type.clone()) { + done = false; + } + } } } } } + + // Move this variable's info back into the vector. 
+ result[i] = variable_info; } - result.insert(Symbol::non_terminal(i), field_names); } + + let child_type_is_visible = |child_type: &ChildType| match child_type { + ChildType::Aliased(_) => true, + ChildType::Normal(symbol) => { + let step_kind = match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + }; + step_kind.is_visible() + } + }; + + for variable_info in result.iter_mut() { + variable_info.child_types.retain(&child_type_is_visible); + for (_, field_info) in variable_info.fields.iter_mut() { + field_info.types.retain(&child_type_is_visible); + } + } + result } @@ -788,12 +924,178 @@ pub(crate) fn build_parse_table( parse_table: ParseTable { states: Vec::new(), symbols: Vec::new(), - child_infos: Vec::new(), + production_infos: Vec::new(), max_aliased_production_length: 0, + variable_info: get_variable_info(syntax_grammar, lexical_grammar), }, - field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar), } .build()?; Ok((table, following_tokens)) } + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::grammars::{ + LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, + }; + + #[test] + fn test_get_variable_info() { + let variable_info = get_variable_info( + &build_syntax_grammar(vec![ + // Required field `field1` has only one node type. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"), + ], + }, + ], + }, + ]), + &build_lexical_grammar(), + ); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(1))] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + + assert_eq!( + variable_info[2].fields, + vec![( + "field2".to_string(), + FieldInfo { + required: false, + multiple: false, + types: vec![ + ChildType::Normal(Symbol::terminal(2)), + ChildType::Normal(Symbol::terminal(3)), + ] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_inherited_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar(vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + 
dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ]), + &build_lexical_grammar(), + ); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(3))] + .into_iter() + .collect::>(), + } + )] + .into_iter() + .collect::>() + ); + } + + fn build_syntax_grammar(variables: Vec) -> SyntaxGrammar { + let mut syntax_grammar = SyntaxGrammar::default(); + syntax_grammar.variables = variables; + syntax_grammar + } + + fn build_lexical_grammar() -> LexicalGrammar { + let mut lexical_grammar = LexicalGrammar::default(); + for i in 0..10 { + lexical_grammar.variables.push(LexicalVariable { + name: format!("token_{}", i), + kind: VariableType::Named, + implicit_precedence: 0, + start_state: 0, + }); + } + lexical_grammar + } +} diff --git a/cli/src/generate/build_tables/minimize_parse_table.rs b/cli/src/generate/build_tables/minimize_parse_table.rs index f3862732..a9d26124 100644 --- a/cli/src/generate/build_tables/minimize_parse_table.rs +++ b/cli/src/generate/build_tables/minimize_parse_table.rs @@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> { ParseAction::ShiftExtra => continue, ParseAction::Reduce { child_count: 1, - child_info_id: 0, + production_id: 0, symbol, .. 
} => { diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 7f9e09d6..6cc1a5f7 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -187,6 +187,12 @@ impl Variable { } } +impl VariableType { + pub fn is_visible(&self) -> bool { + *self == VariableType::Named || *self == VariableType::Anonymous + } +} + impl LexicalGrammar { pub fn variable_indices_for_nfa_states<'a>( &'a self, diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index b13dfbbd..0983dfc3 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -1,10 +1,15 @@ use self::build_tables::build_tables; +use self::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; +use self::rules::{AliasMap, Symbol, SymbolType}; +use self::tables::{ChildType, VariableInfo}; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; +use serde_derive::Serialize; +use std::collections::BTreeMap; use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; @@ -27,6 +32,12 @@ lazy_static! 
{ .unwrap(); } +struct GeneratedParser { + name: String, + c_code: String, + fields_json: String, +} + pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, @@ -47,13 +58,18 @@ pub fn generate_parser_in_directory( } } - let (language_name, c_code) = - generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; + let GeneratedParser { + name: language_name, + c_code, + fields_json, + } = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; let repo_header_path = repo_src_path.join("tree_sitter"); fs::create_dir_all(&repo_src_path)?; fs::create_dir_all(&repo_header_path)?; fs::write(&repo_src_path.join("parser.c"), c_code) .map_err(|e| format!("Failed to write parser.c: {}", e))?; + fs::write(&repo_src_path.join("node-fields.json"), fields_json) + .map_err(|e| format!("Failed to write node-fields.json: {}", e))?; fs::write( &repo_header_path.join("parser.h"), tree_sitter::PARSER_HEADER, @@ -73,14 +89,15 @@ pub fn generate_parser_in_directory( pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); - generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new()) + let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())?; + Ok((parser.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( grammar_json: &str, minimize: bool, state_ids_to_log: Vec, -) -> Result<(String, String)> { +) -> Result { let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; @@ -92,8 +109,15 @@ fn generate_parser_for_grammar_with_opts( minimize, state_ids_to_log, )?; + let name = input_grammar.name; + let fields_json = generate_field_info_json( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &parse_table.variable_info, + ); let c_code = render_c_code( - 
&input_grammar.name, + &name, parse_table, main_lex_table, keyword_lex_table, @@ -102,7 +126,11 @@ fn generate_parser_for_grammar_with_opts( lexical_grammar, simple_aliases, ); - Ok((input_grammar.name, c_code)) + Ok(GeneratedParser { + name, + c_code, + fields_json, + }) } fn load_grammar_file(grammar_path: &Path) -> Result { @@ -153,3 +181,91 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> .map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } + +#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] +struct FieldTypeJSON { + kind: String, + named: bool, +} + +#[derive(Debug, Serialize)] +struct FieldInfoJSON { + multiple: bool, + required: bool, + types: Vec, +} + +fn generate_field_info_json( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + simple_aliases: &AliasMap, + variable_info: &Vec, +) -> String { + let mut map = BTreeMap::new(); + for (i, info) in variable_info.iter().enumerate() { + let variable = &syntax_grammar.variables[i]; + if !variable.kind.is_visible() || info.fields.is_empty() { + continue; + } + + let name = simple_aliases + .get(&Symbol::non_terminal(i)) + .map_or(&variable.name, |alias| &alias.value); + + let fields = map.entry(name.clone()).or_insert_with(|| BTreeMap::new()); + for (field, field_info) in info.fields.iter() { + let field_info_json = fields.entry(field.clone()).or_insert(FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + }); + + field_info_json.multiple |= field_info.multiple; + field_info_json.required &= field_info.required; + field_info_json.types.extend(field_info.types.iter().map( + |child_type| match child_type { + ChildType::Aliased(alias) => FieldTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + FieldTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + } + } else { + match symbol.kind { + 
SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + FieldTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), + } + } + } + }, + )); + field_info_json.types.sort_unstable(); + field_info_json.types.dedup(); + } + } + serde_json::to_string_pretty(&map).unwrap() +} diff --git a/cli/src/generate/render.rs b/cli/src/generate/render.rs index 22bf655c..f2d84bf7 100644 --- a/cli/src/generate/render.rs +++ b/cli/src/generate/render.rs @@ -113,12 +113,12 @@ impl Generator { } let mut field_names = Vec::new(); - for child_info in &self.parse_table.child_infos { - for field_name in child_info.field_map.keys() { + for production_info in &self.parse_table.production_infos { + for field_name in production_info.field_map.keys() { field_names.push(field_name); } - for alias in &child_info.alias_sequence { + for alias in &production_info.alias_sequence { if let Some(alias) = &alias { let alias_kind = if alias.is_named { VariableType::Named @@ -358,17 +358,17 @@ impl Generator { add_line!( self, "static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{", - self.parse_table.child_infos.len() + self.parse_table.production_infos.len() ); indent!(self); - for (i, child_info) in self.parse_table.child_infos.iter().enumerate() { - if child_info.alias_sequence.is_empty() { + for (i, production_info) in self.parse_table.production_infos.iter().enumerate() { + if production_info.alias_sequence.is_empty() { continue; } add_line!(self, "[{}] = {{", i); indent!(self); - for (j, 
alias) in child_info.alias_sequence.iter().enumerate() { + for (j, alias) in production_info.alias_sequence.iter().enumerate() { if let Some(alias) = alias { add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]); } @@ -391,10 +391,10 @@ impl Generator { ); let mut field_map_ids = Vec::new(); - for child_info in &self.parse_table.child_infos { - if !child_info.field_map.is_empty() { + for production_info in &self.parse_table.production_infos { + if !production_info.field_map.is_empty() { let mut flat_field_map = Vec::new(); - for (field_name, locations) in &child_info.field_map { + for (field_name, locations) in &production_info.field_map { for location in locations { flat_field_map.push((field_name.clone(), *location)); } @@ -417,12 +417,12 @@ impl Generator { "static const TSFieldMapSlice ts_field_map_slices[] = {{", ); indent!(self); - for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { + for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() { if length > 0 { add_line!( self, "[{}] = {{.index = {}, .length = {}}},", - child_info_id, + production_id, row_id, length ); @@ -816,15 +816,15 @@ impl Generator { symbol, child_count, dynamic_precedence, - child_info_id, + production_id, .. 
} => { add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count); if dynamic_precedence != 0 { add!(self, ", .dynamic_precedence = {}", dynamic_precedence); } - if child_info_id != 0 { - add!(self, ", .child_info_id = {}", child_info_id); + if production_id != 0 { + add!(self, ", .production_id = {}", production_id); } add!(self, ")"); } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index a39ae099..e358f4fa 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,9 +1,9 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; -use hashbrown::HashMap; +use hashbrown::{HashMap, HashSet}; use std::collections::BTreeMap; -pub(crate) type ChildInfoId = usize; +pub(crate) type ProductionInfoId = usize; pub(crate) type ParseStateId = usize; pub(crate) type LexStateId = usize; @@ -22,7 +22,7 @@ pub(crate) enum ParseAction { precedence: i32, dynamic_precedence: i32, associativity: Option, - child_info_id: ChildInfoId, + production_id: ProductionInfoId, }, } @@ -47,16 +47,36 @@ pub(crate) struct FieldLocation { } #[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct ChildInfo { +pub(crate) struct ProductionInfo { pub alias_sequence: Vec>, pub field_map: BTreeMap>, } +#[derive(Clone, Debug, PartialEq, Eq, Hash)] +pub(crate) enum ChildType { + Normal(Symbol), + Aliased(Alias), +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldInfo { + pub required: bool, + pub multiple: bool, + pub types: HashSet, +} + +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct VariableInfo { + pub fields: HashMap, + pub child_types: HashSet, +} + #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub child_infos: Vec, + pub variable_info: Vec, + pub production_infos: Vec, pub max_aliased_production_length: usize, } diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index 90ab6200..6e67d90c 100644 --- 
a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -66,7 +66,7 @@ typedef struct { TSSymbol symbol; int16_t dynamic_precedence; uint8_t child_count; - uint8_t child_info_id; + uint8_t production_id; }; } params; TSParseActionType type : 4; diff --git a/lib/src/get_changed_ranges.c b/lib/src/get_changed_ranges.c index 83331cce..46add435 100644 --- a/lib/src/get_changed_ranges.c +++ b/lib/src/get_changed_ranges.c @@ -148,7 +148,7 @@ static bool iterator_tree_is_visible(const Iterator *self) { Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, - parent.ptr->child_info_id + parent.ptr->production_id ); return alias_sequence && alias_sequence[entry.structural_child_index] != 0; } @@ -171,7 +171,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree, const Subtree *parent = self->cursor.stack.contents[i - 1].subtree; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->language, - parent->ptr->child_info_id + parent->ptr->production_id ); if (alias_sequence) { *alias_symbol = alias_sequence[entry.structural_child_index]; diff --git a/lib/src/language.h b/lib/src/language.h index 59c0fadc..84e3fbc4 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -81,19 +81,19 @@ ts_language_enabled_external_tokens(const TSLanguage *self, } static inline const TSSymbol * -ts_language_alias_sequence(const TSLanguage *self, uint32_t child_info_id) { - return child_info_id > 0 ? - self->alias_sequences + child_info_id * self->max_alias_sequence_length : +ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) { + return production_id > 0 ? 
+ self->alias_sequences + production_id * self->max_alias_sequence_length : NULL; } static inline void ts_language_field_map( const TSLanguage *self, - uint32_t child_info_id, + uint32_t production_id, const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { - TSFieldMapSlice slice = self->field_map_slices[child_info_id]; + TSFieldMapSlice slice = self->field_map_slices[production_id]; *start = &self->field_map_entries[slice.index]; *end = &self->field_map_entries[slice.index] + slice.length; } diff --git a/lib/src/node.c b/lib/src/node.c index 6ac4636d..96f934a7 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -53,7 +53,7 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) { } const TSSymbol *alias_sequence = ts_language_alias_sequence( node->tree->language, - subtree.ptr->child_info_id + subtree.ptr->production_id ); return (NodeChildIterator) { .tree = node->tree, @@ -464,7 +464,7 @@ recur: const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self.tree->language, - ts_node__subtree(self).ptr->child_info_id, + ts_node__subtree(self).ptr->production_id, &field_map, &field_map_end ); @@ -498,8 +498,8 @@ recur: goto recur; } - // Otherwise, descend into this child, but if that child doesn't - // contain the field, continue searching subsequent children. + // Otherwise, descend into this child, but if it doesn't contain + // the field, continue searching subsequent children. 
else { TSNode result = ts_node_child_by_field_id(child, field_id); if (result.id) return result; diff --git a/lib/src/parser.c b/lib/src/parser.c index 5fd75cd8..4e5727f6 100644 --- a/lib/src/parser.c +++ b/lib/src/parser.c @@ -675,7 +675,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol, uint32_t count, int dynamic_precedence, - uint16_t child_info_id, bool fragile) { + uint16_t production_id, bool fragile) { uint32_t initial_version_count = ts_stack_version_count(self->stack); uint32_t removed_version_count = 0; StackSliceArray pop = ts_stack_pop_count(self->stack, version, count); @@ -709,7 +709,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } MutableSubtree parent = ts_subtree_new_node(&self->tree_pool, - symbol, &children, child_info_id, self->language + symbol, &children, production_id, self->language ); // This pop operation may have caused multiple stack versions to collapse @@ -735,7 +735,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy } parent.ptr->dynamic_precedence += dynamic_precedence; - parent.ptr->child_info_id = child_info_id; + parent.ptr->production_id = production_id; TSStateId state = ts_stack_state(self->stack, slice_version); TSStateId next_state = ts_language_next_state(self->language, state, symbol); @@ -791,7 +791,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look &self->tree_pool, ts_subtree_symbol(child), &trees, - child.ptr->child_info_id, + child.ptr->production_id, self->language )); ts_subtree_release(&self->tree_pool, child); @@ -867,7 +867,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, .symbol = action.params.symbol, .count = action.params.child_count, .dynamic_precedence = action.params.dynamic_precedence, - .child_info_id = action.params.child_info_id, + .production_id 
= action.params.production_id, }); default: break; @@ -881,7 +881,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self, reduction_version = ts_parser__reduce( self, version, action.symbol, action.count, - action.dynamic_precedence, action.child_info_id, + action.dynamic_precedence, action.production_id, true ); } @@ -1310,7 +1310,7 @@ static void ts_parser__advance(TSParser *self, StackVersion version, bool allow_ LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count); StackVersion reduction_version = ts_parser__reduce( self, version, action.params.symbol, action.params.child_count, - action.params.dynamic_precedence, action.params.child_info_id, + action.params.dynamic_precedence, action.params.production_id, is_fragile ); if (reduction_version != STACK_VERSION_NONE) { diff --git a/lib/src/reduce_action.h b/lib/src/reduce_action.h index 9eca0327..72aff08d 100644 --- a/lib/src/reduce_action.h +++ b/lib/src/reduce_action.h @@ -12,7 +12,7 @@ typedef struct { uint32_t count; TSSymbol symbol; int dynamic_precedence; - unsigned short child_info_id; + unsigned short production_id; } ReduceAction; typedef Array(ReduceAction) ReduceActionSet; diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 3d588890..776a86fc 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -379,7 +379,7 @@ void ts_subtree_set_children( self.ptr->dynamic_precedence = 0; uint32_t non_extra_index = 0; - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t lookahead_end_byte = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { @@ -474,7 +474,7 @@ void ts_subtree_set_children( } MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, - SubtreeArray *children, unsigned child_info_id, + SubtreeArray *children, unsigned production_id, const TSLanguage *language) { 
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol); bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat; @@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol, *data = (SubtreeHeapData) { .ref_count = 1, .symbol = symbol, - .child_info_id = child_info_id, + .production_id = production_id, .visible = metadata.visible, .named = metadata.named, .has_changes = false, @@ -838,7 +838,7 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim } if (ts_subtree_child_count(self)) { - const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->child_info_id); + const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; @@ -915,7 +915,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset, uint32_t child_start_offset = start_offset; uint32_t child_info_offset = language->max_alias_sequence_length * - ts_subtree_child_info_id(*self); + ts_subtree_production_id(*self); for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) { const Subtree *child = &self->ptr->children[i]; TSSymbol alias_symbol = 0; diff --git a/lib/src/subtree.h b/lib/src/subtree.h index f32edfc2..611996d5 100644 --- a/lib/src/subtree.h +++ b/lib/src/subtree.h @@ -73,7 +73,7 @@ typedef struct { uint32_t node_count; uint32_t repeat_depth; int32_t dynamic_precedence; - uint16_t child_info_id; + uint16_t production_id; struct { TSSymbol symbol; TSStateId parse_state; @@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) { return (self.data.is_inline || self.ptr->child_count == 0) ? 
0 : self.ptr->dynamic_precedence; } -static inline uint16_t ts_subtree_child_info_id(Subtree self) { +static inline uint16_t ts_subtree_production_id(Subtree self) { if (ts_subtree_child_count(self) > 0) { - return self.ptr->child_info_id; + return self.ptr->production_id; } else { return 0; } diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 4f3f9ae7..c3ba54c5 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -22,7 +22,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs } const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - last_entry->subtree->ptr->child_info_id + last_entry->subtree->ptr->production_id ); return (CursorChildIterator) { .tree = self->tree, @@ -210,7 +210,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursorEntry *parent_entry = &self->stack.contents[i - 1]; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); is_aliased = alias_sequence && alias_sequence[entry->structural_child_index]; } @@ -230,7 +230,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) { TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2]; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) { alias_symbol = alias_sequence[last_entry->structural_child_index]; @@ -257,7 +257,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { if (ts_subtree_visible(*entry->subtree)) break; const TSSymbol *alias_sequence = ts_language_alias_sequence( self->tree->language, - parent_entry->subtree->ptr->child_info_id + parent_entry->subtree->ptr->production_id ); if (alias_sequence && 
alias_sequence[entry->structural_child_index]) { break; @@ -267,7 +267,7 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const TSFieldMapEntry *field_map, *field_map_end; ts_language_field_map( self->tree->language, - parent_entry->subtree->ptr->child_info_id, + parent_entry->subtree->ptr->production_id, &field_map, &field_map_end ); From 9f608435eebdf5cd23a7c4d7a4c7eed416ba1375 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 12 Feb 2019 17:20:12 -0800 Subject: [PATCH 15/25] Fix errors in when languages have no fields --- cli/src/tests/node_test.rs | 66 +++++++++++++++++++++++++++++++++----- lib/src/language.c | 4 +-- lib/src/language.h | 7 ++++ lib/src/tree_cursor.c | 8 +++-- 4 files changed, 72 insertions(+), 13 deletions(-) diff --git a/cli/src/tests/node_test.rs b/cli/src/tests/node_test.rs index 08080798..098f1262 100644 --- a/cli/src/tests/node_test.rs +++ b/cli/src/tests/node_test.rs @@ -1,6 +1,6 @@ +use super::helpers::edits::{get_random_edit, perform_edit}; use super::helpers::fixtures::{get_language, get_test_language}; use super::helpers::random::Rand; -use super::helpers::edits::{get_random_edit, perform_edit}; use crate::generate::generate_parser_for_grammar; use tree_sitter::{Node, Parser, Point, Tree}; @@ -321,11 +321,7 @@ fn test_node_edit() { let nodes_after = get_all_nodes(&tree2); for (i, node) in nodes_before.into_iter().enumerate() { assert_eq!( - ( - node.kind(), - node.start_byte(), - node.start_position() - ), + (node.kind(), node.start_byte(), node.start_position()), ( nodes_after[i].kind(), nodes_after[i].start_byte(), @@ -415,13 +411,18 @@ fn test_node_field_names() { let language = get_test_language(&parser_name, &parser_code, None); parser.set_language(language).unwrap(); - let tree = parser.parse("child-0 child-1 child-2 child-3 child-4", None).unwrap(); + let tree = parser + .parse("child-0 child-1 child-2 child-3 child-4", None) + .unwrap(); let root_node = tree.root_node(); 
assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0)); assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2)); assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4)); - assert_eq!(root_node.child(0).unwrap().child_by_field_name("field_1"), None); + assert_eq!( + root_node.child(0).unwrap().child_by_field_name("field_1"), + None + ); assert_eq!(root_node.child_by_field_name("not_a_real_field"), None); let mut cursor = root_node.walk(); @@ -443,6 +444,55 @@ fn test_node_field_names() { assert_eq!(cursor.field_name(), Some("field_3")); } +#[test] +fn test_node_field_calls_in_language_without_fields() { + let (parser_name, parser_code) = generate_parser_for_grammar( + r#" + { + "name": "test_grammar_with_no_fields", + "extras": [ + {"type": "PATTERN", "value": "\\s+"} + ], + "rules": { + "a": { + "type": "SEQ", + "members": [ + { + "type": "STRING", + "value": "b" + }, + { + "type": "STRING", + "value": "c" + }, + { + "type": "STRING", + "value": "d" + } + ] + } + } + } + "#, + ) + .unwrap(); + + let mut parser = Parser::new(); + let language = get_test_language(&parser_name, &parser_code, None); + parser.set_language(language).unwrap(); + + let tree = parser.parse("b c d", None).unwrap(); + + let root_node = tree.root_node(); + assert_eq!(root_node.kind(), "a"); + assert_eq!(root_node.child_by_field_name("something"), None); + + let mut cursor = root_node.walk(); + assert_eq!(cursor.field_name(), None); + assert_eq!(cursor.goto_first_child(), true); + assert_eq!(cursor.field_name(), None); +} + fn get_all_nodes(tree: &Tree) -> Vec { let mut result = Vec::new(); let mut visited_children = false; diff --git a/lib/src/language.c b/lib/src/language.c index 74a7b58d..ebb47d06 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -3,8 +3,6 @@ #include "./error_costs.h" #include -#define LANGUAGE_VERSION_WITH_FIELDS 10 - void ts_language_table_entry(const TSLanguage *self, TSStateId state, TSSymbol symbol, 
TableEntry *result) { if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { @@ -73,7 +71,7 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol } uint32_t ts_language_field_count(const TSLanguage *self) { - if (self->version >= LANGUAGE_VERSION_WITH_FIELDS) { + if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) { return self->field_count; } else { return 0; diff --git a/lib/src/language.h b/lib/src/language.h index 84e3fbc4..16e74790 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -9,6 +9,7 @@ extern "C" { #include "tree_sitter/parser.h" #define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1) +#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10 typedef struct { const TSParseAction *actions; @@ -93,6 +94,12 @@ static inline void ts_language_field_map( const TSFieldMapEntry **start, const TSFieldMapEntry **end ) { + if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS || self->field_count == 0) { + *start = NULL; + *end = NULL; + return; + } + TSFieldMapSlice slice = self->field_map_slices[production_id]; *start = &self->field_map_entries[slice.index]; *end = &self->field_map_entries[slice.index] + slice.length; diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index c3ba54c5..35aeebb3 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -284,6 +284,10 @@ TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) { const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) { TSFieldId id = ts_tree_cursor_current_field_id(_self); - const TreeCursor *self = (const TreeCursor *)_self; - return self->tree->language->field_names[id]; + if (id) { + const TreeCursor *self = (const TreeCursor *)_self; + return self->tree->language->field_names[id]; + } else { + return NULL; + } } From 65d1ce859323aac4fc6e9259c6ad117bca21151f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Feb 2019 09:47:21 -0800 Subject: [PATCH 16/25] lib: 
Include fields in `ts_node_string` output This allows you to assert about fields in tests. But if your test s-expression does *not* include fields, the fields will be stripped from the regexp before comparison. --- cli/src/test.rs | 16 +++- cli/src/tests/corpus_test.rs | 30 +++++-- cli/src/tests/parser_test.rs | 2 +- lib/src/subtree.c | 90 ++++++++++++++----- test/fixtures/error_corpus/c_errors.txt | 14 +-- .../error_corpus/javascript_errors.txt | 6 +- 6 files changed, 113 insertions(+), 45 deletions(-) diff --git a/cli/src/test.rs b/cli/src/test.rs index c4d74285..98404138 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -22,6 +22,7 @@ lazy_static! { .build() .unwrap(); static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); + static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap(); } #[derive(Debug, PartialEq, Eq)] @@ -34,6 +35,7 @@ pub enum TestEntry { name: String, input: Vec, output: String, + has_fields: bool, }, } @@ -135,6 +137,7 @@ fn run_tests( name, input, output, + has_fields, } => { if let Some(filter) = filter { if !name.contains(filter) { @@ -142,7 +145,10 @@ fn run_tests( } } let tree = parser.parse(&input, None).unwrap(); - let actual = tree.root_node().to_sexp(); + let mut actual = tree.root_node().to_sexp(); + if !has_fields { + actual = strip_sexp_fields(actual); + } for _ in 0..indent_level { print!(" "); } @@ -186,6 +192,10 @@ pub fn parse_tests(path: &Path) -> io::Result { } } +pub fn strip_sexp_fields(sexp: String) -> String { + SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() +} + fn parse_test_content(name: String, content: String) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); @@ -209,10 +219,12 @@ fn parse_test_content(name: String, content: String) -> TestEntry { let input = bytes[previous_header_end..divider_start].to_vec(); let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); let output = output.replace(" )", ")"); + let has_fields = 
SEXP_FIELD_REGEX.is_match(&output); children.push(TestEntry::Example { name: previous_name, input, output, + has_fields, }); } } @@ -265,11 +277,13 @@ d name: "The first test".to_string(), input: "\na b c\n".as_bytes().to_vec(), output: "(a (b c))".to_string(), + has_fields: false, }, TestEntry::Example { name: "The second test".to_string(), input: "d".as_bytes().to_vec(), output: "(d)".to_string(), + has_fields: false, }, ] } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 70b27295..dba86d83 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -4,7 +4,7 @@ use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; use super::helpers::random::Rand; use super::helpers::scope_sequence::ScopeSequence; use crate::generate; -use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; +use crate::test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}; use crate::util; use lazy_static::lazy_static; use std::{env, fs, time, usize}; @@ -67,7 +67,7 @@ fn test_real_language_corpus_files() { eprintln!("language: {:?}", language_name); } - for (example_name, input, expected_output) in tests { + for (example_name, input, expected_output, has_fields) in tests { eprintln!(" example: {:?}", example_name); if TRIAL_FILTER.map_or(true, |t| t == 0) { @@ -76,7 +76,10 @@ fn test_real_language_corpus_files() { let mut parser = get_parser(&mut log_session, "log.html"); parser.set_language(language).unwrap(); let tree = parser.parse(&input, None).unwrap(); - let actual_output = tree.root_node().to_sexp(); + let mut actual_output = tree.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } drop(tree); drop(parser); if actual_output != expected_output { @@ -144,7 +147,11 @@ fn test_real_language_corpus_files() { let tree3 = parser.parse(&input, Some(&tree2)).unwrap(); // Verify that the final tree matches the expectation from the corpus. 
- let actual_output = tree3.root_node().to_sexp(); + let mut actual_output = tree3.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } + if actual_output != expected_output { println!( "Incorrect parse for {} - {} - trial {}", @@ -241,7 +248,7 @@ fn test_feature_corpus_files() { eprintln!("test language: {:?}", language_name); } - for (name, input, expected_output) in tests { + for (name, input, expected_output, has_fields) in tests { eprintln!(" example: {:?}", name); allocations::start_recording(); @@ -249,7 +256,11 @@ fn test_feature_corpus_files() { let mut parser = get_parser(&mut log_session, "log.html"); parser.set_language(language).unwrap(); let tree = parser.parse(&input, None).unwrap(); - let actual_output = tree.root_node().to_sexp(); + let mut actual_output = tree.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } + drop(tree); drop(parser); if actual_output != expected_output { @@ -348,13 +359,14 @@ fn get_parser(session: &mut Option, log_filename: &str) -> Par parser } -fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String)> { - fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec, String)>) { +fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { + fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec, String, bool)>) { match test { TestEntry::Example { mut name, input, output, + has_fields, } => { if !prefix.is_empty() { name.insert_str(0, " - "); @@ -365,7 +377,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String)> { return; } } - result.push((name, input, output)); + result.push((name, input, output, has_fields)); } TestEntry::Group { mut name, children } => { if !prefix.is_empty() { diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 7947463a..357cf37d 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -721,7 +721,7 @@ fn 
test_parsing_with_included_ranges_and_missing_tokens() { let root = tree.root_node(); assert_eq!( root.to_sexp(), - "(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))" + "(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))" ); assert_eq!(root.start_byte(), 2); assert_eq!(root.child(3).unwrap().start_byte(), 4); diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 776a86fc..a7521fa3 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -805,56 +805,90 @@ static void ts_subtree__write_dot_string(FILE *f, const char *string) { } } -static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit, - const TSLanguage *language, bool is_root, - bool include_all, TSSymbol alias_symbol, - bool alias_is_named) { +static const char *ROOT_FIELD = "__ROOT__"; + +static size_t ts_subtree__write_to_string( + Subtree self, char *string, size_t limit, + const TSLanguage *language, bool include_all, + TSSymbol alias_symbol, bool alias_is_named, const char *field_name +) { if (!self.ptr) return snprintf(string, limit, "(NULL)"); char *cursor = string; char **writer = (limit > 0) ? &cursor : &string; bool visible = include_all || - is_root || + alias_is_named || ts_subtree_missing(self) || - (ts_subtree_visible(self) && ts_subtree_named(self)) || - alias_is_named; - - if (visible && !is_root) { - cursor += snprintf(*writer, limit, " "); - } + (ts_subtree_visible(self) && ts_subtree_named(self)); if (visible) { + if (field_name != ROOT_FIELD) { + cursor += snprintf(*writer, limit, " "); + + if (field_name) { + cursor += snprintf(*writer, limit, "%s: ", field_name); + } + } + if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); - } else if (ts_subtree_missing(self)) { - cursor += snprintf(*writer, limit, "(MISSING"); } else { TSSymbol symbol = alias_symbol ? 
alias_symbol : ts_subtree_symbol(self); const char *symbol_name = ts_language_symbol_name(language, symbol); - cursor += snprintf(*writer, limit, "(%s", symbol_name); + if (ts_subtree_missing(self)) { + cursor += snprintf(*writer, limit, "(MISSING "); + if (alias_is_named || ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "%s", symbol_name); + } else { + cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); + } + } else { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } } } if (ts_subtree_child_count(self)) { const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + language, + self.ptr->production_id, + &field_map, + &field_map_end + ); + uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; if (ts_subtree_extra(child)) { cursor += ts_subtree__write_to_string( child, *writer, limit, - language, false, include_all, - 0, false + language, include_all, + 0, false, NULL ); } else { - TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + TSSymbol alias_symbol = alias_sequence + ? alias_sequence[structural_child_index] + : 0; + bool alias_is_named = alias_symbol + ? ts_language_symbol_metadata(language, alias_symbol).named + : false; + + const char *child_field_name = visible ? NULL : field_name; + for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + if (!i->inherited && i->child_index == structural_child_index) { + child_field_name = language->field_names[i->field_id]; + break; + } + } + cursor += ts_subtree__write_to_string( child, *writer, limit, - language, false, include_all, - alias_symbol, - alias_symbol ? 
ts_language_symbol_metadata(language, alias_symbol).named : false + language, include_all, + alias_symbol, alias_is_named, child_field_name ); structural_child_index++; } @@ -866,15 +900,23 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim return cursor - string; } -char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) { +char *ts_subtree_string( + Subtree self, + const TSLanguage *language, + bool include_all +) { char scratch_string[1]; size_t size = ts_subtree__write_to_string( self, scratch_string, 0, - language, true, - include_all, 0, false + language, include_all, + 0, false, ROOT_FIELD ) + 1; char *result = malloc(size * sizeof(char)); - ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false); + ts_subtree__write_to_string( + self, result, size, + language, include_all, + 0, false, ROOT_FIELD + ); return result; } diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index ee63debf..4d0c8e8b 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -14,8 +14,8 @@ int main() { (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)) - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))))) + (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")) + (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))))) ============================================== Top-level declarations with missing semicolons @@ -27,8 +27,8 @@ static int b --- (translation_unit - (declaration (primitive_type) (identifier) (MISSING)) - (declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING))) + (declaration (primitive_type) 
(identifier) (MISSING ";")) + (declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";"))) ========================================== Partial declaration lists inside ifdefs @@ -58,7 +58,7 @@ int c() { (comment) (declaration (primitive_type) (identifier)) (function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal)))) - (preproc_ifdef (identifier) (MISSING)))))) + (preproc_ifdef (identifier) (MISSING "#endif")))))) ========================================== If statements with incomplete expressions @@ -83,12 +83,12 @@ int main() { (if_statement (parenthesized_expression (field_expression (identifier) - (MISSING))) + (MISSING field_identifier))) (compound_statement (expression_statement (call_expression (identifier) (argument_list))) (expression_statement (call_expression (identifier) (argument_list))) (if_statement - (parenthesized_expression (pointer_expression (MISSING))) + (parenthesized_expression (pointer_expression (MISSING identifier))) (expression_statement (call_expression (identifier) (argument_list))))))))) ==================================== diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index ffa9d547..4aac3e37 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -36,7 +36,7 @@ Missing object-literal values (program (expression_statement (object (pair (property_identifier) (identifier)) - (pair (property_identifier) (MISSING))))) + (pair (property_identifier) (MISSING identifier))))) =================================================== Extra identifiers in expressions @@ -81,7 +81,7 @@ if ({a: 'b'} {c: 'd'}) { (assignment_expression (identifier) (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) - (MISSING)) + (MISSING ";")) (function (formal_parameters (identifier)) 
(statement_block (expression_statement (identifier))))))) =================================================== @@ -153,7 +153,7 @@ const h = `i ${j(k} l` (identifier) (template_string (template_substitution (call_expression (identifier) - (arguments (identifier) (MISSING)))))))) + (arguments (identifier) (MISSING ")")))))))) ========================================================= Long sequences of invalid tokens From 9f3134daceb511ce67b73c3d093a76c7cfcb5d59 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 13 Feb 2019 19:14:33 -0800 Subject: [PATCH 17/25] Allow fields to be used in property sheets --- Cargo.lock | 6 +- cli/Cargo.toml | 2 +- cli/src/properties.rs | 400 ++++++++++++++++++++++++++++++++---------- lib/binding/lib.rs | 115 +++++++++--- 4 files changed, 401 insertions(+), 122 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c614545a..0e47fcd2 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -420,7 +420,7 @@ dependencies = [ [[package]] name = "rsass" -version = "0.9.6" +version = "0.9.8" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "lazy_static 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -586,7 +586,7 @@ dependencies = [ "rand 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", "regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)", "regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)", - "rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)", + "rsass 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)", "serde 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_derive 1.0.80 (registry+https://github.com/rust-lang/crates.io-index)", "serde_json 1.0.33 (registry+https://github.com/rust-lang/crates.io-index)", @@ -697,7 +697,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum redox_users 0.2.0 (registry+https://github.com/rust-lang/crates.io-index)" = 
"214a97e49be64fd2c86f568dd0cb2c757d2cc53de95b273b6ad0a1c908482f26" "checksum regex 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "37e7cbbd370869ce2e8dff25c7018702d10b21a20ef7135316f8daecd6c25b7f" "checksum regex-syntax 0.6.4 (registry+https://github.com/rust-lang/crates.io-index)" = "4e47a2ed29da7a9e1960e1639e7a982e6edc6d49be308a3b02daf511504a16d1" -"checksum rsass 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7a5dde55023a6c19470f7aeb59f75f897d8b80cbe00d61dfcaf7bbbe3de4c0a6" +"checksum rsass 0.9.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7f4534cc03040beacd2668621815f26fe57e5b7cfe085790f98e5e87c1612316" "checksum rustc-demangle 0.1.9 (registry+https://github.com/rust-lang/crates.io-index)" = "bcfe5b13211b4d78e5c2cadfebd7769197d95c639c35a50057eb4c05de811395" "checksum rustc_version 0.2.3 (registry+https://github.com/rust-lang/crates.io-index)" = "138e3e0acb6c9fb258b19b67cb8abd63c00679d2851805ea151465464fe9030a" "checksum ryu 0.2.7 (registry+https://github.com/rust-lang/crates.io-index)" = "eb9e9b8cde282a9fe6a42dd4681319bfb63f121b8a8ee9439c6f4107e58a46f7" diff --git a/cli/Cargo.toml b/cli/Cargo.toml index a013190e..01f269fb 100644 --- a/cli/Cargo.toml +++ b/cli/Cargo.toml @@ -31,7 +31,7 @@ serde = "1.0" serde_derive = "1.0" regex-syntax = "0.6.4" regex = "1" -rsass = "0.9" +rsass = "^0.9.8" [dependencies.tree-sitter] version = ">= 0.3.7" diff --git a/cli/src/properties.rs b/cli/src/properties.rs index fccfd7ed..66cc5589 100644 --- a/cli/src/properties.rs +++ b/cli/src/properties.rs @@ -2,8 +2,8 @@ use crate::error::{Error, Result}; use log::info; use rsass; use rsass::sass::Value; +use rsass::selectors::SelectorPart; use serde_derive::Serialize; -use std::cmp::Ordering; use std::collections::hash_map::Entry; use std::collections::{BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::{self, Write}; @@ -27,11 +27,12 @@ type PropertySetId = usize; #[derive(Clone, PartialEq, Eq)] struct SelectorStep { - kind: 
String, - is_named: bool, - is_immediate: bool, + kind: Option, + field: Option, child_index: Option, text_pattern: Option, + is_named: Option, + is_immediate: bool, } #[derive(PartialEq, Eq)] @@ -175,6 +176,7 @@ impl Builder { transition_map.insert(( PropertyTransitionJSON { kind: step.kind.clone(), + field: step.field.clone(), named: step.is_named, index: step.child_index, text: step.text_pattern.clone(), @@ -235,19 +237,11 @@ impl Builder { // first, and in the event of a tie, transitions corresponding to later rules // in the cascade are tried first. transition_list.sort_by(|a, b| { - let result = a.0.kind.cmp(&b.0.kind); - if result != Ordering::Equal { - return result; - } - let result = a.0.named.cmp(&b.0.named); - if result != Ordering::Equal { - return result; - } - let result = transition_specificity(&b.0).cmp(&transition_specificity(&a.0)); - if result != Ordering::Equal { - return result; - } - b.1.cmp(&a.1) + (transition_specificity(&b.0).cmp(&transition_specificity(&a.0))) + .then_with(|| b.1.cmp(&a.1)) + .then_with(|| a.0.kind.cmp(&b.0.kind)) + .then_with(|| a.0.named.cmp(&b.0.named)) + .then_with(|| a.0.field.cmp(&b.0.field)) }); // Compute the merged properties that apply in the current state. @@ -256,11 +250,7 @@ impl Builder { // rules will override less specific selectors and earlier rules. 
let mut properties = PropertySet::new(); selector_matches.sort_unstable_by(|a, b| { - let result = a.specificity.cmp(&b.specificity); - if result != Ordering::Equal { - return result; - } - a.rule_id.cmp(&b.rule_id) + (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) }); selector_matches.dedup(); for selector_match in selector_matches { @@ -322,6 +312,7 @@ impl Builder { transition.state_id = *replacement; } } + state.transitions.dedup(); } } @@ -356,8 +347,14 @@ impl Builder { } fn selector_specificity(selector: &Selector) -> u32 { - let mut result = selector.0.len() as u32; + let mut result = 0; for step in &selector.0 { + if step.kind.is_some() { + result += 1; + } + if step.field.is_some() { + result += 1; + } if step.child_index.is_some() { result += 1; } @@ -370,6 +367,12 @@ fn selector_specificity(selector: &Selector) -> u32 { fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { let mut result = 0; + if transition.kind.is_some() { + result += 1; + } + if transition.field.is_some() { + result += 1; + } if transition.index.is_some() { result += 1; } @@ -380,19 +383,37 @@ fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 { } fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool { - step.kind == transition.kind - && step.is_named == transition.named - && (step.child_index == transition.index || step.child_index.is_none()) - && (step.text_pattern == transition.text || step.text_pattern.is_none()) + step.kind + .as_ref() + .map_or(true, |kind| transition.kind.as_ref() == Some(kind)) + && step + .is_named + .map_or(true, |named| transition.named == Some(named)) + && step + .field + .as_ref() + .map_or(true, |field| transition.field.as_ref() == Some(field)) + && step + .child_index + .map_or(true, |index| transition.index == Some(index)) + && step + .text_pattern + .as_ref() + .map_or(true, |text| transition.text.as_ref() == Some(text)) } impl fmt::Debug for 
SelectorStep { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { write!(f, "(")?; - if self.is_named { - write!(f, "{}", self.kind)?; - } else { - write!(f, "\"{}\"", self.kind)?; + if let Some(kind) = &self.kind { + if self.is_named.unwrap() { + write!(f, "{}", kind)?; + } else { + write!(f, "[token='{}']", kind)?; + } + } + if let Some(field) = &self.field { + write!(f, ".{}", field)?; } if let Some(n) = self.child_index { write!(f, ":nth-child({})", n)?; @@ -416,7 +437,7 @@ impl fmt::Debug for Selector { } write!(f, "{:?}", step)?; } - write!(f, "]")?; + write!(f, " (specificity: {})]", selector_specificity(self))?; Ok(()) } } @@ -522,52 +543,134 @@ fn parse_sass_items( rsass::Item::Rule(selectors, items) => { let mut full_selectors = Vec::new(); for prefix in selector_prefixes { - let mut part_string = String::new(); - let mut next_step_is_immediate = false; for selector in &selectors.s { let mut prefix = prefix.clone(); + let mut operator_was_immediate: Option = Some(false); for part in &selector.0 { - part_string.clear(); - write!(&mut part_string, "{}", part).unwrap(); - let part_string = part_string.trim(); - if !part_string.is_empty() { - if part_string == "&" { - continue; - } else if part_string.starts_with(":nth-child(") { - if let Some(last_step) = prefix.last_mut() { - if let Ok(index) = usize::from_str_radix( - &part_string[11..(part_string.len() - 1)], - 10, - ) { - last_step.child_index = Some(index); + match part { + SelectorPart::BackRef => { + operator_was_immediate = None; + } + SelectorPart::Simple(value) => { + if let Some(value) = value.single_raw() { + for (i, value) in value.split('.').enumerate() { + if value.is_empty() { + continue; + } + let value = value.to_string(); + check_node_kind(&value)?; + if i > 0 { + if let Some(immediate) = operator_was_immediate { + prefix.push(SelectorStep { + kind: None, + field: Some(value), + is_named: None, + child_index: None, + text_pattern: None, + is_immediate: immediate, + }) + } else { + 
prefix.last_mut().unwrap().field = Some(value); + } + } else { + if let Some(immediate) = operator_was_immediate { + prefix.push(SelectorStep { + kind: Some(value.to_string()), + field: None, + child_index: None, + text_pattern: None, + is_named: Some(true), + is_immediate: immediate, + }); + } else { + return Err(Error(format!("Node type {} must be separated by whitespace or the `>` operator", value))); + } + } + operator_was_immediate = None; + } + } else { + return Err(interpolation_error()); + } + operator_was_immediate = None; + } + SelectorPart::Attribute { name, val, .. } => { + match name.single_raw() { + None => return Err(interpolation_error()), + Some("text") => { + if operator_was_immediate.is_some() { + return Err(Error("The `text` attribute must be used in combination with a node type or field".to_string())); + } + if let Some(last_step) = prefix.last_mut() { + last_step.text_pattern = + Some(get_string_value(val.to_string())?) + } + } + Some("token") => { + if let Some(immediate) = operator_was_immediate { + prefix.push(SelectorStep { + kind: Some(get_string_value(val.to_string())?), + field: None, + is_named: Some(false), + child_index: None, + text_pattern: None, + is_immediate: immediate, + }); + operator_was_immediate = None; + } else { + return Err(Error("The `token` attribute canot be used in combination with a node type".to_string())); + } + } + _ => { + return Err(Error(format!( + "Unsupported attribute {}", + part + ))); } } - } else if part_string.starts_with("[text=") { - if let Some(last_step) = prefix.last_mut() { - last_step.text_pattern = Some( - part_string[7..(part_string.len() - 2)].to_string(), - ) + } + SelectorPart::PseudoElement { .. 
} => { + return Err(Error( + "Pseudo elements are not supported".to_string(), + )); + } + SelectorPart::Pseudo { name, arg } => match name.single_raw() { + None => return Err(interpolation_error()), + Some("nth-child") => { + if let Some(arg) = arg { + let mut arg_str = String::new(); + write!(&mut arg_str, "{}", arg).unwrap(); + if let Some(last_step) = prefix.last_mut() { + if let Ok(i) = usize::from_str_radix(&arg_str, 10) { + last_step.child_index = Some(i); + } else { + return Err(Error(format!( + "Invalid child index {}", + arg + ))); + } + } + } + } + _ => { + return Err(Error(format!( + "Unsupported pseudo-class {}", + part + ))); + } + }, + SelectorPart::Descendant => { + operator_was_immediate = Some(false); + } + SelectorPart::RelOp(operator) => { + let operator = *operator as char; + if operator == '>' { + operator_was_immediate = Some(true); + } else { + return Err(Error(format!( + "Unsupported operator {}", + operator + ))); } - } else if part_string == ">" { - next_step_is_immediate = true; - } else if part_string.starts_with("[token=") { - prefix.push(SelectorStep { - kind: part_string[8..(part_string.len() - 2)].to_string(), - is_named: false, - child_index: None, - text_pattern: None, - is_immediate: next_step_is_immediate, - }); - next_step_is_immediate = false; - } else { - prefix.push(SelectorStep { - kind: part_string.to_string(), - is_named: true, - child_index: None, - text_pattern: None, - is_immediate: next_step_is_immediate, - }); - next_step_is_immediate = false; } } } @@ -596,7 +699,7 @@ fn parse_sass_value(value: &Value) -> Result { if let Some(s) = s.single_raw() { Ok(PropertyValue::String(s.to_string())) } else { - Err(Error("String interpolation is not supported".to_string())) + Err(interpolation_error()) } } Value::Call(name, raw_args) => { @@ -665,13 +768,36 @@ fn resolve_path(base: &Path, p: &str) -> Result { Err(Error(format!("Could not resolve import path `{}`", p))) } +fn check_node_kind(name: &String) -> Result<()> { + for c 
in name.chars() { + if !c.is_alphanumeric() && c != '_' { + return Err(Error(format!("Invalid identifier '{}'", name))); + } + } + Ok(()) +} + +fn get_string_value(mut s: String) -> Result { + if s.starts_with("'") && s.ends_with("'") || s.starts_with('"') && s.ends_with('"') { + s.pop(); + s.remove(0); + Ok(s) + } else { + Err(Error(format!("Unsupported string literal {}", s))) + } +} + +fn interpolation_error() -> Error { + Error("String interpolation is not supported".to_string()) +} + #[cfg(test)] mod tests { use super::*; use regex::Regex; #[test] - fn test_immediate_child_and_descendant_selectors() { + fn test_properties_immediate_child_and_descendant_selectors() { let sheet = generate_property_sheet( "foo.css", " @@ -776,7 +902,7 @@ mod tests { } #[test] - fn test_text_attribute() { + fn test_properties_text_attribute() { let sheet = generate_property_sheet( "foo.css", " @@ -800,26 +926,93 @@ mod tests { .unwrap(); assert_eq!( - *query(&sheet, vec![("f1", true, 0)], "abc"), + *query(&sheet, vec![("f1", None, true, 0)], "abc"), props(&[("color", "red")]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 0)], "Abc"), + *query(&sheet, vec![("f1", None, true, 0)], "Abc"), props(&[("color", "green")]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 0)], "AB_CD"), + *query(&sheet, vec![("f1", None, true, 0)], "AB_CD"), props(&[("color", "blue")]) ); - assert_eq!(*query(&sheet, vec![("f2", true, 0)], "Abc"), props(&[])); assert_eq!( - *query(&sheet, vec![("f2", true, 0)], "ABC"), + *query(&sheet, vec![("f2", None, true, 0)], "Abc"), + props(&[]) + ); + assert_eq!( + *query(&sheet, vec![("f2", None, true, 0)], "ABC"), props(&[("color", "purple")]) ); } #[test] - fn test_cascade_ordering_as_tie_breaker() { + fn test_properties_with_fields() { + let sheet = generate_property_sheet( + "foo.css", + " + a { + color: red; + &.x { + color: green; + b { + color: blue; + &.y { color: yellow; } + } + } + b { color: orange; } + b.y { color: indigo; } + } + .x { color: 
violet; } + ", + ) + .unwrap(); + + assert_eq!( + *query(&sheet, vec![("a", None, true, 0)], ""), + props(&[("color", "red")]) + ); + assert_eq!( + *query(&sheet, vec![("a", Some("x"), true, 0)], ""), + props(&[("color", "green")]) + ); + assert_eq!( + *query( + &sheet, + vec![("a", Some("x"), true, 0), ("b", None, true, 0)], + "" + ), + props(&[("color", "blue")]) + ); + assert_eq!( + *query( + &sheet, + vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)], + "" + ), + props(&[("color", "yellow")]) + ); + assert_eq!( + *query(&sheet, vec![("b", Some("x"), true, 0)], ""), + props(&[("color", "violet")]) + ); + assert_eq!( + *query(&sheet, vec![("a", None, true, 0), ("b", None, true, 0)], ""), + props(&[("color", "orange")]) + ); + assert_eq!( + *query( + &sheet, + vec![("a", None, true, 0), ("b", Some("y"), true, 0)], + "" + ), + props(&[("color", "indigo")]) + ); + } + + #[test] + fn test_properties_cascade_ordering_as_tie_breaker() { let sheet = generate_property_sheet( "foo.css", " @@ -832,29 +1025,49 @@ mod tests { .unwrap(); assert_eq!( - *query(&sheet, vec![("f1", true, 0), ("f2", true, 0)], "x"), + *query( + &sheet, + vec![("f1", None, true, 0), ("f2", None, true, 0)], + "x" + ), props(&[]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 0), ("f2", true, 1)], "x"), + *query( + &sheet, + vec![("f1", None, true, 0), ("f2", None, true, 1)], + "x" + ), props(&[("color", "red")]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "x"), + *query( + &sheet, + vec![("f1", None, true, 1), ("f2", None, true, 1)], + "x" + ), props(&[("color", "green")]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "a"), + *query( + &sheet, + vec![("f1", None, true, 1), ("f2", None, true, 1)], + "a" + ), props(&[("color", "blue")]) ); assert_eq!( - *query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "ab"), + *query( + &sheet, + vec![("f1", None, true, 1), ("f2", None, true, 1)], + "ab" + ), props(&[("color", "violet")]) ); } 
#[test] - fn test_css_function_calls() { + fn test_properties_css_function_calls() { let sheet = generate_property_sheet( "foo.css", " @@ -891,7 +1104,7 @@ mod tests { } #[test] - fn test_array_by_declaring_property_multiple_times() { + fn test_properties_array_by_declaring_property_multiple_times() { let sheet = generate_property_sheet( "foo.css", " @@ -937,25 +1150,26 @@ mod tests { ) -> &'a PropertySet { query( sheet, - node_stack.into_iter().map(|s| (s, true, 0)).collect(), + node_stack.into_iter().map(|s| (s, None, true, 0)).collect(), "", ) } fn query<'a>( sheet: &'a PropertySheetJSON, - node_stack: Vec<(&'static str, bool, usize)>, + node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>, leaf_text: &str, ) -> &'a PropertySet { let mut state_id = 0; - for (kind, is_named, child_index) in node_stack { + for (kind, field, is_named, child_index) in node_stack { let state = &sheet.states[state_id]; state_id = state .transitions .iter() .find(|transition| { - transition.kind == kind - && transition.named == is_named + transition.kind.as_ref().map_or(true, |k| k == kind) + && transition.named.map_or(true, |n| n == is_named) + && transition.field.as_ref().map_or(true, |f| field == Some(f)) && transition.index.map_or(true, |index| index == child_index) && (transition .text diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index c5738608..38e75a1f 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -62,8 +62,15 @@ struct PropertyTransition { text_regex_index: Option, } +#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] +enum NodeId { + Kind(u16), + Field(u16), + KindAndField(u16, u16), +} + struct PropertyState { - transitions: HashMap>, + transitions: HashMap>, property_set_id: usize, default_next_state_id: usize, } @@ -83,11 +90,15 @@ pub struct PropertySheet

> { #[derive(Debug, Deserialize, Serialize, Hash, PartialEq, Eq)] pub struct PropertyTransitionJSON { #[serde(rename = "type")] - pub kind: String, - pub named: bool, + #[serde(skip_serializing_if = "Option::is_none")] + pub kind: Option, + #[serde(skip_serializing_if = "Option::is_none")] + pub named: Option, #[serde(skip_serializing_if = "Option::is_none")] pub index: Option, #[serde(skip_serializing_if = "Option::is_none")] + pub field: Option, + #[serde(skip_serializing_if = "Option::is_none")] pub text: Option, pub state_id: usize, } @@ -137,6 +148,22 @@ impl Language { pub fn node_kind_is_named(&self, id: u16) -> bool { unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular } } + + pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option { + let field_name = field_name.as_ref(); + let id = unsafe { + ffi::ts_language_field_id_for_name( + self.0, + field_name.as_ptr() as *const c_char, + field_name.len() as u32, + ) + }; + if id == 0 { + None + } else { + Some(id) + } + } } unsafe impl Send for Language {} @@ -657,7 +684,9 @@ impl<'a, P> TreePropertyCursor<'a, P> { property_sheet, source, }; - let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0); + let kind_id = result.cursor.node().kind_id(); + let field_id = result.cursor.field_id(); + let state = result.next_state(&result.current_state(), kind_id, field_id, 0); result.state_stack.push(state); result } @@ -676,7 +705,8 @@ impl<'a, P> TreePropertyCursor<'a, P> { let next_state_id = { let state = &self.current_state(); let kind_id = self.cursor.node().kind_id(); - self.next_state(state, kind_id, child_index) + let field_id = self.cursor.field_id(); + self.next_state(state, kind_id, field_id, child_index) }; self.state_stack.push(next_state_id); self.child_index_stack.push(child_index); @@ -693,7 +723,8 @@ impl<'a, P> TreePropertyCursor<'a, P> { let next_state_id = { let state = &self.current_state(); let kind_id = 
self.cursor.node().kind_id(); - self.next_state(state, kind_id, child_index) + let field_id = self.cursor.field_id(); + self.next_state(state, kind_id, field_id, child_index) }; self.state_stack.push(next_state_id); self.child_index_stack.push(child_index); @@ -717,12 +748,25 @@ impl<'a, P> TreePropertyCursor<'a, P> { &self, state: &PropertyState, node_kind_id: u16, + node_field_id: Option, node_child_index: usize, ) -> usize { - state - .transitions - .get(&node_kind_id) - .and_then(|transitions| { + let keys; + let key_count; + if let Some(node_field_id) = node_field_id { + key_count = 3; + keys = [ + NodeId::KindAndField(node_kind_id, node_field_id), + NodeId::Field(node_field_id), + NodeId::Kind(node_kind_id), + ]; + } else { + key_count = 1; + keys = [NodeId::Kind(node_kind_id); 3]; + } + + for key in &keys[0..key_count] { + if let Some(transitions) = state.transitions.get(key) { for transition in transitions.iter() { if let Some(text_regex_index) = transition.text_regex_index { let node = self.cursor.node(); @@ -740,11 +784,12 @@ impl<'a, P> TreePropertyCursor<'a, P> { } } - return Some(transition.state_id); + return transition.state_id; } - None - }) - .unwrap_or(state.default_next_state_id) + } + } + + state.default_next_state_id } fn current_state(&self) -> &PropertyState { @@ -848,18 +893,38 @@ impl

PropertySheet

{ None }; - for i in 0..(node_kind_count as u16) { - if transition.kind == language.node_kind_for_id(i) - && transition.named == language.node_kind_is_named(i) - { - let entry = transitions.entry(i).or_insert(Vec::new()); - entry.push(PropertyTransition { - child_index: transition.index, - state_id: transition.state_id, - text_regex_index, - }); + let kind_id = transition.kind.as_ref().and_then(|kind| { + let named = transition.named.unwrap(); + for i in 0..(node_kind_count as u16) { + if kind == language.node_kind_for_id(i) + && named == language.node_kind_is_named(i) + { + return Some(i); + } } - } + None + }); + + let field_id = transition + .field + .as_ref() + .and_then(|field| language.field_id_for_name(&field)); + + let key = match (kind_id, field_id) { + (Some(kind_id), None) => NodeId::Kind(kind_id), + (None, Some(field_id)) => NodeId::Field(field_id), + (Some(kind_id), Some(field_id)) => NodeId::KindAndField(kind_id, field_id), + (None, None) => continue, + }; + + transitions + .entry(key) + .or_insert(Vec::new()) + .push(PropertyTransition { + child_index: transition.index, + state_id: transition.state_id, + text_regex_index, + }); } states.push(PropertyState { transitions, From a8a5e23e5e051ba823a98a0a697381d74afa0ebb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 14 Feb 2019 10:15:18 -0800 Subject: [PATCH 18/25] Make TreePropertyCursor respect field selectors --- lib/binding/lib.rs | 147 +++++++++++++++++++++++++++------------------ 1 file changed, 88 insertions(+), 59 deletions(-) diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 94001870..926f95b4 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -57,20 +57,15 @@ pub struct InputEdit { } struct PropertyTransition { - state_id: usize, - child_index: Option, - text_regex_index: Option, -} - -#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] -enum NodeId { - Kind(u16), - Field(u16), - KindAndField(u16, u16), + state_id: u16, + child_index: Option, + text_regex_index: Option, 
+ node_kind_id: Option, } struct PropertyState { - transitions: HashMap>, + field_transitions: HashMap>, + kind_transitions: HashMap>, property_set_id: usize, default_next_state_id: usize, } @@ -260,7 +255,10 @@ impl Parser { pub fn parse(&mut self, input: impl AsRef<[u8]>, old_tree: Option<&Tree>) -> Option { let bytes = input.as_ref(); let len = bytes.len(); - self.parse_with(&mut |i, _| if i < len { &bytes[i..] } else { &[] }, old_tree) + self.parse_with( + &mut |i, _| if i < len { &bytes[i..] } else { &[] }, + old_tree, + ) } pub fn parse_utf16( @@ -270,7 +268,10 @@ impl Parser { ) -> Option { let code_points = input.as_ref(); let len = code_points.len(); - self.parse_utf16_with(&mut |i, _| if i < len { &code_points[i..] } else { &[] }, old_tree) + self.parse_utf16_with( + &mut |i, _| if i < len { &code_points[i..] } else { &[] }, + old_tree, + ) } pub fn parse_with<'a, T: FnMut(usize, Point) -> &'a [u8]>( @@ -753,41 +754,40 @@ impl<'a, P> TreePropertyCursor<'a, P> { node_field_id: Option, node_child_index: usize, ) -> usize { - let keys; - let key_count; - if let Some(node_field_id) = node_field_id { - key_count = 3; - keys = [ - NodeId::KindAndField(node_kind_id, node_field_id), - NodeId::Field(node_field_id), - NodeId::Kind(node_kind_id), - ]; + let transitions = if let Some(field_id) = node_field_id { + state.field_transitions.get(&field_id) } else { - key_count = 1; - keys = [NodeId::Kind(node_kind_id); 3]; - } + state.kind_transitions.get(&node_kind_id) + }; - for key in &keys[0..key_count] { - if let Some(transitions) = state.transitions.get(key) { - for transition in transitions.iter() { - if let Some(text_regex_index) = transition.text_regex_index { - let node = self.cursor.node(); - let text = &self.source[node.start_byte()..node.end_byte()]; - if let Ok(text) = str::from_utf8(text) { - if !self.property_sheet.text_regexes[text_regex_index].is_match(text) { - continue; - } - } - } + if let Some(transitions) = transitions { + for transition in 
transitions.iter() { + if transition + .node_kind_id + .map_or(false, |id| id != node_kind_id) + { + continue; + } - if let Some(child_index) = transition.child_index { - if child_index != node_child_index { + if let Some(text_regex_index) = transition.text_regex_index { + let node = self.cursor.node(); + let text = &self.source[node.start_byte()..node.end_byte()]; + if let Ok(text) = str::from_utf8(text) { + if !self.property_sheet.text_regexes[text_regex_index as usize] + .is_match(text) + { continue; } } - - return transition.state_id; } + + if let Some(child_index) = transition.child_index { + if child_index != node_child_index as u16 { + continue; + } + } + + return transition.state_id as usize; } } @@ -876,20 +876,32 @@ impl

PropertySheet

{ let mut text_regex_patterns = Vec::new(); for state in input.states.iter() { - let mut transitions = HashMap::new(); let node_kind_count = language.node_kind_count(); + let mut kind_transitions = HashMap::new(); + let mut field_transitions = HashMap::new(); + + for transition in state.transitions.iter() { + let field_id = transition + .field + .as_ref() + .and_then(|field| language.field_id_for_name(&field)); + if let Some(field_id) = field_id { + field_transitions.entry(field_id).or_insert(Vec::new()); + } + } + for transition in state.transitions.iter() { let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() { if let Some(index) = text_regex_patterns.iter().position(|r| *r == regex_pattern) { - Some(index) + Some(index as u16) } else { text_regex_patterns.push(regex_pattern); text_regexes.push( Regex::new(®ex_pattern).map_err(PropertySheetError::InvalidRegex)?, ); - Some(text_regexes.len() - 1) + Some(text_regexes.len() as u16 - 1) } } else { None @@ -912,24 +924,41 @@ impl

PropertySheet

{ .as_ref() .and_then(|field| language.field_id_for_name(&field)); - let key = match (kind_id, field_id) { - (Some(kind_id), None) => NodeId::Kind(kind_id), - (None, Some(field_id)) => NodeId::Field(field_id), - (Some(kind_id), Some(field_id)) => NodeId::KindAndField(kind_id, field_id), - (None, None) => continue, - }; + if let Some(field_id) = field_id { + field_transitions + .entry(field_id) + .or_insert(Vec::new()) + .push(PropertyTransition { + node_kind_id: kind_id, + child_index: transition.index.map(|i| i as u16), + state_id: transition.state_id as u16, + text_regex_index, + }); + } else { + for (_, entries) in field_transitions.iter_mut() { + entries.push(PropertyTransition { + node_kind_id: kind_id, + child_index: transition.index.map(|i| i as u16), + state_id: transition.state_id as u16, + text_regex_index, + }); + } - transitions - .entry(key) - .or_insert(Vec::new()) - .push(PropertyTransition { - child_index: transition.index, - state_id: transition.state_id, - text_regex_index, - }); + if let Some(kind_id) = kind_id { + kind_transitions.entry(kind_id).or_insert(Vec::new()).push( + PropertyTransition { + node_kind_id: None, + child_index: transition.index.map(|i| i as u16), + state_id: transition.state_id as u16, + text_regex_index, + }, + ); + } + } } states.push(PropertyState { - transitions, + field_transitions, + kind_transitions, default_next_state_id: state.default_next_state_id, property_set_id: state.property_set_id, }); From 445dfda53ecbbc770de4735e595b3d3f7412f05f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 5 Mar 2019 09:44:43 -0800 Subject: [PATCH 19/25] binding: Restore handling of multiple symbols w/ same name Even though normal aliases don't cause this, simple (single-use) aliases still do cause it. 
--- highlight/src/lib.rs | 32 ++++++++++++++++-- lib/binding/lib.rs | 80 ++++++++++++++++++++++++-------------------- 2 files changed, 73 insertions(+), 39 deletions(-) diff --git a/highlight/src/lib.rs b/highlight/src/lib.rs index e5499fbc..8bc0e6bf 100644 --- a/highlight/src/lib.rs +++ b/highlight/src/lib.rs @@ -3,7 +3,7 @@ mod escape; use serde::{Deserialize, Deserializer, Serialize, Serializer}; use serde_derive::*; use std::cmp; -use std::fmt::Write; +use std::fmt::{self, Write}; use std::mem::transmute; use std::str; use std::usize; @@ -572,8 +572,9 @@ where } } -impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Iterator - for Highlighter<'a, T> +impl<'a, T> Iterator for Highlighter<'a, T> +where + T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, { type Item = HighlightEvent<'a>; @@ -653,6 +654,31 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet)>> Itera } } +impl<'a, T> fmt::Debug for Highlighter<'a, T> +where + T: Fn(&str) -> Option<(Language, &'a PropertySheet)>, +{ + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if let Some(layer) = self.layers.first() { + let node = layer.cursor.node(); + let position = if layer.at_node_end { + node.end_position() + } else { + node.start_position() + }; + write!( + f, + "{{Highlighter position: {:?}, kind: {}, at_end: {}, props: {:?}}}", + position, + node.kind(), + layer.at_node_end, + layer.cursor.node_properties() + )?; + } + Ok(()) + } +} + impl<'a> Layer<'a> { fn new( source: &'a [u8], diff --git a/lib/binding/lib.rs b/lib/binding/lib.rs index 926f95b4..4f72c280 100644 --- a/lib/binding/lib.rs +++ b/lib/binding/lib.rs @@ -907,53 +907,61 @@ impl

PropertySheet

{ None }; - let kind_id = transition.kind.as_ref().and_then(|kind| { - let named = transition.named.unwrap(); - for i in 0..(node_kind_count as u16) { - if kind == language.node_kind_for_id(i) - && named == language.node_kind_is_named(i) - { - return Some(i); - } - } - None - }); - + let state_id = transition.state_id as u16; + let child_index = transition.index.map(|i| i as u16); let field_id = transition .field .as_ref() .and_then(|field| language.field_id_for_name(&field)); - if let Some(field_id) = field_id { + if let Some(kind) = transition.kind.as_ref() { + for kind_id in 0..(node_kind_count as u16) { + if kind != language.node_kind_for_id(kind_id) + || transition.named != Some(language.node_kind_is_named(kind_id)) + { + continue; + } + + if let Some(field_id) = field_id { + field_transitions + .entry(field_id) + .or_insert(Vec::new()) + .push(PropertyTransition { + node_kind_id: Some(kind_id), + state_id, + child_index, + text_regex_index, + }); + } else { + for (_, entries) in field_transitions.iter_mut() { + entries.push(PropertyTransition { + node_kind_id: Some(kind_id), + state_id, + child_index, + text_regex_index, + }); + } + + kind_transitions.entry(kind_id).or_insert(Vec::new()).push( + PropertyTransition { + node_kind_id: None, + state_id, + child_index, + text_regex_index, + }, + ); + } + } + } else if let Some(field_id) = field_id { field_transitions .entry(field_id) .or_insert(Vec::new()) .push(PropertyTransition { - node_kind_id: kind_id, - child_index: transition.index.map(|i| i as u16), - state_id: transition.state_id as u16, + node_kind_id: None, + state_id, + child_index, text_regex_index, }); - } else { - for (_, entries) in field_transitions.iter_mut() { - entries.push(PropertyTransition { - node_kind_id: kind_id, - child_index: transition.index.map(|i| i as u16), - state_id: transition.state_id as u16, - text_regex_index, - }); - } - - if let Some(kind_id) = kind_id { - kind_transitions.entry(kind_id).or_insert(Vec::new()).push( - 
PropertyTransition { - node_kind_id: None, - child_index: transition.index.map(|i| i as u16), - state_id: transition.state_id as u16, - text_regex_index, - }, - ); - } } } states.push(PropertyState { From b79bd8693b2e95abbab112faa4271e3dc2db9785 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Fri, 8 Mar 2019 06:20:07 -0500 Subject: [PATCH 20/25] Start work on handling node supertypes --- .../build_tables/build_parse_table.rs | 389 +++++++++++++----- cli/src/generate/dsl.js | 245 ++++++----- cli/src/generate/grammars.rs | 2 + cli/src/generate/mod.rs | 116 +++--- cli/src/generate/parse_grammar.rs | 3 + .../prepare_grammar/expand_repeats.rs | 1 + .../prepare_grammar/extract_simple_aliases.rs | 1 + .../prepare_grammar/extract_tokens.rs | 8 + .../prepare_grammar/flatten_grammar.rs | 1 + .../prepare_grammar/intern_symbols.rs | 13 + cli/src/generate/prepare_grammar/mod.rs | 1 + .../prepare_grammar/process_inlines.rs | 3 + cli/src/generate/tables.rs | 10 +- 13 files changed, 513 insertions(+), 280 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 417d5d3a..9f9413a3 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -47,7 +47,9 @@ struct ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> { fn build(mut self) -> Result { // Ensure that the empty alias sequence has index 0. - self.parse_table.production_infos.push(ProductionInfo::default()); + self.parse_table + .production_infos + .push(ProductionInfo::default()); // Add the error state at index 0. 
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); @@ -749,7 +751,7 @@ fn populate_following_tokens( pub(crate) fn get_variable_info( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, -) -> Vec { +) -> Result> { let mut result = Vec::new(); // Determine which field names and child node types can appear directly @@ -757,7 +759,9 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut info = VariableInfo { fields: HashMap::new(), - child_types: HashSet::new(), + subclasses: Vec::new(), + child_types: Vec::new(), + has_multi_step_production: false, }; let is_recursive = variable .productions @@ -765,6 +769,10 @@ pub(crate) fn get_variable_info( .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); for production in &variable.productions { + if production.steps.len() > 1 { + info.has_multi_step_production = true; + } + for step in &production.steps { let child_type = if let Some(alias) = &step.alias { ChildType::Aliased(alias.clone()) @@ -776,13 +784,17 @@ pub(crate) fn get_variable_info( let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { multiple: false, required: true, - types: HashSet::new(), + types: Vec::new(), }); field_info.multiple |= is_recursive; - field_info.types.insert(child_type.clone()); + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + } } - info.child_types.insert(child_type); + if let Err(i) = info.child_types.binary_search(&child_type) { + info.child_types.insert(i, child_type.clone()); + } } } @@ -810,23 +822,25 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { // Move this variable's info out of the vector so it can be modified // while reading from other entries of the vector. 
- let mut variable_info = VariableInfo { - fields: HashMap::new(), - child_types: HashSet::new(), - }; + let mut variable_info = VariableInfo::default(); mem::swap(&mut variable_info, &mut result[i]); for production in &variable.productions { for step in &production.steps { - if step.symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[step.symbol.index] + let child_symbol = step.symbol; + if child_symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[child_symbol.index] .kind .is_visible() { - let production_info = &result[step.symbol.index]; + let child_variable_info = &result[child_symbol.index]; + + if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } // Inherit fields from this hidden child - for (field_name, child_field_info) in &production_info.fields { + for (field_name, child_field_info) in &child_variable_info.fields { let field_info = variable_info .fields .entry(field_name.clone()) @@ -843,15 +857,17 @@ pub(crate) fn get_variable_info( done = false; } for child_type in &child_field_info.types { - if field_info.types.insert(child_type.clone()) { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } } } // Inherit child types from this hidden child - for child_type in &production_info.child_types { - if variable_info.child_types.insert(child_type.clone()) { + for child_type in &child_variable_info.child_types { + if let Err(i) = variable_info.child_types.binary_search(&child_type) { + variable_info.child_types.insert(i, child_type.clone()); done = false; } } @@ -860,8 +876,9 @@ pub(crate) fn get_variable_info( // for the field. 
if let Some(field_name) = &step.field_name { let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &production_info.child_types { - if field_info.types.insert(child_type.clone()) { + for child_type in &child_variable_info.child_types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); done = false; } } @@ -875,27 +892,111 @@ pub(crate) fn get_variable_info( } } + for supertype_symbol in &syntax_grammar.supertype_symbols { + let variable = &syntax_grammar.variables[supertype_symbol.index]; + if variable.kind != VariableType::Hidden { + return Err(Error::grammar(&format!( + "Supertype symbols must be hidden, but `{}` is not", + variable.name + ))); + } + + if result[supertype_symbol.index].has_multi_step_production { + return Err(Error::grammar(&format!( + "Supertype symbols must always have a single visible child, but `{}` can have multiple", + variable.name + ))); + } + } + let child_type_is_visible = |child_type: &ChildType| match child_type { ChildType::Aliased(_) => true, ChildType::Normal(symbol) => { - let step_kind = match symbol.kind { + let variable_kind = match symbol.kind { SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, _ => VariableType::Hidden, }; - step_kind.is_visible() + variable_kind.is_visible() } }; - for variable_info in result.iter_mut() { - variable_info.child_types.retain(&child_type_is_visible); + for supertype_symbol in &syntax_grammar.supertype_symbols { + result[supertype_symbol.index] + .child_types + .retain(child_type_is_visible); + } + + for i in 0..result.len() { + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + // For each field, make the `types` list more concise by replacing sets of + // subtypes with a single 
supertype. for (_, field_info) in variable_info.fields.iter_mut() { - field_info.types.retain(&child_type_is_visible); + for supertype_symbol in &syntax_grammar.supertype_symbols { + if sorted_vec_replace( + &mut field_info.types, + &result[supertype_symbol.index].child_types, + ChildType::Normal(*supertype_symbol), + ) { + break; + } + } + + field_info.types.retain(|t| { + if let ChildType::Normal(symbol) = t { + if syntax_grammar.supertype_symbols.contains(&symbol) { + return true; + } + } + child_type_is_visible(t) + }); + } + + result[i] = variable_info; + } + + Ok(result) +} + +fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool +where + T: Eq + Ord, +{ + let mut i = 0; + for right_elem in right.iter() { + while left[i] < *right_elem { + i += 1; + if i == left.len() { + return false; + } + } + if left[i] != *right_elem { + return false; } } - result + i = 0; + left.retain(|left_elem| { + if i == right.len() { + return true; + } + while right[i] < *left_elem { + i += 1; + if i == right.len() { + return true; + } + } + right[i] != *left_elem + }); + + if let Err(i) = left.binary_search(&value) { + left.insert(i, value); + } + + true } pub(crate) fn build_parse_table( @@ -913,6 +1014,8 @@ pub(crate) fn build_parse_table( &item_set_builder, ); + let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?; + let table = ParseTableBuilder { syntax_grammar, lexical_grammar, @@ -926,7 +1029,7 @@ pub(crate) fn build_parse_table( symbols: Vec::new(), production_infos: Vec::new(), max_aliased_production_length: 0, - variable_info: get_variable_info(syntax_grammar, lexical_grammar), + variable_info, }, } .build()?; @@ -944,56 +1047,63 @@ mod tests { #[test] fn test_get_variable_info() { let variable_info = get_variable_info( - &build_syntax_grammar(vec![ - // Required field `field1` has only one node type. 
- SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"), + &build_syntax_grammar( + vec![ + // Required field `field1` has only one node type. + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)) + .with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)) + .with_field_name("field2"), + ], + }, ], - }], - }, - // Hidden node - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(1))], - }], - }, - // Optional field `field2` can have two possible node types. 
- SyntaxVariable { - name: "rule2".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(0))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"), - ], - }, - ], - }, - ]), + }, + ], + vec![], + ), &build_lexical_grammar(), - ); + ) + .unwrap(); assert_eq!( variable_info[0].fields, @@ -1002,9 +1112,7 @@ mod tests { FieldInfo { required: true, multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(1))] - .into_iter() - .collect::>(), + types: vec![ChildType::Normal(Symbol::terminal(1))], } )] .into_iter() @@ -1021,9 +1129,7 @@ mod tests { types: vec![ ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(3)), - ] - .into_iter() - .collect::>(), + ], } )] .into_iter() @@ -1034,34 +1140,38 @@ mod tests { #[test] fn test_get_variable_info_with_inherited_fields() { let variable_info = get_variable_info( - &build_syntax_grammar(vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - // Hidden node with fields - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), - ], - }], - }, - ]), + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: 
VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ], + vec![], + ), &build_lexical_grammar(), - ); + ) + .unwrap(); assert_eq!( variable_info[0].fields, @@ -1070,9 +1180,7 @@ mod tests { FieldInfo { required: true, multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(3))] - .into_iter() - .collect::>(), + types: vec![ChildType::Normal(Symbol::terminal(3))], } )] .into_iter() @@ -1080,9 +1188,68 @@ mod tests { ); } - fn build_syntax_grammar(variables: Vec) -> SyntaxGrammar { + #[test] + fn test_get_variable_info_with_supertypes() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(2))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(3))], + }, + ], + }, + ], + // _rule1 is a supertype + vec![Symbol::non_terminal(1)], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: 
true, + multiple: false, + types: vec![ChildType::Normal(Symbol::non_terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + + fn build_syntax_grammar( + variables: Vec, + supertype_symbols: Vec, + ) -> SyntaxGrammar { let mut syntax_grammar = SyntaxGrammar::default(); syntax_grammar.variables = variables; + syntax_grammar.supertype_symbols = supertype_symbols; syntax_grammar } diff --git a/cli/src/generate/dsl.js b/cli/src/generate/dsl.js index cf124258..651e6713 100644 --- a/cli/src/generate/dsl.js +++ b/cli/src/generate/dsl.js @@ -212,137 +212,154 @@ function RuleBuilder(ruleMap) { } function grammar(baseGrammar, options) { - if (!options) { - options = baseGrammar; - baseGrammar = { - name: null, - rules: {}, - extras: [normalize(/\s/)], - conflicts: [], - externals: [], - inline: [] - }; + if (!options) { + options = baseGrammar; + baseGrammar = { + name: null, + rules: {}, + extras: [normalize(/\s/)], + conflicts: [], + externals: [], + inline: [], + supertypes: [] + }; + } + + let externals = baseGrammar.externals; + if (options.externals) { + if (typeof options.externals !== "function") { + throw new Error("Grammar's 'externals' property must be a function."); } - let externals = baseGrammar.externals; - if (options.externals) { - if (typeof options.externals !== "function") { - throw new Error("Grammar's 'externals' property must be a function."); + const externalsRuleBuilder = RuleBuilder(null) + const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); + + if (!Array.isArray(externalRules)) { + throw new Error("Grammar's 'externals' property must return an array of rules."); + } + + externals = externalRules.map(normalize); + } + + const ruleMap = {}; + for (const key in options.rules) { + ruleMap[key] = true; + } + for (const key in baseGrammar.rules) { + ruleMap[key] = true; + } + for (const external of externals) { + if (typeof external.name === 'string') { + ruleMap[external.name] = 
true; + } + } + + const ruleBuilder = RuleBuilder(ruleMap); + + const name = options.name; + if (typeof name !== "string") { + throw new Error("Grammar's 'name' property must be a string."); + } + + if (!/^[a-zA-Z_]\w*$/.test(name)) { + throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); + } + + let rules = Object.assign({}, baseGrammar.rules); + if (options.rules) { + if (typeof options.rules !== "object") { + throw new Error("Grammar's 'rules' property must be an object."); + } + + for (const ruleName in options.rules) { + const ruleFn = options.rules[ruleName]; + if (typeof ruleFn !== "function") { + throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not."); } + rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); + } + } - const externalsRuleBuilder = RuleBuilder(null) - const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); - - if (!Array.isArray(externalRules)) { - throw new Error("Grammar's 'externals' property must return an array of rules."); - } - - externals = externalRules.map(normalize); + let extras = baseGrammar.extras.slice(); + if (options.extras) { + if (typeof options.extras !== "function") { + throw new Error("Grammar's 'extras' property must be a function."); } - const ruleMap = {}; - for (const key in options.rules) { - ruleMap[key] = true; + extras = options.extras + .call(ruleBuilder, ruleBuilder, baseGrammar.extras) + .map(normalize); + } + + let word = baseGrammar.word; + if (options.word) { + word = options.word.call(ruleBuilder, ruleBuilder).name; + if (typeof word != 'string') { + throw new Error("Grammar's 'word' property must be a named rule."); } - for (const key in baseGrammar.rules) { - ruleMap[key] = true; - } - for (const external of externals) { - if (typeof external.name === 'string') { - ruleMap[external.name] = true; - } + } + + let 
conflicts = baseGrammar.conflicts; + if (options.conflicts) { + if (typeof options.conflicts !== "function") { + throw new Error("Grammar's 'conflicts' property must be a function."); } - const ruleBuilder = RuleBuilder(ruleMap); + const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); + const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); - const name = options.name; - if (typeof name !== "string") { - throw new Error("Grammar's 'name' property must be a string."); + if (!Array.isArray(conflictRules)) { + throw new Error("Grammar's conflicts must be an array of arrays of rules."); } - if (!/^[a-zA-Z_]\w*$/.test(name)) { - throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); - } - - let rules = Object.assign({}, baseGrammar.rules); - if (options.rules) { - if (typeof options.rules !== "object") { - throw new Error("Grammar's 'rules' property must be an object."); - } - - for (const ruleName in options.rules) { - const ruleFn = options.rules[ruleName]; - if (typeof ruleFn !== "function") { - throw new Error("Grammar rules must all be functions. 
'" + ruleName + "' rule is not."); - } - rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName])); - } - } - - let extras = baseGrammar.extras.slice(); - if (options.extras) { - if (typeof options.extras !== "function") { - throw new Error("Grammar's 'extras' property must be a function."); - } - - extras = options.extras - .call(ruleBuilder, ruleBuilder, baseGrammar.extras) - .map(normalize); - } - - let word = baseGrammar.word; - if (options.word) { - word = options.word.call(ruleBuilder, ruleBuilder).name; - if (typeof word != 'string') { - throw new Error("Grammar's 'word' property must be a named rule."); - } - } - - let conflicts = baseGrammar.conflicts; - if (options.conflicts) { - if (typeof options.conflicts !== "function") { - throw new Error("Grammar's 'conflicts' property must be a function."); - } - - const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym)); - const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules); - - if (!Array.isArray(conflictRules)) { + conflicts = conflictRules.map(conflictSet => { + if (!Array.isArray(conflictSet)) { throw new Error("Grammar's conflicts must be an array of arrays of rules."); } - conflicts = conflictRules.map(conflictSet => { - if (!Array.isArray(conflictSet)) { - throw new Error("Grammar's conflicts must be an array of arrays of rules."); - } - - return conflictSet.map(symbol => normalize(symbol).name); - }); - } - - let inline = baseGrammar.inline; - if (options.inline) { - if (typeof options.inline !== "function") { - throw new Error("Grammar's 'inline' property must be a function."); - } - - const baseInlineRules = baseGrammar.inline.map(sym); - const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); - - if (!Array.isArray(inlineRules)) { - throw new Error("Grammar's inline must be an array of rules."); - } - - inline = inlineRules.map(symbol => symbol.name); - } - - if 
(Object.keys(rules).length == 0) { - throw new Error("Grammar must have at least one rule."); - } - - return {name, word, rules, extras, conflicts, externals, inline}; + return conflictSet.map(symbol => normalize(symbol).name); + }); } + let inline = baseGrammar.inline; + if (options.inline) { + if (typeof options.inline !== "function") { + throw new Error("Grammar's 'inline' property must be a function."); + } + + const baseInlineRules = baseGrammar.inline.map(sym); + const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules); + + if (!Array.isArray(inlineRules)) { + throw new Error("Grammar's inline must be an array of rules."); + } + + inline = inlineRules.map(symbol => symbol.name); + } + + let supertypes = baseGrammar.supertypes; + if (options.supertypes) { + if (typeof options.supertypes !== "function") { + throw new Error("Grammar's 'supertypes' property must be a function."); + } + + const baseSupertypeRules = baseGrammar.supertypes.map(sym); + const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules); + + if (!Array.isArray(supertypeRules)) { + throw new Error("Grammar's supertypes must be an array of rules."); + } + + supertypes = supertypeRules.map(symbol => symbol.name); + } + + if (Object.keys(rules).length == 0) { + throw new Error("Grammar must have at least one rule."); + } + + return {name, word, rules, extras, conflicts, externals, inline, supertypes}; +} + function checkArguments(ruleCount, caller, callerName, suffix = '') { if (ruleCount > 1) { const error = new Error([ diff --git a/cli/src/generate/grammars.rs b/cli/src/generate/grammars.rs index 6cc1a5f7..f904efa3 100644 --- a/cli/src/generate/grammars.rs +++ b/cli/src/generate/grammars.rs @@ -27,6 +27,7 @@ pub(crate) struct InputGrammar { pub expected_conflicts: Vec>, pub external_tokens: Vec, pub variables_to_inline: Vec, + pub supertype_symbols: Vec, pub word_token: Option, } @@ -88,6 +89,7 @@ pub(crate) struct SyntaxGrammar { pub 
extra_tokens: Vec, pub expected_conflicts: Vec>, pub external_tokens: Vec, + pub supertype_symbols: Vec, pub variables_to_inline: Vec, pub word_token: Option, } diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 1b43df3b..4d6a1057 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -184,17 +184,23 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> } } +#[derive(Debug, Serialize, PartialEq, Eq, Default)] +struct NodeInfoJSON { + fields: Option>, + subtypes: Option>, +} + #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] -struct FieldTypeJSON { +struct NodeTypeJSON { kind: String, named: bool, } -#[derive(Debug, Serialize)] +#[derive(Debug, Serialize, PartialEq, Eq)] struct FieldInfoJSON { multiple: bool, required: bool, - types: Vec, + types: Vec, } fn generate_field_info_json( @@ -203,7 +209,7 @@ fn generate_field_info_json( simple_aliases: &AliasMap, variable_info: &Vec, ) -> String { - let mut map = BTreeMap::new(); + let mut node_types_json = BTreeMap::new(); for (i, info) in variable_info.iter().enumerate() { let variable = &syntax_grammar.variables[i]; if !variable.kind.is_visible() || info.fields.is_empty() { @@ -214,60 +220,68 @@ fn generate_field_info_json( .get(&Symbol::non_terminal(i)) .map_or(&variable.name, |alias| &alias.value); - let fields = map.entry(name.clone()).or_insert_with(|| BTreeMap::new()); - for (field, field_info) in info.fields.iter() { - let field_info_json = fields.entry(field.clone()).or_insert(FieldInfoJSON { - multiple: false, - required: true, - types: Vec::new(), - }); + let node_type_json = node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON::default()); - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; - field_info_json.types.extend(field_info.types.iter().map( - |child_type| match child_type { - ChildType::Aliased(alias) => FieldTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - 
}, - ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { - FieldTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - } - } else { - match symbol.kind { - SymbolType::NonTerminal => { - let variable = &syntax_grammar.variables[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } + if info.fields.len() > 0 { + let mut fields_json = BTreeMap::new(); + for (field, field_info) in info.fields.iter() { + let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + }); + + field_info_json.multiple |= field_info.multiple; + field_info_json.required &= field_info.required; + field_info_json.types.extend(field_info.types.iter().map( + |child_type| match child_type { + ChildType::Aliased(alias) => NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, } - SymbolType::Terminal => { - let variable = &lexical_grammar.variables[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + } else { + match symbol.kind { + SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } } - } - SymbolType::External => { - let variable = &syntax_grammar.external_tokens[symbol.index]; - FieldTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } } + SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + 
NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), } - _ => panic!("Unexpected symbol type"), } } - } - }, - )); - field_info_json.types.sort_unstable(); - field_info_json.types.dedup(); + }, + )); + field_info_json.types.sort_unstable(); + field_info_json.types.dedup(); + } + node_type_json.fields = Some(fields_json); } + } - serde_json::to_string_pretty(&map).unwrap() + serde_json::to_string_pretty(&node_types_json).unwrap() } diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs index 5b244c87..ce4b881a 100644 --- a/cli/src/generate/parse_grammar.rs +++ b/cli/src/generate/parse_grammar.rs @@ -71,6 +71,7 @@ struct GrammarJSON { externals: Option>, extras: Option>, inline: Option>, + supertypes: Option>, word: Option, } @@ -100,6 +101,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { .collect(); let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); + let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new()); Ok(InputGrammar { name: grammar_json.name, @@ -108,6 +110,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result { extra_tokens, expected_conflicts, external_tokens, + supertype_symbols, variables_to_inline, }) } diff --git a/cli/src/generate/prepare_grammar/expand_repeats.rs b/cli/src/generate/prepare_grammar/expand_repeats.rs index b290799b..ec0deb75 100644 --- a/cli/src/generate/prepare_grammar/expand_repeats.rs +++ b/cli/src/generate/prepare_grammar/expand_repeats.rs @@ -235,6 +235,7 @@ mod tests { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs index 79ea5e67..9a0b7fbb 100644 --- 
a/cli/src/generate/prepare_grammar/extract_simple_aliases.rs +++ b/cli/src/generate/prepare_grammar/extract_simple_aliases.rs @@ -149,6 +149,7 @@ mod tests { extra_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), external_tokens: Vec::new(), word_token: None, }; diff --git a/cli/src/generate/prepare_grammar/extract_tokens.rs b/cli/src/generate/prepare_grammar/extract_tokens.rs index 88afb50f..d1264b6e 100644 --- a/cli/src/generate/prepare_grammar/extract_tokens.rs +++ b/cli/src/generate/prepare_grammar/extract_tokens.rs @@ -77,6 +77,12 @@ pub(super) fn extract_tokens( }) .collect(); + let supertype_symbols = grammar + .supertype_symbols + .into_iter() + .map(|symbol| symbol_replacer.replace_symbol(symbol)) + .collect(); + let variables_to_inline = grammar .variables_to_inline .into_iter() @@ -154,6 +160,7 @@ pub(super) fn extract_tokens( expected_conflicts, extra_tokens, variables_to_inline, + supertype_symbols, external_tokens, word_token, }, @@ -519,6 +526,7 @@ mod test { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/flatten_grammar.rs b/cli/src/generate/prepare_grammar/flatten_grammar.rs index 1c050a6b..af93a82d 100644 --- a/cli/src/generate/prepare_grammar/flatten_grammar.rs +++ b/cli/src/generate/prepare_grammar/flatten_grammar.rs @@ -203,6 +203,7 @@ unless they are used only as the grammar's start rule. 
expected_conflicts: grammar.expected_conflicts, variables_to_inline: grammar.variables_to_inline, external_tokens: grammar.external_tokens, + supertype_symbols: grammar.supertype_symbols, word_token: grammar.word_token, variables, }) diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index d742864c..54abdf83 100644 --- a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -35,6 +35,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result extra_tokens.push(interner.intern_rule(extra_token)?); } + let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len()); + for supertype_symbol_name in grammar.supertype_symbols.iter() { + supertype_symbols.push( + interner + .intern_name(supertype_symbol_name) + .ok_or_else(|| Error::undefined_symbol(supertype_symbol_name))?, + ); + } + let mut expected_conflicts = Vec::new(); for conflict in grammar.expected_conflicts.iter() { let mut interned_conflict = Vec::with_capacity(conflict.len()); @@ -64,12 +73,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } + eprintln!("supertype_symbols: {:?}", supertype_symbols); + Ok(InternedGrammar { variables, external_tokens, extra_tokens, expected_conflicts, variables_to_inline, + supertype_symbols, word_token, }) } @@ -230,6 +242,7 @@ mod tests { external_tokens: Vec::new(), expected_conflicts: Vec::new(), variables_to_inline: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, } } diff --git a/cli/src/generate/prepare_grammar/mod.rs b/cli/src/generate/prepare_grammar/mod.rs index 41f668f4..a574aefb 100644 --- a/cli/src/generate/prepare_grammar/mod.rs +++ b/cli/src/generate/prepare_grammar/mod.rs @@ -25,6 +25,7 @@ pub(crate) struct IntermediateGrammar { expected_conflicts: Vec>, external_tokens: Vec, variables_to_inline: Vec, + supertype_symbols: Vec, word_token: Option, } diff --git 
a/cli/src/generate/prepare_grammar/process_inlines.rs b/cli/src/generate/prepare_grammar/process_inlines.rs index e067cd9e..e7c1b3e8 100644 --- a/cli/src/generate/prepare_grammar/process_inlines.rs +++ b/cli/src/generate/prepare_grammar/process_inlines.rs @@ -198,6 +198,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, variables_to_inline: vec![Symbol::non_terminal(1)], variables: vec![ @@ -328,6 +329,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, }; let inline_map = process_inlines(&grammar); @@ -429,6 +431,7 @@ mod tests { expected_conflicts: Vec::new(), extra_tokens: Vec::new(), external_tokens: Vec::new(), + supertype_symbols: Vec::new(), word_token: None, }; diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index e358f4fa..7ea5acdd 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -1,6 +1,6 @@ use super::nfa::CharacterSet; use super::rules::{Alias, Associativity, Symbol}; -use hashbrown::{HashMap, HashSet}; +use hashbrown::HashMap; use std::collections::BTreeMap; pub(crate) type ProductionInfoId = usize; @@ -52,7 +52,7 @@ pub(crate) struct ProductionInfo { pub field_map: BTreeMap>, } -#[derive(Clone, Debug, PartialEq, Eq, Hash)] +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] pub(crate) enum ChildType { Normal(Symbol), Aliased(Alias), @@ -62,13 +62,15 @@ pub(crate) enum ChildType { pub(crate) struct FieldInfo { pub required: bool, pub multiple: bool, - pub types: HashSet, + pub types: Vec, } #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub child_types: HashSet, + pub subclasses: Vec, + pub child_types: Vec, + pub has_multi_step_production: bool, } #[derive(Debug, PartialEq, Eq)] From 6c65d7481026781bc5e7abb5238d0261300573e8 Mon Sep 17 
00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 Mar 2019 13:43:10 -0700 Subject: [PATCH 21/25] Restructure node-types.json output --- .../build_tables/build_parse_table.rs | 4 +- cli/src/generate/mod.rs | 142 +++++++++++------- .../prepare_grammar/intern_symbols.rs | 2 - cli/src/generate/tables.rs | 1 - 4 files changed, 87 insertions(+), 62 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 9f9413a3..53f59221 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -759,7 +759,6 @@ pub(crate) fn get_variable_info( for (i, variable) in syntax_grammar.variables.iter().enumerate() { let mut info = VariableInfo { fields: HashMap::new(), - subclasses: Vec::new(), child_types: Vec::new(), has_multi_step_production: false, }; @@ -828,7 +827,8 @@ pub(crate) fn get_variable_info( for production in &variable.productions { for step in &production.steps { let child_symbol = step.symbol; - if child_symbol.kind == SymbolType::NonTerminal + if step.alias.is_none() + && child_symbol.kind == SymbolType::NonTerminal && !syntax_grammar.variables[child_symbol.index] .kind .is_visible() diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 68eb41b2..9e640423 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -35,7 +35,7 @@ lazy_static! 
{ struct GeneratedParser { name: String, c_code: String, - fields_json: String, + node_types_json: String, } pub fn generate_parser_in_directory( @@ -65,12 +65,12 @@ pub fn generate_parser_in_directory( let GeneratedParser { name: language_name, c_code, - fields_json, + node_types_json, } = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?; fs::write(&repo_src_path.join("parser.c"), c_code) .map_err(|e| format!("Failed to write parser.c: {}", e))?; - fs::write(&repo_src_path.join("node-fields.json"), fields_json) + fs::write(&repo_src_path.join("node-types.json"), node_types_json) .map_err(|e| format!("Failed to write parser.c: {}", e))?; fs::write( &repo_header_path.join("parser.h"), @@ -112,7 +112,7 @@ fn generate_parser_for_grammar_with_opts( state_ids_to_log, )?; let name = input_grammar.name; - let fields_json = generate_field_info_json( + let node_types_json = generate_node_types_json( &syntax_grammar, &lexical_grammar, &simple_aliases, @@ -131,7 +131,7 @@ fn generate_parser_for_grammar_with_opts( Ok(GeneratedParser { name, c_code, - fields_json, + node_types_json, }) } @@ -188,12 +188,17 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> #[derive(Debug, Serialize, PartialEq, Eq, Default)] struct NodeInfoJSON { + #[serde(rename = "type")] + name: String, + #[serde(skip_serializing_if = "Option::is_none")] fields: Option>, + #[serde(skip_serializing_if = "Option::is_none")] subtypes: Option>, } #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] struct NodeTypeJSON { + #[serde(rename = "type")] kind: String, named: bool, } @@ -205,28 +210,87 @@ struct FieldInfoJSON { types: Vec, } -fn generate_field_info_json( +fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, variable_info: &Vec, ) -> String { let mut node_types_json = BTreeMap::new(); - for (i, info) in variable_info.iter().enumerate() { - let variable = 
&syntax_grammar.variables[i]; - if !variable.kind.is_visible() || info.fields.is_empty() { - continue; - } + let child_type_to_node_type = |child_type: &ChildType| match child_type { + ChildType::Aliased(alias) => NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + } + } else { + match symbol.kind { + SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), + } + } + } + }; + + for (i, info) in variable_info.iter().enumerate() { + let symbol = Symbol::non_terminal(i); + let variable = &syntax_grammar.variables[i]; let name = simple_aliases .get(&Symbol::non_terminal(i)) .map_or(&variable.name, |alias| &alias.value); - let node_type_json = node_types_json - .entry(name.clone()) - .or_insert_with(|| NodeInfoJSON::default()); + if syntax_grammar.supertype_symbols.contains(&symbol) { + let node_type_json = + node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON { + name: name.clone(), + fields: None, + subtypes: None, + }); + let mut subtypes = info.child_types + .iter() + .map(child_type_to_node_type) + .collect::>(); + subtypes.sort_unstable(); + subtypes.dedup(); + node_type_json.subtypes = Some(subtypes); - if info.fields.len() > 0 { + } else if variable.kind.is_visible() { + let node_type_json = + node_types_json + 
.entry(name.clone()) + .or_insert_with(|| NodeInfoJSON { + name: name.clone(), + fields: None, + subtypes: None, + }); let mut fields_json = BTreeMap::new(); for (field, field_info) in info.fields.iter() { let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { @@ -237,53 +301,17 @@ fn generate_field_info_json( field_info_json.multiple |= field_info.multiple; field_info_json.required &= field_info.required; - field_info_json.types.extend(field_info.types.iter().map( - |child_type| match child_type { - ChildType::Aliased(alias) => NodeTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - }, - ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { - NodeTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - } - } else { - match symbol.kind { - SymbolType::NonTerminal => { - let variable = &syntax_grammar.variables[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - SymbolType::Terminal => { - let variable = &lexical_grammar.variables[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - SymbolType::External => { - let variable = &syntax_grammar.external_tokens[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - _ => panic!("Unexpected symbol type"), - } - } - } - }, - )); + field_info_json + .types + .extend(field_info.types.iter().map(child_type_to_node_type)); field_info_json.types.sort_unstable(); field_info_json.types.dedup(); } node_type_json.fields = Some(fields_json); } - } + + let node_types_json = node_types_json.values().collect::>(); + serde_json::to_string_pretty(&node_types_json).unwrap() } diff --git a/cli/src/generate/prepare_grammar/intern_symbols.rs b/cli/src/generate/prepare_grammar/intern_symbols.rs index 54abdf83..815eefe0 100644 --- 
a/cli/src/generate/prepare_grammar/intern_symbols.rs +++ b/cli/src/generate/prepare_grammar/intern_symbols.rs @@ -73,8 +73,6 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result ); } - eprintln!("supertype_symbols: {:?}", supertype_symbols); - Ok(InternedGrammar { variables, external_tokens, diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 7ea5acdd..929ba83e 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -68,7 +68,6 @@ pub(crate) struct FieldInfo { #[derive(Debug, Default, PartialEq, Eq)] pub(crate) struct VariableInfo { pub fields: HashMap, - pub subclasses: Vec, pub child_types: Vec, pub has_multi_step_production: bool, } From af09e65efc5a571b89c33378e913691b9f364fd2 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 Mar 2019 14:42:32 -0700 Subject: [PATCH 22/25] Move node types code to its own module --- cli/src/generate/mod.rs | 140 +-------------------------------- cli/src/generate/node_types.rs | 133 +++++++++++++++++++++++++++++++ 2 files changed, 136 insertions(+), 137 deletions(-) create mode 100644 cli/src/generate/node_types.rs diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index 9e640423..b5c4c0e4 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -1,15 +1,10 @@ use self::build_tables::build_tables; -use self::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; use self::parse_grammar::parse_grammar; use self::prepare_grammar::prepare_grammar; use self::render::render_c_code; -use self::rules::{AliasMap, Symbol, SymbolType}; -use self::tables::{ChildType, VariableInfo}; use crate::error::{Error, Result}; use lazy_static::lazy_static; use regex::{Regex, RegexBuilder}; -use serde_derive::Serialize; -use std::collections::BTreeMap; use std::fs; use std::io::Write; use std::path::{Path, PathBuf}; @@ -18,6 +13,7 @@ use std::process::{Command, Stdio}; mod build_tables; mod grammars; mod nfa; +mod node_types; mod npm_files; mod parse_grammar; mod 
prepare_grammar; @@ -112,7 +108,7 @@ fn generate_parser_for_grammar_with_opts( state_ids_to_log, )?; let name = input_grammar.name; - let node_types_json = generate_node_types_json( + let node_types_json = node_types::generate_node_types_json( &syntax_grammar, &lexical_grammar, &simple_aliases, @@ -131,7 +127,7 @@ fn generate_parser_for_grammar_with_opts( Ok(GeneratedParser { name, c_code, - node_types_json, + node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), }) } @@ -185,133 +181,3 @@ fn ensure_file>(path: &PathBuf, f: impl Fn() -> T) -> Result<()> .map_err(|e| Error(format!("Failed to write file {:?}: {}", path, e))) } } - -#[derive(Debug, Serialize, PartialEq, Eq, Default)] -struct NodeInfoJSON { - #[serde(rename = "type")] - name: String, - #[serde(skip_serializing_if = "Option::is_none")] - fields: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - subtypes: Option>, -} - -#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] -struct NodeTypeJSON { - #[serde(rename = "type")] - kind: String, - named: bool, -} - -#[derive(Debug, Serialize, PartialEq, Eq)] -struct FieldInfoJSON { - multiple: bool, - required: bool, - types: Vec, -} - -fn generate_node_types_json( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, - simple_aliases: &AliasMap, - variable_info: &Vec, -) -> String { - let mut node_types_json = BTreeMap::new(); - - let child_type_to_node_type = |child_type: &ChildType| match child_type { - ChildType::Aliased(alias) => NodeTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - }, - ChildType::Normal(symbol) => { - if let Some(alias) = simple_aliases.get(&symbol) { - NodeTypeJSON { - kind: alias.value.clone(), - named: alias.is_named, - } - } else { - match symbol.kind { - SymbolType::NonTerminal => { - let variable = &syntax_grammar.variables[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - 
SymbolType::Terminal => { - let variable = &lexical_grammar.variables[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - SymbolType::External => { - let variable = &syntax_grammar.external_tokens[symbol.index]; - NodeTypeJSON { - kind: variable.name.clone(), - named: variable.kind == VariableType::Named, - } - } - _ => panic!("Unexpected symbol type"), - } - } - } - }; - - for (i, info) in variable_info.iter().enumerate() { - let symbol = Symbol::non_terminal(i); - let variable = &syntax_grammar.variables[i]; - let name = simple_aliases - .get(&Symbol::non_terminal(i)) - .map_or(&variable.name, |alias| &alias.value); - - if syntax_grammar.supertype_symbols.contains(&symbol) { - let node_type_json = - node_types_json - .entry(name.clone()) - .or_insert_with(|| NodeInfoJSON { - name: name.clone(), - fields: None, - subtypes: None, - }); - let mut subtypes = info.child_types - .iter() - .map(child_type_to_node_type) - .collect::>(); - subtypes.sort_unstable(); - subtypes.dedup(); - node_type_json.subtypes = Some(subtypes); - - } else if variable.kind.is_visible() { - let node_type_json = - node_types_json - .entry(name.clone()) - .or_insert_with(|| NodeInfoJSON { - name: name.clone(), - fields: None, - subtypes: None, - }); - let mut fields_json = BTreeMap::new(); - for (field, field_info) in info.fields.iter() { - let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { - multiple: false, - required: true, - types: Vec::new(), - }); - - field_info_json.multiple |= field_info.multiple; - field_info_json.required &= field_info.required; - field_info_json - .types - .extend(field_info.types.iter().map(child_type_to_node_type)); - field_info_json.types.sort_unstable(); - field_info_json.types.dedup(); - } - node_type_json.fields = Some(fields_json); - } - } - - let node_types_json = node_types_json.values().collect::>(); - - serde_json::to_string_pretty(&node_types_json).unwrap() 
-} diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs new file mode 100644 index 00000000..35129189 --- /dev/null +++ b/cli/src/generate/node_types.rs @@ -0,0 +1,133 @@ +use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; +use super::rules::{AliasMap, Symbol, SymbolType}; +use super::tables::{ChildType, VariableInfo}; +use serde_derive::Serialize; +use std::collections::BTreeMap; + +#[derive(Debug, Serialize, PartialEq, Eq, Default)] +pub(crate) struct NodeInfoJSON { + #[serde(rename = "type")] + name: String, + #[serde(skip_serializing_if = "Option::is_none")] + fields: Option>, + #[serde(skip_serializing_if = "Option::is_none")] + subtypes: Option>, +} + +#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] +pub(crate) struct NodeTypeJSON { + #[serde(rename = "type")] + kind: String, + named: bool, +} + +#[derive(Debug, Serialize, PartialEq, Eq)] +pub(crate) struct FieldInfoJSON { + multiple: bool, + required: bool, + types: Vec, +} + +pub(crate) fn generate_node_types_json( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, + simple_aliases: &AliasMap, + variable_info: &Vec, +) -> Vec { + let mut node_types_json = BTreeMap::new(); + + let child_type_to_node_type = |child_type: &ChildType| match child_type { + ChildType::Aliased(alias) => NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + }, + ChildType::Normal(symbol) => { + if let Some(alias) = simple_aliases.get(&symbol) { + NodeTypeJSON { + kind: alias.value.clone(), + named: alias.is_named, + } + } else { + match symbol.kind { + SymbolType::NonTerminal => { + let variable = &syntax_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + SymbolType::Terminal => { + let variable = &lexical_grammar.variables[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + 
SymbolType::External => { + let variable = &syntax_grammar.external_tokens[symbol.index]; + NodeTypeJSON { + kind: variable.name.clone(), + named: variable.kind == VariableType::Named, + } + } + _ => panic!("Unexpected symbol type"), + } + } + } + }; + + for (i, info) in variable_info.iter().enumerate() { + let symbol = Symbol::non_terminal(i); + let variable = &syntax_grammar.variables[i]; + let name = simple_aliases + .get(&Symbol::non_terminal(i)) + .map_or(&variable.name, |alias| &alias.value); + + if syntax_grammar.supertype_symbols.contains(&symbol) { + let node_type_json = + node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON { + name: name.clone(), + fields: None, + subtypes: None, + }); + let mut subtypes = info + .child_types + .iter() + .map(child_type_to_node_type) + .collect::>(); + subtypes.sort_unstable(); + subtypes.dedup(); + node_type_json.subtypes = Some(subtypes); + } else if variable.kind.is_visible() { + let node_type_json = + node_types_json + .entry(name.clone()) + .or_insert_with(|| NodeInfoJSON { + name: name.clone(), + fields: None, + subtypes: None, + }); + let mut fields_json = BTreeMap::new(); + for (field, field_info) in info.fields.iter() { + let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON { + multiple: false, + required: true, + types: Vec::new(), + }); + + field_info_json.multiple |= field_info.multiple; + field_info_json.required &= field_info.required; + field_info_json + .types + .extend(field_info.types.iter().map(child_type_to_node_type)); + field_info_json.types.sort_unstable(); + field_info_json.types.dedup(); + } + node_type_json.fields = Some(fields_json); + } + } + + node_types_json.into_iter().map(|e| e.1).collect() +} From e9afdd72b44d022067adf8fa83b35070f26401b5 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 Mar 2019 15:27:07 -0700 Subject: [PATCH 23/25] node-types: Fix incorrect `named` value for subclasses --- cli/src/generate/build_tables/mod.rs | 4 +- 
cli/src/generate/node_types.rs | 174 ++++++++++++++++++++++++++- 2 files changed, 173 insertions(+), 5 deletions(-) diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 7f8b1a45..4b357f47 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -1,5 +1,5 @@ -mod build_lex_table; -mod build_parse_table; +pub(crate) mod build_lex_table; +pub(crate) mod build_parse_table; mod coincident_tokens; mod item; mod item_set_builder; diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 35129189..f9aa1738 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -53,21 +53,21 @@ pub(crate) fn generate_node_types_json( let variable = &syntax_grammar.variables[symbol.index]; NodeTypeJSON { kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + named: variable.kind != VariableType::Anonymous, } } SymbolType::Terminal => { let variable = &lexical_grammar.variables[symbol.index]; NodeTypeJSON { kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + named: variable.kind != VariableType::Anonymous, } } SymbolType::External => { let variable = &syntax_grammar.external_tokens[symbol.index]; NodeTypeJSON { kind: variable.name.clone(), - named: variable.kind == VariableType::Named, + named: variable.kind != VariableType::Anonymous, } } _ => panic!("Unexpected symbol type"), @@ -131,3 +131,171 @@ pub(crate) fn generate_node_types_json( node_types_json.into_iter().map(|e| e.1).collect() } + +#[cfg(test)] +mod tests { + use super::*; + use crate::generate::build_tables::build_parse_table::get_variable_info; + use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + use crate::generate::prepare_grammar::prepare_grammar; + use crate::generate::rules::Rule; + + #[test] + fn test_node_types_simple() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_tokens: Vec::new(), 
+ external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec![], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::seq(vec![ + Rule::field("f1".to_string(), Rule::named("v2")), + Rule::field("f2".to_string(), Rule::string(";")), + ]), + }, + Variable { + name: "v2".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + name: "v1".to_string(), + subtypes: None, + fields: Some( + vec![ + ( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "v2".to_string(), + named: true, + }] + } + ), + ( + "f2".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: ";".to_string(), + named: false, + }] + } + ), + ] + .into_iter() + .collect() + ) + } + ); + } + + #[test] + fn test_node_types_with_supertypes() { + let node_types = get_node_types(InputGrammar { + name: String::new(), + extra_tokens: Vec::new(), + external_tokens: Vec::new(), + expected_conflicts: Vec::new(), + variables_to_inline: Vec::new(), + word_token: None, + supertype_symbols: vec!["_v2".to_string()], + variables: vec![ + Variable { + name: "v1".to_string(), + kind: VariableType::Named, + rule: Rule::field("f1".to_string(), Rule::named("_v2")), + }, + Variable { + name: "_v2".to_string(), + kind: VariableType::Hidden, + rule: Rule::choice(vec![ + Rule::named("v3"), + Rule::named("v4"), + Rule::string("*"), + ]), + }, + Variable { + name: "v3".to_string(), + kind: VariableType::Named, + rule: Rule::string("x"), + }, + Variable { + name: "v4".to_string(), + kind: VariableType::Named, + rule: Rule::string("y"), + }, + ], + }); + + assert_eq!( + node_types[0], + NodeInfoJSON { + name: "_v2".to_string(), + fields: None, + subtypes: Some(vec![ + NodeTypeJSON { + kind: "*".to_string(), + named: 
false, + }, + NodeTypeJSON { + kind: "v3".to_string(), + named: true, + }, + NodeTypeJSON { + kind: "v4".to_string(), + named: true, + }, + ]), + } + ); + assert_eq!( + node_types[1], + NodeInfoJSON { + name: "v1".to_string(), + subtypes: None, + fields: Some( + vec![( + "f1".to_string(), + FieldInfoJSON { + multiple: false, + required: true, + types: vec![NodeTypeJSON { + kind: "_v2".to_string(), + named: true, + }] + } + ),] + .into_iter() + .collect() + ) + } + ); + } + + fn get_node_types(grammar: InputGrammar) -> Vec { + let (syntax_grammar, lexical_grammar, _, simple_aliases) = + prepare_grammar(&grammar).unwrap(); + let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap(); + generate_node_types_json( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &variable_info, + ) + } +} From 451478c620552ad301217fe02ad25a7123ee52ed Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 26 Mar 2019 16:57:55 -0700 Subject: [PATCH 24/25] Include tokens in node-types.json --- cli/src/generate/node_types.rs | 58 ++++++++++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 7 deletions(-) diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index f9aa1738..7e87b3b0 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -7,7 +7,8 @@ use std::collections::BTreeMap; #[derive(Debug, Serialize, PartialEq, Eq, Default)] pub(crate) struct NodeInfoJSON { #[serde(rename = "type")] - name: String, + kind: String, + named: bool, #[serde(skip_serializing_if = "Option::is_none")] fields: Option>, #[serde(skip_serializing_if = "Option::is_none")] @@ -88,7 +89,8 @@ pub(crate) fn generate_node_types_json( node_types_json .entry(name.clone()) .or_insert_with(|| NodeInfoJSON { - name: name.clone(), + kind: name.clone(), + named: true, fields: None, subtypes: None, }); @@ -105,7 +107,8 @@ pub(crate) fn generate_node_types_json( node_types_json .entry(name.clone()) .or_insert_with(|| NodeInfoJSON { - 
name: name.clone(), + kind: name.clone(), + named: true, fields: None, subtypes: None, }); @@ -129,7 +132,27 @@ pub(crate) fn generate_node_types_json( } } - node_types_json.into_iter().map(|e| e.1).collect() + let mut result = node_types_json.into_iter().map(|e| e.1).collect::>(); + + for variable in &lexical_grammar.variables { + if variable.kind == VariableType::Named { + result.push(NodeInfoJSON { + kind: variable.name.clone(), + named: true, + fields: None, + subtypes: None, + }); + } else if variable.kind == VariableType::Anonymous { + result.push(NodeInfoJSON { + kind: variable.name.clone(), + named: false, + fields: None, + subtypes: None, + }); + } + } + + result } #[cfg(test)] @@ -170,7 +193,8 @@ mod tests { assert_eq!( node_types[0], NodeInfoJSON { - name: "v1".to_string(), + kind: "v1".to_string(), + named: true, subtypes: None, fields: Some( vec![ @@ -202,6 +226,24 @@ mod tests { ) } ); + assert_eq!( + node_types[1], + NodeInfoJSON { + kind: ";".to_string(), + named: false, + subtypes: None, + fields: None + } + ); + assert_eq!( + node_types[2], + NodeInfoJSON { + kind: "v2".to_string(), + named: true, + subtypes: None, + fields: None + } + ); } #[test] @@ -245,7 +287,8 @@ mod tests { assert_eq!( node_types[0], NodeInfoJSON { - name: "_v2".to_string(), + kind: "_v2".to_string(), + named: true, fields: None, subtypes: Some(vec![ NodeTypeJSON { @@ -266,7 +309,8 @@ mod tests { assert_eq!( node_types[1], NodeInfoJSON { - name: "v1".to_string(), + kind: "v1".to_string(), + named: true, subtypes: None, fields: Some( vec![( From eb96dd6ddb828e56225d25b39c2ab851636a1e56 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 27 Mar 2019 16:17:02 -0700 Subject: [PATCH 25/25] node types: Preserve all supertypes in field type lists --- .../build_tables/build_parse_table.rs | 496 +---------------- cli/src/generate/build_tables/mod.rs | 11 +- cli/src/generate/mod.rs | 14 +- cli/src/generate/node_types.rs | 505 +++++++++++++++++- cli/src/generate/tables.rs | 21 - 5 
files changed, 526 insertions(+), 521 deletions(-) diff --git a/cli/src/generate/build_tables/build_parse_table.rs b/cli/src/generate/build_tables/build_parse_table.rs index 53f59221..04ab3aae 100644 --- a/cli/src/generate/build_tables/build_parse_table.rs +++ b/cli/src/generate/build_tables/build_parse_table.rs @@ -4,10 +4,11 @@ use crate::error::{Error, Result}; use crate::generate::grammars::{ InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType, }; +use crate::generate::node_types::VariableInfo; use crate::generate::rules::{Associativity, Symbol, SymbolType}; use crate::generate::tables::{ - ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, - ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo, + FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry, + ProductionInfo, ProductionInfoId, }; use core::ops::Range; use hashbrown::hash_map::Entry; @@ -16,7 +17,7 @@ use std::collections::hash_map::DefaultHasher; use std::collections::{BTreeMap, VecDeque}; use std::fmt::Write; use std::hash::Hasher; -use std::{mem, u32}; +use std::u32; #[derive(Clone)] struct AuxiliarySymbolInfo { @@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> { item_set_builder: ParseItemSetBuilder<'a>, syntax_grammar: &'a SyntaxGrammar, lexical_grammar: &'a LexicalGrammar, + variable_info: &'a Vec, state_ids_by_item_set: HashMap, ParseStateId>, item_sets_by_state_id: Vec>, parse_state_queue: VecDeque, @@ -670,7 +672,7 @@ impl<'a> ParseTableBuilder<'a> { .kind .is_visible() { - let info = &self.parse_table.variable_info[step.symbol.index]; + let info = &self.variable_info[step.symbol.index]; for (field_name, _) in &info.fields { production_info .field_map @@ -748,261 +750,11 @@ fn populate_following_tokens( } } -pub(crate) fn get_variable_info( - syntax_grammar: &SyntaxGrammar, - lexical_grammar: &LexicalGrammar, -) -> Result> { - let mut result = Vec::new(); - - // Determine which field names and child node 
types can appear directly - // within each type of node. - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - let mut info = VariableInfo { - fields: HashMap::new(), - child_types: Vec::new(), - has_multi_step_production: false, - }; - let is_recursive = variable - .productions - .iter() - .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); - - for production in &variable.productions { - if production.steps.len() > 1 { - info.has_multi_step_production = true; - } - - for step in &production.steps { - let child_type = if let Some(alias) = &step.alias { - ChildType::Aliased(alias.clone()) - } else { - ChildType::Normal(step.symbol) - }; - - if let Some(field_name) = &step.field_name { - let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { - multiple: false, - required: true, - types: Vec::new(), - }); - field_info.multiple |= is_recursive; - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - } - } - - if let Err(i) = info.child_types.binary_search(&child_type) { - info.child_types.insert(i, child_type.clone()); - } - } - } - - for production in &variable.productions { - let production_fields: Vec<&String> = production - .steps - .iter() - .filter_map(|s| s.field_name.as_ref()) - .collect(); - for (field_name, field_info) in info.fields.iter_mut() { - if !production_fields.contains(&field_name) { - field_info.required = false; - } - } - } - - result.push(info); - } - - // Expand each node type's information recursively to inherit the properties of - // hidden children. - let mut done = false; - while !done { - done = true; - for (i, variable) in syntax_grammar.variables.iter().enumerate() { - // Move this variable's info out of the vector so it can be modified - // while reading from other entries of the vector. 
- let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - for production in &variable.productions { - for step in &production.steps { - let child_symbol = step.symbol; - if step.alias.is_none() - && child_symbol.kind == SymbolType::NonTerminal - && !syntax_grammar.variables[child_symbol.index] - .kind - .is_visible() - { - let child_variable_info = &result[child_symbol.index]; - - if child_variable_info.has_multi_step_production { - variable_info.has_multi_step_production = true; - } - - // Inherit fields from this hidden child - for (field_name, child_field_info) in &child_variable_info.fields { - let field_info = variable_info - .fields - .entry(field_name.clone()) - .or_insert_with(|| { - done = false; - child_field_info.clone() - }); - if child_field_info.multiple && !field_info.multiple { - field_info.multiple = child_field_info.multiple; - done = false; - } - if !child_field_info.required && field_info.required { - field_info.required = child_field_info.required; - done = false; - } - for child_type in &child_field_info.types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } - - // Inherit child types from this hidden child - for child_type in &child_variable_info.child_types { - if let Err(i) = variable_info.child_types.binary_search(&child_type) { - variable_info.child_types.insert(i, child_type.clone()); - done = false; - } - } - - // If any field points to this hidden child, inherit child types - // for the field. - if let Some(field_name) = &step.field_name { - let field_info = variable_info.fields.get_mut(field_name).unwrap(); - for child_type in &child_variable_info.child_types { - if let Err(i) = field_info.types.binary_search(&child_type) { - field_info.types.insert(i, child_type.clone()); - done = false; - } - } - } - } - } - } - - // Move this variable's info back into the vector. 
- result[i] = variable_info; - } - } - - for supertype_symbol in &syntax_grammar.supertype_symbols { - let variable = &syntax_grammar.variables[supertype_symbol.index]; - if variable.kind != VariableType::Hidden { - return Err(Error::grammar(&format!( - "Supertype symbols must be hidden, but `{}` is not", - variable.name - ))); - } - - if result[supertype_symbol.index].has_multi_step_production { - return Err(Error::grammar(&format!( - "Supertype symbols must always have a single visible child, but `{}` can have multiple", - variable.name - ))); - } - } - - let child_type_is_visible = |child_type: &ChildType| match child_type { - ChildType::Aliased(_) => true, - ChildType::Normal(symbol) => { - let variable_kind = match symbol.kind { - SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, - SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, - SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, - _ => VariableType::Hidden, - }; - variable_kind.is_visible() - } - }; - - for supertype_symbol in &syntax_grammar.supertype_symbols { - result[supertype_symbol.index] - .child_types - .retain(child_type_is_visible); - } - - for i in 0..result.len() { - let mut variable_info = VariableInfo::default(); - mem::swap(&mut variable_info, &mut result[i]); - - // For each field, make the `types` list more concise by replacing sets of - // subtypes with a single supertype. 
- for (_, field_info) in variable_info.fields.iter_mut() { - for supertype_symbol in &syntax_grammar.supertype_symbols { - if sorted_vec_replace( - &mut field_info.types, - &result[supertype_symbol.index].child_types, - ChildType::Normal(*supertype_symbol), - ) { - break; - } - } - - field_info.types.retain(|t| { - if let ChildType::Normal(symbol) = t { - if syntax_grammar.supertype_symbols.contains(&symbol) { - return true; - } - } - child_type_is_visible(t) - }); - } - - result[i] = variable_info; - } - - Ok(result) -} - -fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool -where - T: Eq + Ord, -{ - let mut i = 0; - for right_elem in right.iter() { - while left[i] < *right_elem { - i += 1; - if i == left.len() { - return false; - } - } - if left[i] != *right_elem { - return false; - } - } - - i = 0; - left.retain(|left_elem| { - if i == right.len() { - return true; - } - while right[i] < *left_elem { - i += 1; - if i == right.len() { - return true; - } - } - right[i] != *left_elem - }); - - if let Err(i) = left.binary_search(&value) { - left.insert(i, value); - } - - true -} - pub(crate) fn build_parse_table( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, inlines: &InlinedProductionMap, + variable_info: &Vec, state_ids_to_log: Vec, ) -> Result<(ParseTable, Vec)> { let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines); @@ -1014,13 +766,12 @@ pub(crate) fn build_parse_table( &item_set_builder, ); - let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?; - let table = ParseTableBuilder { syntax_grammar, lexical_grammar, state_ids_to_log, item_set_builder, + variable_info, state_ids_by_item_set: HashMap::new(), item_sets_by_state_id: Vec::new(), parse_state_queue: VecDeque::new(), @@ -1029,240 +780,9 @@ pub(crate) fn build_parse_table( symbols: Vec::new(), production_infos: Vec::new(), max_aliased_production_length: 0, - variable_info, }, } .build()?; Ok((table, 
following_tokens)) } - -#[cfg(test)] -mod tests { - use super::*; - use crate::generate::grammars::{ - LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType, - }; - - #[test] - fn test_get_variable_info() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - // Required field `field1` has only one node type. - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)) - .with_field_name("field1"), - ], - }], - }, - // Hidden node - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(1))], - }], - }, - // Optional field `field2` can have two possible node types. - SyntaxVariable { - name: "rule2".to_string(), - kind: VariableType::Named, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(0))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(2)) - .with_field_name("field2"), - ], - }, - Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::terminal(3)) - .with_field_name("field2"), - ], - }, - ], - }, - ], - vec![], - ), - &build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(1))], - } - )] - .into_iter() - .collect::>() - ); - - assert_eq!( - variable_info[2].fields, - vec![( - "field2".to_string(), - FieldInfo { - required: false, - multiple: false, - types: vec![ - ChildType::Normal(Symbol::terminal(2)), - 
ChildType::Normal(Symbol::terminal(3)), - ], - } - )] - .into_iter() - .collect::>() - ); - } - - #[test] - fn test_get_variable_info_with_inherited_fields() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - // Hidden node with fields - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(2)), - ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), - ], - }], - }, - ], - vec![], - ), - &build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::terminal(3))], - } - )] - .into_iter() - .collect::>() - ); - } - - #[test] - fn test_get_variable_info_with_supertypes() { - let variable_info = get_variable_info( - &build_syntax_grammar( - vec![ - SyntaxVariable { - name: "rule0".to_string(), - kind: VariableType::Named, - productions: vec![Production { - dynamic_precedence: 0, - steps: vec![ - ProductionStep::new(Symbol::terminal(0)), - ProductionStep::new(Symbol::non_terminal(1)) - .with_field_name("field1"), - ProductionStep::new(Symbol::terminal(1)), - ], - }], - }, - SyntaxVariable { - name: "_rule1".to_string(), - kind: VariableType::Hidden, - productions: vec![ - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(2))], - }, - Production { - dynamic_precedence: 0, - steps: vec![ProductionStep::new(Symbol::terminal(3))], - }, - ], - }, - ], - // _rule1 is a supertype - vec![Symbol::non_terminal(1)], - ), - 
&build_lexical_grammar(), - ) - .unwrap(); - - assert_eq!( - variable_info[0].fields, - vec![( - "field1".to_string(), - FieldInfo { - required: true, - multiple: false, - types: vec![ChildType::Normal(Symbol::non_terminal(1))], - } - )] - .into_iter() - .collect::>() - ); - } - - fn build_syntax_grammar( - variables: Vec, - supertype_symbols: Vec, - ) -> SyntaxGrammar { - let mut syntax_grammar = SyntaxGrammar::default(); - syntax_grammar.variables = variables; - syntax_grammar.supertype_symbols = supertype_symbols; - syntax_grammar - } - - fn build_lexical_grammar() -> LexicalGrammar { - let mut lexical_grammar = LexicalGrammar::default(); - for i in 0..10 { - lexical_grammar.variables.push(LexicalVariable { - name: format!("token_{}", i), - kind: VariableType::Named, - implicit_precedence: 0, - start_state: 0, - }); - } - lexical_grammar - } -} diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 4b357f47..de28cda3 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap; use crate::error::Result; use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar}; use crate::generate::nfa::{CharacterSet, NfaCursor}; +use crate::generate::node_types::VariableInfo; use crate::generate::rules::{AliasMap, Symbol, SymbolType}; use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry}; use log::info; @@ -23,12 +24,18 @@ pub(crate) fn build_tables( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, simple_aliases: &AliasMap, + variable_info: &Vec, inlines: &InlinedProductionMap, minimize: bool, state_ids_to_log: Vec, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { - let (mut parse_table, following_tokens) = - build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?; + let (mut parse_table, following_tokens) = build_parse_table( + syntax_grammar, 
+ lexical_grammar, + inlines, + variable_info, + state_ids_to_log, + )?; let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens); let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar); let keywords = identify_keywords( diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index b5c4c0e4..7ad15051 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -99,21 +99,23 @@ fn generate_parser_for_grammar_with_opts( let input_grammar = parse_grammar(grammar_json)?; let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = prepare_grammar(&input_grammar)?; + let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?; + let node_types_json = node_types::generate_node_types_json( + &syntax_grammar, + &lexical_grammar, + &simple_aliases, + &variable_info, + ); let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables( &syntax_grammar, &lexical_grammar, &simple_aliases, + &variable_info, &inlines, minimize, state_ids_to_log, )?; let name = input_grammar.name; - let node_types_json = node_types::generate_node_types_json( - &syntax_grammar, - &lexical_grammar, - &simple_aliases, - &parse_table.variable_info, - ); let c_code = render_c_code( &name, parse_table, diff --git a/cli/src/generate/node_types.rs b/cli/src/generate/node_types.rs index 7e87b3b0..f43ffce2 100644 --- a/cli/src/generate/node_types.rs +++ b/cli/src/generate/node_types.rs @@ -1,8 +1,30 @@ use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType}; -use super::rules::{AliasMap, Symbol, SymbolType}; -use super::tables::{ChildType, VariableInfo}; +use super::rules::{Alias, AliasMap, Symbol, SymbolType}; +use crate::error::{Error, Result}; +use hashbrown::HashMap; use serde_derive::Serialize; use std::collections::BTreeMap; +use std::mem; + +#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub(crate) enum ChildType { + Normal(Symbol), + 
Aliased(Alias), +} + +#[derive(Clone, Debug, Default, PartialEq, Eq)] +pub(crate) struct FieldInfo { + pub required: bool, + pub multiple: bool, + pub types: Vec, +} + +#[derive(Debug, Default, PartialEq, Eq)] +pub(crate) struct VariableInfo { + pub fields: HashMap, + pub child_types: Vec, + pub has_multi_step_production: bool, +} #[derive(Debug, Serialize, PartialEq, Eq, Default)] pub(crate) struct NodeInfoJSON { @@ -29,6 +51,258 @@ pub(crate) struct FieldInfoJSON { types: Vec, } +pub(crate) fn get_variable_info( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> Result> { + let mut result = Vec::new(); + + // Determine which field names and child node types can appear directly + // within each type of node. + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + let mut info = VariableInfo { + fields: HashMap::new(), + child_types: Vec::new(), + has_multi_step_production: false, + }; + let is_recursive = variable + .productions + .iter() + .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); + + for production in &variable.productions { + if production.steps.len() > 1 { + info.has_multi_step_production = true; + } + + for step in &production.steps { + let child_type = if let Some(alias) = &step.alias { + ChildType::Aliased(alias.clone()) + } else { + ChildType::Normal(step.symbol) + }; + + if let Some(field_name) = &step.field_name { + let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { + multiple: false, + required: true, + types: Vec::new(), + }); + field_info.multiple |= is_recursive; + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + } + } + + if let Err(i) = info.child_types.binary_search(&child_type) { + info.child_types.insert(i, child_type.clone()); + } + } + } + + for production in &variable.productions { + let production_fields: Vec<&String> = production + .steps + .iter() + .filter_map(|s| 
s.field_name.as_ref()) + .collect(); + for (field_name, field_info) in info.fields.iter_mut() { + if !production_fields.contains(&field_name) { + field_info.required = false; + } + } + } + + result.push(info); + } + + // Expand each node type's information recursively to inherit the properties of + // hidden children. + let mut done = false; + while !done { + done = true; + for (i, variable) in syntax_grammar.variables.iter().enumerate() { + // Move this variable's info out of the vector so it can be modified + // while reading from other entries of the vector. + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + for production in &variable.productions { + for step in &production.steps { + let child_symbol = step.symbol; + if step.alias.is_none() + && child_symbol.kind == SymbolType::NonTerminal + && !syntax_grammar.variables[child_symbol.index] + .kind + .is_visible() + { + let child_variable_info = &result[child_symbol.index]; + + // If a hidden child can have multiple children, then this + // node can appear to have multiple children. 
+ if child_variable_info.has_multi_step_production { + variable_info.has_multi_step_production = true; + } + + // Inherit fields from this hidden child + for (field_name, child_field_info) in &child_variable_info.fields { + let field_info = variable_info + .fields + .entry(field_name.clone()) + .or_insert_with(|| { + done = false; + child_field_info.clone() + }); + if child_field_info.multiple && !field_info.multiple { + field_info.multiple = child_field_info.multiple; + done = false; + } + if !child_field_info.required && field_info.required { + field_info.required = child_field_info.required; + done = false; + } + for child_type in &child_field_info.types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + done = false; + } + } + } + + if !syntax_grammar.supertype_symbols.contains(&child_symbol) { + // Inherit child types from this hidden child + for child_type in &child_variable_info.child_types { + if let Err(i) = variable_info.child_types.binary_search(&child_type) + { + variable_info.child_types.insert(i, child_type.clone()); + done = false; + } + } + + // If any field points to this hidden child, inherit child types + // for the field. + if let Some(field_name) = &step.field_name { + let field_info = variable_info.fields.get_mut(field_name).unwrap(); + for child_type in &child_variable_info.child_types { + if let Err(i) = field_info.types.binary_search(&child_type) { + field_info.types.insert(i, child_type.clone()); + done = false; + } + } + } + } + } + } + } + + // Move this variable's info back into the vector. 
+ result[i] = variable_info; + } + } + + for supertype_symbol in &syntax_grammar.supertype_symbols { + let variable = &syntax_grammar.variables[supertype_symbol.index]; + if variable.kind != VariableType::Hidden { + return Err(Error::grammar(&format!( + "Supertype symbols must be hidden, but `{}` is not", + variable.name + ))); + } + + if result[supertype_symbol.index].has_multi_step_production { + return Err(Error::grammar(&format!( + "Supertype symbols must always have a single visible child, but `{}` can have multiple", + variable.name + ))); + } + } + + let child_type_is_visible = |child_type: &ChildType| match child_type { + ChildType::Aliased(_) => true, + ChildType::Normal(symbol) => { + if syntax_grammar.supertype_symbols.contains(&symbol) { + return true; + } + let variable_kind = match symbol.kind { + SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, + SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, + SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, + _ => VariableType::Hidden, + }; + variable_kind.is_visible() + } + }; + + for supertype_symbol in &syntax_grammar.supertype_symbols { + result[supertype_symbol.index] + .child_types + .retain(child_type_is_visible); + } + + for i in 0..result.len() { + let mut variable_info = VariableInfo::default(); + mem::swap(&mut variable_info, &mut result[i]); + + // For each field, make the `types` list more concise by replacing sets of + // subtypes with a single supertype. 
+ for (_, field_info) in variable_info.fields.iter_mut() { + for supertype_symbol in &syntax_grammar.supertype_symbols { + if sorted_vec_replace( + &mut field_info.types, + &result[supertype_symbol.index].child_types, + ChildType::Normal(*supertype_symbol), + ) { + break; + } + } + + field_info.types.retain(child_type_is_visible); + } + + result[i] = variable_info; + } + + Ok(result) +} + +fn sorted_vec_replace(left: &mut Vec, right: &Vec, value: T) -> bool +where + T: Eq + Ord, +{ + let mut i = 0; + for right_elem in right.iter() { + while left[i] < *right_elem { + i += 1; + if i == left.len() { + return false; + } + } + if left[i] != *right_elem { + return false; + } + } + + i = 0; + left.retain(|left_elem| { + if i == right.len() { + return true; + } + while right[i] < *left_elem { + i += 1; + if i == right.len() { + return true; + } + } + right[i] != *left_elem + }); + + if let Err(i) = left.binary_search(&value) { + left.insert(i, value); + } + + true +} + pub(crate) fn generate_node_types_json( syntax_grammar: &SyntaxGrammar, lexical_grammar: &LexicalGrammar, @@ -158,8 +432,9 @@ pub(crate) fn generate_node_types_json( #[cfg(test)] mod tests { use super::*; - use crate::generate::build_tables::build_parse_table::get_variable_info; - use crate::generate::grammars::{InputGrammar, Variable, VariableType}; + use crate::generate::grammars::{ + InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable, + }; use crate::generate::prepare_grammar::prepare_grammar; use crate::generate::rules::Rule; @@ -331,6 +606,205 @@ mod tests { ); } + #[test] + fn test_get_variable_info() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + // Required field `field1` has only one node type. 
+ SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ], + }], + }, + // Hidden node + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(1))], + }], + }, + // Optional field `field2` can have two possible node types. + SyntaxVariable { + name: "rule2".to_string(), + kind: VariableType::Named, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(0))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(2)) + .with_field_name("field2"), + ], + }, + Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::terminal(3)) + .with_field_name("field2"), + ], + }, + ], + }, + ], + vec![], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + + assert_eq!( + variable_info[2].fields, + vec![( + "field2".to_string(), + FieldInfo { + required: false, + multiple: false, + types: vec![ + ChildType::Normal(Symbol::terminal(2)), + ChildType::Normal(Symbol::terminal(3)), + ], + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_inherited_fields() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + 
steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + // Hidden node with fields + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(2)), + ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), + ], + }], + }, + ], + vec![], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::terminal(3))], + } + )] + .into_iter() + .collect::>() + ); + } + + #[test] + fn test_get_variable_info_with_supertypes() { + let variable_info = get_variable_info( + &build_syntax_grammar( + vec![ + SyntaxVariable { + name: "rule0".to_string(), + kind: VariableType::Named, + productions: vec![Production { + dynamic_precedence: 0, + steps: vec![ + ProductionStep::new(Symbol::terminal(0)), + ProductionStep::new(Symbol::non_terminal(1)) + .with_field_name("field1"), + ProductionStep::new(Symbol::terminal(1)), + ], + }], + }, + SyntaxVariable { + name: "_rule1".to_string(), + kind: VariableType::Hidden, + productions: vec![ + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(2))], + }, + Production { + dynamic_precedence: 0, + steps: vec![ProductionStep::new(Symbol::terminal(3))], + }, + ], + }, + ], + // _rule1 is a supertype + vec![Symbol::non_terminal(1)], + ), + &build_lexical_grammar(), + ) + .unwrap(); + + assert_eq!( + variable_info[0].fields, + vec![( + "field1".to_string(), + FieldInfo { + required: true, + multiple: false, + types: vec![ChildType::Normal(Symbol::non_terminal(1))], + } + )] + .into_iter() + .collect::>() + ); + } + fn get_node_types(grammar: InputGrammar) -> Vec { let (syntax_grammar, lexical_grammar, _, 
simple_aliases) = prepare_grammar(&grammar).unwrap(); @@ -342,4 +816,27 @@ mod tests { &variable_info, ) } + + fn build_syntax_grammar( + variables: Vec, + supertype_symbols: Vec, + ) -> SyntaxGrammar { + let mut syntax_grammar = SyntaxGrammar::default(); + syntax_grammar.variables = variables; + syntax_grammar.supertype_symbols = supertype_symbols; + syntax_grammar + } + + fn build_lexical_grammar() -> LexicalGrammar { + let mut lexical_grammar = LexicalGrammar::default(); + for i in 0..10 { + lexical_grammar.variables.push(LexicalVariable { + name: format!("token_{}", i), + kind: VariableType::Named, + implicit_precedence: 0, + start_state: 0, + }); + } + lexical_grammar + } } diff --git a/cli/src/generate/tables.rs b/cli/src/generate/tables.rs index 929ba83e..1ee5dde8 100644 --- a/cli/src/generate/tables.rs +++ b/cli/src/generate/tables.rs @@ -52,31 +52,10 @@ pub(crate) struct ProductionInfo { pub field_map: BTreeMap>, } -#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -pub(crate) enum ChildType { - Normal(Symbol), - Aliased(Alias), -} - -#[derive(Clone, Debug, Default, PartialEq, Eq)] -pub(crate) struct FieldInfo { - pub required: bool, - pub multiple: bool, - pub types: Vec, -} - -#[derive(Debug, Default, PartialEq, Eq)] -pub(crate) struct VariableInfo { - pub fields: HashMap, - pub child_types: Vec, - pub has_multi_step_production: bool, -} - #[derive(Debug, PartialEq, Eq)] pub(crate) struct ParseTable { pub states: Vec, pub symbols: Vec, - pub variable_info: Vec, pub production_infos: Vec, pub max_aliased_production_length: usize, }