Fully implement ts_node_child_by_field_id
This commit is contained in:
parent
bef80c162e
commit
1d1674811c
15 changed files with 455 additions and 181 deletions
|
|
@ -6,14 +6,14 @@ use crate::generate::grammars::{
|
|||
};
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
ChildInfo, ChildInfoSequenceId, ParseAction, ParseState, ParseStateId, ParseTable,
|
||||
ChildInfo, ChildInfoId, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable,
|
||||
ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::u32;
|
||||
|
||||
use std::fmt::Write;
|
||||
|
|
@ -36,6 +36,7 @@ struct ParseStateQueueEntry {
|
|||
|
||||
struct ParseTableBuilder<'a> {
|
||||
item_set_builder: ParseItemSetBuilder<'a>,
|
||||
field_names_by_hidden_symbol: HashMap<Symbol, Vec<String>>,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
|
|
@ -48,7 +49,7 @@ struct ParseTableBuilder<'a> {
|
|||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<ParseTable> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.child_info_sequences.push(Vec::new());
|
||||
self.parse_table.child_infos.push(ChildInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
|
@ -177,7 +178,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
precedence: item.precedence(),
|
||||
associativity: item.associativity(),
|
||||
dynamic_precedence: item.production.dynamic_precedence,
|
||||
child_info_sequence_id: self.get_child_info_sequence_id(item),
|
||||
child_info_id: self.get_child_info_id(item),
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -646,34 +647,56 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_child_info_sequence_id(&mut self, item: &ParseItem) -> ChildInfoSequenceId {
|
||||
let mut child_info_sequence: Vec<ChildInfo> = item
|
||||
.production
|
||||
.steps
|
||||
.iter()
|
||||
.map(|s| ChildInfo {
|
||||
alias: s.alias.clone(),
|
||||
field_name: s.field_name.clone(),
|
||||
})
|
||||
.collect();
|
||||
while child_info_sequence.last() == Some(&ChildInfo::default()) {
|
||||
child_info_sequence.pop();
|
||||
fn get_child_info_id(&mut self, item: &ParseItem) -> ChildInfoId {
|
||||
let mut child_info = ChildInfo {
|
||||
alias_sequence: Vec::new(),
|
||||
field_map: BTreeMap::new(),
|
||||
};
|
||||
|
||||
for (i, step) in item.production.steps.iter().enumerate() {
|
||||
child_info.alias_sequence.push(step.alias.clone());
|
||||
if let Some(field_name) = &step.field_name {
|
||||
child_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: false,
|
||||
});
|
||||
}
|
||||
if let Some(field_names) = self.field_names_by_hidden_symbol.get(&step.symbol) {
|
||||
for field_name in field_names {
|
||||
child_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while child_info.alias_sequence.last() == Some(&None) {
|
||||
child_info.alias_sequence.pop();
|
||||
}
|
||||
|
||||
if item.production.steps.len() > self.parse_table.max_production_length_with_child_info {
|
||||
self.parse_table.max_production_length_with_child_info = item.production.steps.len()
|
||||
}
|
||||
|
||||
if let Some(index) = self
|
||||
.parse_table
|
||||
.child_info_sequences
|
||||
.child_infos
|
||||
.iter()
|
||||
.position(|seq| *seq == child_info_sequence)
|
||||
.position(|seq| *seq == child_info)
|
||||
{
|
||||
index
|
||||
} else {
|
||||
self.parse_table
|
||||
.child_info_sequences
|
||||
.push(child_info_sequence);
|
||||
self.parse_table.child_info_sequences.len() - 1
|
||||
self.parse_table.child_infos.push(child_info);
|
||||
self.parse_table.child_infos.len() - 1
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -720,6 +743,26 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
fn field_names_by_hidden_symbol(grammar: &SyntaxGrammar) -> HashMap<Symbol, Vec<String>> {
|
||||
let mut result = HashMap::new();
|
||||
for (i, variable) in grammar.variables.iter().enumerate() {
|
||||
let mut field_names = Vec::new();
|
||||
if variable.kind == VariableType::Hidden {
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if let Some(field_name) = &step.field_name {
|
||||
if let Err(i) = field_names.binary_search(field_name) {
|
||||
field_names.insert(i, field_name.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
result.insert(Symbol::non_terminal(i), field_names);
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
pub(crate) fn build_parse_table(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
|
|
@ -746,9 +789,10 @@ pub(crate) fn build_parse_table(
|
|||
parse_table: ParseTable {
|
||||
states: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
child_info_sequences: Vec::new(),
|
||||
child_infos: Vec::new(),
|
||||
max_production_length_with_child_info: 0,
|
||||
},
|
||||
field_names_by_hidden_symbol: field_names_by_hidden_symbol(syntax_grammar),
|
||||
}
|
||||
.build()?;
|
||||
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> {
|
|||
ParseAction::ShiftExtra => continue,
|
||||
ParseAction::Reduce {
|
||||
child_count: 1,
|
||||
child_info_sequence_id: 0,
|
||||
child_info_id: 0,
|
||||
symbol,
|
||||
..
|
||||
} => {
|
||||
|
|
|
|||
|
|
@ -1,7 +1,9 @@
|
|||
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use super::tables::{
|
||||
AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::fmt::Write;
|
||||
|
|
@ -70,12 +72,14 @@ impl Generator {
|
|||
self.add_symbol_names_list();
|
||||
self.add_symbol_metadata_list();
|
||||
|
||||
if self.parse_table.child_info_sequences.len() > 1 {
|
||||
if !self.field_names.is_empty() {
|
||||
self.add_field_name_enum();
|
||||
}
|
||||
if !self.field_names.is_empty() {
|
||||
self.add_field_name_enum();
|
||||
self.add_field_name_names_list();
|
||||
self.add_child_info_sequences();
|
||||
self.add_field_sequences();
|
||||
}
|
||||
|
||||
if !self.alias_ids.is_empty() {
|
||||
self.add_alias_sequences();
|
||||
}
|
||||
|
||||
let mut main_lex_table = LexTable::default();
|
||||
|
|
@ -109,13 +113,13 @@ impl Generator {
|
|||
}
|
||||
|
||||
let mut field_names = Vec::new();
|
||||
for child_info_sequence in &self.parse_table.child_info_sequences {
|
||||
for entry in child_info_sequence {
|
||||
if let Some(field_name) = &entry.field_name {
|
||||
field_names.push(field_name);
|
||||
}
|
||||
for child_info in &self.parse_table.child_infos {
|
||||
for field_name in child_info.field_map.keys() {
|
||||
field_names.push(field_name);
|
||||
}
|
||||
|
||||
if let Some(alias) = &entry.alias {
|
||||
for alias in &child_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
let alias_kind = if alias.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
|
|
@ -350,22 +354,22 @@ impl Generator {
|
|||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_child_info_sequences(&mut self) {
|
||||
fn add_alias_sequences(&mut self) {
|
||||
add_line!(
|
||||
self,
|
||||
"static TSSymbol ts_alias_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
|
||||
self.parse_table.child_info_sequences.len()
|
||||
self.parse_table.child_infos.len()
|
||||
);
|
||||
indent!(self);
|
||||
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
|
||||
if sequence.iter().all(|i| i.alias.is_none()) {
|
||||
for (i, child_info) in self.parse_table.child_infos.iter().enumerate() {
|
||||
if child_info.alias_sequence.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
indent!(self);
|
||||
for (j, child_info) in sequence.iter().enumerate() {
|
||||
if let Some(alias) = &child_info.alias {
|
||||
for (j, alias) in child_info.alias_sequence.iter().enumerate() {
|
||||
if let Some(alias) = alias {
|
||||
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
|
||||
}
|
||||
}
|
||||
|
|
@ -375,28 +379,66 @@ impl Generator {
|
|||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
add_line!(
|
||||
self,
|
||||
"static TSFieldId ts_field_sequences[{}][MAX_CHILD_INFO_PRODUCTION_LENGTH] = {{",
|
||||
self.parse_table.child_info_sequences.len()
|
||||
fn add_field_sequences(&mut self) {
|
||||
let mut flat_field_maps = vec![];
|
||||
let mut next_flat_field_map_index = self.parse_table.child_infos.len();
|
||||
self.get_field_map_id(
|
||||
&Vec::new(),
|
||||
&mut flat_field_maps,
|
||||
&mut next_flat_field_map_index,
|
||||
);
|
||||
indent!(self);
|
||||
for (i, sequence) in self.parse_table.child_info_sequences.iter().enumerate() {
|
||||
if sequence.iter().all(|i| i.field_name.is_none()) {
|
||||
continue;
|
||||
}
|
||||
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
indent!(self);
|
||||
for (j, child_info) in sequence.iter().enumerate() {
|
||||
if let Some(field_name) = &child_info.field_name {
|
||||
add_line!(self, "[{}] = {},", j, self.field_id(&field_name));
|
||||
let mut field_map_ids = Vec::new();
|
||||
for child_info in &self.parse_table.child_infos {
|
||||
if !child_info.field_map.is_empty() {
|
||||
let mut flat_field_map = Vec::new();
|
||||
for (field_name, locations) in &child_info.field_map {
|
||||
for location in locations {
|
||||
flat_field_map.push((field_name.clone(), *location));
|
||||
}
|
||||
}
|
||||
field_map_ids.push((
|
||||
self.get_field_map_id(
|
||||
&flat_field_map,
|
||||
&mut flat_field_maps,
|
||||
&mut next_flat_field_map_index,
|
||||
),
|
||||
flat_field_map.len(),
|
||||
));
|
||||
} else {
|
||||
field_map_ids.push((0, 0));
|
||||
}
|
||||
}
|
||||
|
||||
add_line!(self, "static const TSFieldMapping ts_field_map[] = {{",);
|
||||
indent!(self);
|
||||
|
||||
add_line!(self, "/* child info id -> (field map index, count) */");
|
||||
for (child_info_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
|
||||
if length > 0 {
|
||||
add_line!(self, "[{}] = {{{}, {}, 0}},", child_info_id, row_id, length);
|
||||
}
|
||||
}
|
||||
|
||||
add!(self, "\n");
|
||||
add_line!(self, "/* field id -> child index */");
|
||||
for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
|
||||
add_line!(self, "[{}] =", row_index);
|
||||
indent!(self);
|
||||
for (field_name, location) in field_pairs {
|
||||
add_line!(
|
||||
self,
|
||||
"{{{}, {}, {}}},",
|
||||
self.field_id(&field_name),
|
||||
location.index,
|
||||
location.inherited
|
||||
);
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}},");
|
||||
}
|
||||
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
|
|
@ -762,19 +804,15 @@ impl Generator {
|
|||
symbol,
|
||||
child_count,
|
||||
dynamic_precedence,
|
||||
child_info_sequence_id,
|
||||
child_info_id,
|
||||
..
|
||||
} => {
|
||||
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
|
||||
if dynamic_precedence != 0 {
|
||||
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
|
||||
}
|
||||
if child_info_sequence_id != 0 {
|
||||
add!(
|
||||
self,
|
||||
", .child_info_sequence_id = {}",
|
||||
child_info_sequence_id
|
||||
);
|
||||
if child_info_id != 0 {
|
||||
add!(self, ", .child_info_id = {}", child_info_id);
|
||||
}
|
||||
add!(self, ")");
|
||||
}
|
||||
|
|
@ -839,17 +877,17 @@ impl Generator {
|
|||
add_line!(self, ".lex_modes = ts_lex_modes,");
|
||||
add_line!(self, ".symbol_names = ts_symbol_names,");
|
||||
|
||||
if self.parse_table.child_info_sequences.len() > 1 {
|
||||
if !self.alias_ids.is_empty() {
|
||||
add_line!(
|
||||
self,
|
||||
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
|
||||
);
|
||||
}
|
||||
|
||||
add_line!(self, ".field_count = FIELD_COUNT,");
|
||||
add_line!(
|
||||
self,
|
||||
".field_sequences = (const TSFieldId *)ts_field_sequences,"
|
||||
);
|
||||
add_line!(self, ".field_count = FIELD_COUNT,");
|
||||
|
||||
if !self.field_names.is_empty() {
|
||||
add_line!(self, ".field_map = (const TSFieldMapping *)ts_field_map,");
|
||||
add_line!(self, ".field_names = ts_field_names,");
|
||||
}
|
||||
|
||||
|
|
@ -907,6 +945,22 @@ impl Generator {
|
|||
result
|
||||
}
|
||||
|
||||
fn get_field_map_id(
|
||||
&self,
|
||||
flat_field_map: &Vec<(String, FieldLocation)>,
|
||||
flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
|
||||
next_flat_field_map_index: &mut usize,
|
||||
) -> usize {
|
||||
if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
|
||||
return *index;
|
||||
}
|
||||
|
||||
let result = *next_flat_field_map_index;
|
||||
flat_field_maps.push((result, flat_field_map.clone()));
|
||||
*next_flat_field_map_index += flat_field_map.len();
|
||||
result
|
||||
}
|
||||
|
||||
fn get_external_scanner_state_id(&mut self, external_tokens: HashSet<usize>) -> usize {
|
||||
self.external_scanner_states
|
||||
.iter()
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, Associativity, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
pub(crate) type ChildInfoSequenceId = usize;
|
||||
pub(crate) type ChildInfoId = usize;
|
||||
pub(crate) type ParseStateId = usize;
|
||||
pub(crate) type LexStateId = usize;
|
||||
|
||||
|
|
@ -21,7 +22,7 @@ pub(crate) enum ParseAction {
|
|||
precedence: i32,
|
||||
dynamic_precedence: i32,
|
||||
associativity: Option<Associativity>,
|
||||
child_info_sequence_id: ChildInfoSequenceId,
|
||||
child_info_id: ChildInfoId,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -39,17 +40,23 @@ pub(crate) struct ParseState {
|
|||
pub unfinished_item_signature: u64,
|
||||
}
|
||||
|
||||
/// Position of a field within a production, as recorded in a `ChildInfo`
/// field map.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
    /// Index of the production step that the field refers to.
    pub index: usize,
    /// `false` when the step itself carries the field name; `true` when the
    /// name was taken from the field names collected for the step's symbol.
    pub inherited: bool,
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct ChildInfo {
|
||||
pub alias: Option<Alias>,
|
||||
pub field_name: Option<String>,
|
||||
pub alias_sequence: Vec<Option<Alias>>,
|
||||
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTable {
|
||||
pub states: Vec<ParseState>,
|
||||
pub symbols: Vec<Symbol>,
|
||||
pub child_info_sequences: Vec<Vec<ChildInfo>>,
|
||||
pub child_infos: Vec<ChildInfo>,
|
||||
pub max_production_length_with_child_info: usize,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -343,7 +343,7 @@ fn test_node_field_names() {
|
|||
let (parser_name, parser_code) = generate_parser_for_grammar(
|
||||
r#"
|
||||
{
|
||||
"name": "test_grammar_with_refs",
|
||||
"name": "test_grammar_with_fields",
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s+"}
|
||||
],
|
||||
|
|
@ -354,30 +354,54 @@ fn test_node_field_names() {
|
|||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_1",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-1"
|
||||
}
|
||||
"content": {"type": "STRING", "value": "child-0"}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-1"},
|
||||
{"type": "BLANK"},
|
||||
|
||||
// This isn't used in the test, but prevents `_hidden_rule1`
|
||||
// from being eliminated as a unit reduction.
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "child-2"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
"type": "ALIAS",
|
||||
"value": "x",
|
||||
"named": true,
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_hidden_rule1"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_2",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-3"
|
||||
}
|
||||
"content": {"type": "SYMBOL", "name": "_hidden_rule1"}
|
||||
},
|
||||
{"type": "SYMBOL", "name": "_hidden_rule2"}
|
||||
]
|
||||
},
|
||||
|
||||
// Fields pointing to hidden nodes with a single child resolve to the child.
|
||||
"_hidden_rule1": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-2"},
|
||||
{"type": "STRING", "value": "child-2.5"}
|
||||
]
|
||||
},
|
||||
|
||||
// Fields within hidden nodes can be referenced through the parent node.
|
||||
"_hidden_rule2": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-3"},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_3",
|
||||
"content": {"type": "STRING", "value": "child-4"}
|
||||
}
|
||||
]
|
||||
}
|
||||
|
|
@ -391,10 +415,30 @@ fn test_node_field_names() {
|
|||
let language = get_test_language(&parser_name, &parser_code, None);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
|
||||
let tree = parser.parse("child-0 child-1 child-2 child-3 child-4", None).unwrap();
|
||||
let root_node = tree.root_node();
|
||||
|
||||
assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0));
|
||||
assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2));
|
||||
assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4));
|
||||
|
||||
let mut cursor = root_node.walk();
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_first_child();
|
||||
assert_eq!(cursor.node().kind(), "child-0");
|
||||
assert_eq!(cursor.field_name(), Some("field_1"));
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-1");
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-2");
|
||||
assert_eq!(cursor.field_name(), Some("field_2"));
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-3");
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-4");
|
||||
assert_eq!(cursor.field_name(), Some("field_3"));
|
||||
}
|
||||
|
||||
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue