node types: Preserve all supertypes in field type lists
This commit is contained in:
parent
451478c620
commit
eb96dd6ddb
5 changed files with 526 additions and 521 deletions
|
|
@ -4,10 +4,11 @@ use crate::error::{Error, Result};
|
|||
use crate::generate::grammars::{
|
||||
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
||||
};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable,
|
||||
ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo,
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::hash_map::Entry;
|
||||
|
|
@ -16,7 +17,7 @@ use std::collections::hash_map::DefaultHasher;
|
|||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::fmt::Write;
|
||||
use std::hash::Hasher;
|
||||
use std::{mem, u32};
|
||||
use std::u32;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
|
|
@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> {
|
|||
item_set_builder: ParseItemSetBuilder<'a>,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
variable_info: &'a Vec<VariableInfo>,
|
||||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
|
|
@ -670,7 +672,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let info = &self.parse_table.variable_info[step.symbol.index];
|
||||
let info = &self.variable_info[step.symbol.index];
|
||||
for (field_name, _) in &info.fields {
|
||||
production_info
|
||||
.field_map
|
||||
|
|
@ -748,261 +750,11 @@ fn populate_following_tokens(
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
// Determine which field names and child node types can appear directly
|
||||
// within each type of node.
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
let mut info = VariableInfo {
|
||||
fields: HashMap::new(),
|
||||
child_types: Vec::new(),
|
||||
has_multi_step_production: false,
|
||||
};
|
||||
let is_recursive = variable
|
||||
.productions
|
||||
.iter()
|
||||
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
|
||||
|
||||
for production in &variable.productions {
|
||||
if production.steps.len() > 1 {
|
||||
info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
for step in &production.steps {
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else {
|
||||
ChildType::Normal(step.symbol)
|
||||
};
|
||||
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
});
|
||||
field_info.multiple |= is_recursive;
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(i) = info.child_types.binary_search(&child_type) {
|
||||
info.child_types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for production in &variable.productions {
|
||||
let production_fields: Vec<&String> = production
|
||||
.steps
|
||||
.iter()
|
||||
.filter_map(|s| s.field_name.as_ref())
|
||||
.collect();
|
||||
for (field_name, field_info) in info.fields.iter_mut() {
|
||||
if !production_fields.contains(&field_name) {
|
||||
field_info.required = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.push(info);
|
||||
}
|
||||
|
||||
// Expand each node type's information recursively to inherit the properties of
|
||||
// hidden children.
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
// Move this variable's info out of the vector so it can be modified
|
||||
// while reading from other entries of the vector.
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
let child_symbol = step.symbol;
|
||||
if step.alias.is_none()
|
||||
&& child_symbol.kind == SymbolType::NonTerminal
|
||||
&& !syntax_grammar.variables[child_symbol.index]
|
||||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
|
||||
if child_variable_info.has_multi_step_production {
|
||||
variable_info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
// Inherit fields from this hidden child
|
||||
for (field_name, child_field_info) in &child_variable_info.fields {
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert_with(|| {
|
||||
done = false;
|
||||
child_field_info.clone()
|
||||
});
|
||||
if child_field_info.multiple && !field_info.multiple {
|
||||
field_info.multiple = child_field_info.multiple;
|
||||
done = false;
|
||||
}
|
||||
if !child_field_info.required && field_info.required {
|
||||
field_info.required = child_field_info.required;
|
||||
done = false;
|
||||
}
|
||||
for child_type in &child_field_info.types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inherit child types from this hidden child
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = variable_info.child_types.binary_search(&child_type) {
|
||||
variable_info.child_types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If any field points to this hidden child, inherit child types
|
||||
// for the field.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = variable_info.fields.get_mut(field_name).unwrap();
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move this variable's info back into the vector.
|
||||
result[i] = variable_info;
|
||||
}
|
||||
}
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
if variable.kind != VariableType::Hidden {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must be hidden, but `{}` is not",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
|
||||
if result[supertype_symbol.index].has_multi_step_production {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let child_type_is_visible = |child_type: &ChildType| match child_type {
|
||||
ChildType::Aliased(_) => true,
|
||||
ChildType::Normal(symbol) => {
|
||||
let variable_kind = match symbol.kind {
|
||||
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
|
||||
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
|
||||
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
|
||||
_ => VariableType::Hidden,
|
||||
};
|
||||
variable_kind.is_visible()
|
||||
}
|
||||
};
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
result[supertype_symbol.index]
|
||||
.child_types
|
||||
.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
for i in 0..result.len() {
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
// For each field, make the `types` list more concise by replacing sets of
|
||||
// subtypes with a single supertype.
|
||||
for (_, field_info) in variable_info.fields.iter_mut() {
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
if sorted_vec_replace(
|
||||
&mut field_info.types,
|
||||
&result[supertype_symbol.index].child_types,
|
||||
ChildType::Normal(*supertype_symbol),
|
||||
) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
field_info.types.retain(|t| {
|
||||
if let ChildType::Normal(symbol) = t {
|
||||
if syntax_grammar.supertype_symbols.contains(&symbol) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
child_type_is_visible(t)
|
||||
});
|
||||
}
|
||||
|
||||
result[i] = variable_info;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
|
||||
where
|
||||
T: Eq + Ord,
|
||||
{
|
||||
let mut i = 0;
|
||||
for right_elem in right.iter() {
|
||||
while left[i] < *right_elem {
|
||||
i += 1;
|
||||
if i == left.len() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if left[i] != *right_elem {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
i = 0;
|
||||
left.retain(|left_elem| {
|
||||
if i == right.len() {
|
||||
return true;
|
||||
}
|
||||
while right[i] < *left_elem {
|
||||
i += 1;
|
||||
if i == right.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
right[i] != *left_elem
|
||||
});
|
||||
|
||||
if let Err(i) = left.binary_search(&value) {
|
||||
left.insert(i, value);
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub(crate) fn build_parse_table(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(ParseTable, Vec<TokenSet>)> {
|
||||
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
||||
|
|
@ -1014,13 +766,12 @@ pub(crate) fn build_parse_table(
|
|||
&item_set_builder,
|
||||
);
|
||||
|
||||
let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?;
|
||||
|
||||
let table = ParseTableBuilder {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
state_ids_to_log,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
item_sets_by_state_id: Vec::new(),
|
||||
parse_state_queue: VecDeque::new(),
|
||||
|
|
@ -1029,240 +780,9 @@ pub(crate) fn build_parse_table(
|
|||
symbols: Vec::new(),
|
||||
production_infos: Vec::new(),
|
||||
max_aliased_production_length: 0,
|
||||
variable_info,
|
||||
},
|
||||
}
|
||||
.build()?;
|
||||
|
||||
Ok((table, following_tokens))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::grammars::{
|
||||
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
|
||||
};
|
||||
|
||||
#[test]
|
||||
fn test_get_variable_info() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
// Required field `field1` has only one node type.
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_field_name("field1"),
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(1))],
|
||||
}],
|
||||
},
|
||||
// Optional field `field2` can have two possible node types.
|
||||
SyntaxVariable {
|
||||
name: "rule2".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(0))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(2))
|
||||
.with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(3))
|
||||
.with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
vec![(
|
||||
"field1".to_string(),
|
||||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::terminal(1))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
|
||||
assert_eq!(
|
||||
variable_info[2].fields,
|
||||
vec![(
|
||||
"field2".to_string(),
|
||||
FieldInfo {
|
||||
required: false,
|
||||
multiple: false,
|
||||
types: vec![
|
||||
ChildType::Normal(Symbol::terminal(2)),
|
||||
ChildType::Normal(Symbol::terminal(3)),
|
||||
],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_variable_info_with_inherited_fields() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node with fields
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
vec![(
|
||||
"field1".to_string(),
|
||||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::terminal(3))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_get_variable_info_with_supertypes() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_field_name("field1"),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(2))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(3))],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
// _rule1 is a supertype
|
||||
vec![Symbol::non_terminal(1)],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
vec![(
|
||||
"field1".to_string(),
|
||||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::non_terminal(1))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
}
|
||||
|
||||
fn build_syntax_grammar(
|
||||
variables: Vec<SyntaxVariable>,
|
||||
supertype_symbols: Vec<Symbol>,
|
||||
) -> SyntaxGrammar {
|
||||
let mut syntax_grammar = SyntaxGrammar::default();
|
||||
syntax_grammar.variables = variables;
|
||||
syntax_grammar.supertype_symbols = supertype_symbols;
|
||||
syntax_grammar
|
||||
}
|
||||
|
||||
fn build_lexical_grammar() -> LexicalGrammar {
|
||||
let mut lexical_grammar = LexicalGrammar::default();
|
||||
for i in 0..10 {
|
||||
lexical_grammar.variables.push(LexicalVariable {
|
||||
name: format!("token_{}", i),
|
||||
kind: VariableType::Named,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
});
|
||||
}
|
||||
lexical_grammar
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap;
|
|||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
|
|
@ -23,12 +24,18 @@ pub(crate) fn build_tables(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
inlines: &InlinedProductionMap,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||
let (mut parse_table, following_tokens) =
|
||||
build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
|
||||
let (mut parse_table, following_tokens) = build_parse_table(
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
variable_info,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
||||
let keywords = identify_keywords(
|
||||
|
|
|
|||
|
|
@ -99,21 +99,23 @@ fn generate_parser_for_grammar_with_opts(
|
|||
let input_grammar = parse_grammar(grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
);
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
&inlines,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let name = input_grammar.name;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&parse_table.variable_info,
|
||||
);
|
||||
let c_code = render_c_code(
|
||||
&name,
|
||||
parse_table,
|
||||
|
|
|
|||
|
|
@ -1,8 +1,30 @@
|
|||
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use super::rules::{AliasMap, Symbol, SymbolType};
|
||||
use super::tables::{ChildType, VariableInfo};
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use crate::error::{Error, Result};
|
||||
use hashbrown::HashMap;
|
||||
use serde_derive::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::mem;
|
||||
|
||||
/// The type of a child node as recorded by `get_variable_info`.
///
/// The derived `Ord` is what allows lists of `ChildType` to be kept sorted
/// and searched with `binary_search`.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) enum ChildType {
|
||||
// A production step without an alias, identified by the step's own symbol.
Normal(Symbol),
|
||||
// A production step that carries an alias, identified by that alias.
Aliased(Alias),
|
||||
}
|
||||
|
||||
/// What `get_variable_info` has learned about one named field of a variable.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct FieldInfo {
|
||||
// Starts out `true` in `get_variable_info` and is cleared when some
// production of the variable has no step with this field, or when the field
// is inherited from a hidden child in which it is not required.
pub required: bool,
|
||||
// Set by `get_variable_info` for fields of variables that refer to
// themselves in one of their own productions, and inherited from hidden
// children whose field has it set.
pub multiple: bool,
|
||||
// The child types seen under this field; `get_variable_info` keeps the list
// sorted and free of duplicates by inserting via `binary_search`.
pub types: Vec<ChildType>,
|
||||
}
|
||||
|
||||
/// Per-variable summary produced by `get_variable_info`: one entry for each
/// variable of the syntax grammar, at the variable's index.
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct VariableInfo {
|
||||
// Field information keyed by field name.
pub fields: HashMap<String, FieldInfo>,
|
||||
// The child types that appear in the variable's productions;
// `get_variable_info` keeps the list sorted and free of duplicates.
pub child_types: Vec<ChildType>,
|
||||
// True when some production of the variable has more than one step, or when
// a hidden child of the variable has this flag set.
pub has_multi_step_production: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, PartialEq, Eq, Default)]
|
||||
pub(crate) struct NodeInfoJSON {
|
||||
|
|
@ -29,6 +51,258 @@ pub(crate) struct FieldInfoJSON {
|
|||
types: Vec<NodeTypeJSON>,
|
||||
}
|
||||
|
||||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
// Determine which field names and child node types can appear directly
|
||||
// within each type of node.
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
let mut info = VariableInfo {
|
||||
fields: HashMap::new(),
|
||||
child_types: Vec::new(),
|
||||
has_multi_step_production: false,
|
||||
};
|
||||
let is_recursive = variable
|
||||
.productions
|
||||
.iter()
|
||||
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
|
||||
|
||||
for production in &variable.productions {
|
||||
if production.steps.len() > 1 {
|
||||
info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
for step in &production.steps {
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else {
|
||||
ChildType::Normal(step.symbol)
|
||||
};
|
||||
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
});
|
||||
field_info.multiple |= is_recursive;
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(i) = info.child_types.binary_search(&child_type) {
|
||||
info.child_types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for production in &variable.productions {
|
||||
let production_fields: Vec<&String> = production
|
||||
.steps
|
||||
.iter()
|
||||
.filter_map(|s| s.field_name.as_ref())
|
||||
.collect();
|
||||
for (field_name, field_info) in info.fields.iter_mut() {
|
||||
if !production_fields.contains(&field_name) {
|
||||
field_info.required = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.push(info);
|
||||
}
|
||||
|
||||
// Expand each node type's information recursively to inherit the properties of
|
||||
// hidden children.
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
// Move this variable's info out of the vector so it can be modified
|
||||
// while reading from other entries of the vector.
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
let child_symbol = step.symbol;
|
||||
if step.alias.is_none()
|
||||
&& child_symbol.kind == SymbolType::NonTerminal
|
||||
&& !syntax_grammar.variables[child_symbol.index]
|
||||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
|
||||
// If a hidden child can have multiple children, then this
|
||||
// node can appear to have multiple children.
|
||||
if child_variable_info.has_multi_step_production {
|
||||
variable_info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
// Inherit fields from this hidden child
|
||||
for (field_name, child_field_info) in &child_variable_info.fields {
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert_with(|| {
|
||||
done = false;
|
||||
child_field_info.clone()
|
||||
});
|
||||
if child_field_info.multiple && !field_info.multiple {
|
||||
field_info.multiple = child_field_info.multiple;
|
||||
done = false;
|
||||
}
|
||||
if !child_field_info.required && field_info.required {
|
||||
field_info.required = child_field_info.required;
|
||||
done = false;
|
||||
}
|
||||
for child_type in &child_field_info.types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !syntax_grammar.supertype_symbols.contains(&child_symbol) {
|
||||
// Inherit child types from this hidden child
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = variable_info.child_types.binary_search(&child_type)
|
||||
{
|
||||
variable_info.child_types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If any field points to this hidden child, inherit child types
|
||||
// for the field.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = variable_info.fields.get_mut(field_name).unwrap();
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move this variable's info back into the vector.
|
||||
result[i] = variable_info;
|
||||
}
|
||||
}
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
if variable.kind != VariableType::Hidden {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must be hidden, but `{}` is not",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
|
||||
if result[supertype_symbol.index].has_multi_step_production {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let child_type_is_visible = |child_type: &ChildType| match child_type {
|
||||
ChildType::Aliased(_) => true,
|
||||
ChildType::Normal(symbol) => {
|
||||
if syntax_grammar.supertype_symbols.contains(&symbol) {
|
||||
return true;
|
||||
}
|
||||
let variable_kind = match symbol.kind {
|
||||
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
|
||||
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
|
||||
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
|
||||
_ => VariableType::Hidden,
|
||||
};
|
||||
variable_kind.is_visible()
|
||||
}
|
||||
};
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
result[supertype_symbol.index]
|
||||
.child_types
|
||||
.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
for i in 0..result.len() {
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
// For each field, make the `types` list more concise by replacing sets of
|
||||
// subtypes with a single supertype.
|
||||
for (_, field_info) in variable_info.fields.iter_mut() {
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
if sorted_vec_replace(
|
||||
&mut field_info.types,
|
||||
&result[supertype_symbol.index].child_types,
|
||||
ChildType::Normal(*supertype_symbol),
|
||||
) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
field_info.types.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
result[i] = variable_info;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Replaces the elements of `right` inside `left` with the single `value`.
///
/// Both `left` and `right` are expected to be sorted in ascending order. If
/// every element of `right` occurs in `left`, those elements are removed from
/// `left`, `value` is inserted at its sorted position (unless it is already
/// present), and `true` is returned. Otherwise `left` is left untouched and
/// `false` is returned.
///
/// An empty `left` combined with a non-empty `right` returns `false`; this
/// case used to index past the end of `left` and panic. An empty `right` is
/// trivially contained in any `left`, so `value` is inserted and `true` is
/// returned.
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
    T: Eq + Ord,
{
    // Check that `left` contains every element of `right`, walking both sorted
    // lists in lock-step. The bounds check comes before each access so that an
    // exhausted (or empty) `left` reports "not contained" instead of panicking.
    let mut i = 0;
    for right_elem in right.iter() {
        while i < left.len() && left[i] < *right_elem {
            i += 1;
        }
        if i == left.len() || left[i] != *right_elem {
            return false;
        }
    }

    // Drop from `left` every element that also occurs in `right`, again
    // advancing a cursor through the sorted `right` list.
    let mut cursor = 0;
    left.retain(|left_elem| {
        while cursor < right.len() && right[cursor] < *left_elem {
            cursor += 1;
        }
        cursor == right.len() || right[cursor] != *left_elem
    });

    // Insert the replacement at its sorted position, avoiding duplicates.
    if let Err(position) = left.binary_search(&value) {
        left.insert(position, value);
    }

    true
}
|
||||
|
||||
pub(crate) fn generate_node_types_json(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
|
|
@ -158,8 +432,9 @@ pub(crate) fn generate_node_types_json(
|
|||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::generate::build_tables::build_parse_table::get_variable_info;
|
||||
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
|
||||
use crate::generate::grammars::{
|
||||
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
|
||||
};
|
||||
use crate::generate::prepare_grammar::prepare_grammar;
|
||||
use crate::generate::rules::Rule;
|
||||
|
||||
|
|
@ -331,6 +606,205 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
/// Checks the field information computed for fields declared directly on a
/// rule: a required field pointing at a hidden rule, and an optional field
/// that can hold two different token types.
#[test]
fn test_get_variable_info() {
    // Every production in this grammar uses the default dynamic precedence.
    let production = |steps| Production {
        dynamic_precedence: 0,
        steps,
    };

    let syntax_grammar = build_syntax_grammar(
        vec![
            // Required field `field1` has only one node type.
            SyntaxVariable {
                name: "rule0".to_string(),
                kind: VariableType::Named,
                productions: vec![production(vec![
                    ProductionStep::new(Symbol::terminal(0)),
                    ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"),
                ])],
            },
            // Hidden node
            SyntaxVariable {
                name: "_rule1".to_string(),
                kind: VariableType::Hidden,
                productions: vec![production(vec![ProductionStep::new(Symbol::terminal(1))])],
            },
            // Optional field `field2` can have two possible node types.
            SyntaxVariable {
                name: "rule2".to_string(),
                kind: VariableType::Named,
                productions: vec![
                    production(vec![ProductionStep::new(Symbol::terminal(0))]),
                    production(vec![
                        ProductionStep::new(Symbol::terminal(0)),
                        ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"),
                    ]),
                    production(vec![
                        ProductionStep::new(Symbol::terminal(0)),
                        ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"),
                    ]),
                ],
            },
        ],
        vec![],
    );
    let lexical_grammar = build_lexical_grammar();
    let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();

    // `field1` resolves through the hidden rule to the token it wraps.
    let mut expected_rule0_fields = HashMap::new();
    expected_rule0_fields.insert(
        "field1".to_string(),
        FieldInfo {
            required: true,
            multiple: false,
            types: vec![ChildType::Normal(Symbol::terminal(1))],
        },
    );
    assert_eq!(variable_info[0].fields, expected_rule0_fields);

    // `field2` is missing from the first production, so it is optional.
    let mut expected_rule2_fields = HashMap::new();
    expected_rule2_fields.insert(
        "field2".to_string(),
        FieldInfo {
            required: false,
            multiple: false,
            types: vec![
                ChildType::Normal(Symbol::terminal(2)),
                ChildType::Normal(Symbol::terminal(3)),
            ],
        },
    );
    assert_eq!(variable_info[2].fields, expected_rule2_fields);
}
|
||||
|
||||
/// Checks that a visible rule inherits the fields declared inside a hidden
/// rule that it contains.
#[test]
fn test_get_variable_info_with_inherited_fields() {
    let visible_rule = SyntaxVariable {
        name: "rule0".to_string(),
        kind: VariableType::Named,
        productions: vec![Production {
            dynamic_precedence: 0,
            steps: vec![
                ProductionStep::new(Symbol::terminal(0)),
                ProductionStep::new(Symbol::non_terminal(1)),
                ProductionStep::new(Symbol::terminal(1)),
            ],
        }],
    };

    // Hidden node with fields
    let hidden_rule = SyntaxVariable {
        name: "_rule1".to_string(),
        kind: VariableType::Hidden,
        productions: vec![Production {
            dynamic_precedence: 0,
            steps: vec![
                ProductionStep::new(Symbol::terminal(2)),
                ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
            ],
        }],
    };

    let syntax_grammar = build_syntax_grammar(vec![visible_rule, hidden_rule], vec![]);
    let lexical_grammar = build_lexical_grammar();
    let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();

    // `rule0` declares no field itself; `field1` comes from `_rule1`.
    let mut expected_fields = HashMap::new();
    expected_fields.insert(
        "field1".to_string(),
        FieldInfo {
            required: true,
            multiple: false,
            types: vec![ChildType::Normal(Symbol::terminal(3))],
        },
    );
    assert_eq!(variable_info[0].fields, expected_fields);
}
|
||||
|
||||
// When a field's content is a hidden rule registered as a supertype, the
// field's type list is expected to name the supertype itself rather than the
// tokens the supertype can expand to.
#[test]
fn test_get_variable_info_with_supertypes() {
    // Visible rule whose `field1` holds the hidden rule `_rule1`.
    let visible_rule = SyntaxVariable {
        name: "rule0".to_string(),
        kind: VariableType::Named,
        productions: vec![Production {
            dynamic_precedence: 0,
            steps: vec![
                ProductionStep::new(Symbol::terminal(0)),
                ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"),
                ProductionStep::new(Symbol::terminal(1)),
            ],
        }],
    };

    // Hidden rule with two single-token alternatives.
    let supertype_rule = SyntaxVariable {
        name: "_rule1".to_string(),
        kind: VariableType::Hidden,
        productions: vec![
            Production {
                dynamic_precedence: 0,
                steps: vec![ProductionStep::new(Symbol::terminal(2))],
            },
            Production {
                dynamic_precedence: 0,
                steps: vec![ProductionStep::new(Symbol::terminal(3))],
            },
        ],
    };

    // _rule1 is a supertype
    let supertype_symbols = vec![Symbol::non_terminal(1)];

    let syntax_grammar =
        build_syntax_grammar(vec![visible_rule, supertype_rule], supertype_symbols);
    let lexical_grammar = build_lexical_grammar();
    let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();

    let expected_fields: HashMap<_, _> = vec![(
        "field1".to_string(),
        FieldInfo {
            required: true,
            multiple: false,
            types: vec![ChildType::Normal(Symbol::non_terminal(1))],
        },
    )]
    .into_iter()
    .collect();

    assert_eq!(variable_info[0].fields, expected_fields);
}
|
||||
|
||||
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
|
||||
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
|
||||
prepare_grammar(&grammar).unwrap();
|
||||
|
|
@ -342,4 +816,27 @@ mod tests {
|
|||
&variable_info,
|
||||
)
|
||||
}
|
||||
|
||||
fn build_syntax_grammar(
|
||||
variables: Vec<SyntaxVariable>,
|
||||
supertype_symbols: Vec<Symbol>,
|
||||
) -> SyntaxGrammar {
|
||||
let mut syntax_grammar = SyntaxGrammar::default();
|
||||
syntax_grammar.variables = variables;
|
||||
syntax_grammar.supertype_symbols = supertype_symbols;
|
||||
syntax_grammar
|
||||
}
|
||||
|
||||
fn build_lexical_grammar() -> LexicalGrammar {
|
||||
let mut lexical_grammar = LexicalGrammar::default();
|
||||
for i in 0..10 {
|
||||
lexical_grammar.variables.push(LexicalVariable {
|
||||
name: format!("token_{}", i),
|
||||
kind: VariableType::Named,
|
||||
implicit_precedence: 0,
|
||||
start_state: 0,
|
||||
});
|
||||
}
|
||||
lexical_grammar
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -52,31 +52,10 @@ pub(crate) struct ProductionInfo {
|
|||
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
|
||||
}
|
||||
|
||||
/// The type of a child node, as recorded in field and child type lists.
///
/// Derives `PartialOrd`/`Ord`, so the declaration order of the variants
/// determines how values compare.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) enum ChildType {
|
||||
/// A child identified directly by its grammar symbol.
Normal(Symbol),
|
||||
/// A child identified by an alias.
Aliased(Alias),
|
||||
}
|
||||
|
||||
/// Summary of one named field of a syntax variable.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct FieldInfo {
|
||||
/// `false` for a field that is optional (the tests in this change expect
/// `false` when some productions of the rule omit the field).
pub required: bool,
|
||||
/// NOTE(review): presumably `true` when the field can hold more than one
/// child in a single node — confirm against the code that computes it.
pub multiple: bool,
|
||||
/// The child types that may appear in this field.
pub types: Vec<ChildType>,
|
||||
}
|
||||
|
||||
/// Per-variable node information: its fields and child types.
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct VariableInfo {
|
||||
/// Field descriptions keyed by field name.
pub fields: HashMap<String, FieldInfo>,
|
||||
/// NOTE(review): presumably the types of all children of this variable,
/// with or without a field name — confirm against the code that fills it.
pub child_types: Vec<ChildType>,
|
||||
/// NOTE(review): presumably set when at least one production of the
/// variable has more than one step — confirm against the code that sets it.
pub has_multi_step_production: bool,
|
||||
}
|
||||
|
||||
/// The generated parse table together with the metadata stored alongside it.
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTable {
|
||||
/// The parse states of the table.
pub states: Vec<ParseState>,
|
||||
/// Grammar symbols associated with the table.
pub symbols: Vec<Symbol>,
|
||||
/// One `VariableInfo` per syntax variable; the table builder looks entries
/// up by a non-terminal symbol's `index`.
pub variable_info: Vec<VariableInfo>,
|
||||
/// Production metadata (such as field maps) stored in the table.
pub production_infos: Vec<ProductionInfo>,
|
||||
/// NOTE(review): presumably the length of the longest production that
/// contains an alias — confirm against the code that computes it.
pub max_aliased_production_length: usize,
|
||||
}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue