Start work on handling node supertypes
This commit is contained in:
parent
445dfda53e
commit
b79bd8693b
13 changed files with 513 additions and 280 deletions
|
|
@ -47,7 +47,9 @@ struct ParseTableBuilder<'a> {
|
|||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<ParseTable> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.production_infos.push(ProductionInfo::default());
|
||||
self.parse_table
|
||||
.production_infos
|
||||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
|
@ -749,7 +751,7 @@ fn populate_following_tokens(
|
|||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> Vec<VariableInfo> {
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
// Determine which field names and child node types can appear directly
|
||||
|
|
@ -757,7 +759,9 @@ pub(crate) fn get_variable_info(
|
|||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
let mut info = VariableInfo {
|
||||
fields: HashMap::new(),
|
||||
child_types: HashSet::new(),
|
||||
subclasses: Vec::new(),
|
||||
child_types: Vec::new(),
|
||||
has_multi_step_production: false,
|
||||
};
|
||||
let is_recursive = variable
|
||||
.productions
|
||||
|
|
@ -765,6 +769,10 @@ pub(crate) fn get_variable_info(
|
|||
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
|
||||
|
||||
for production in &variable.productions {
|
||||
if production.steps.len() > 1 {
|
||||
info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
for step in &production.steps {
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
|
|
@ -776,13 +784,17 @@ pub(crate) fn get_variable_info(
|
|||
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: HashSet::new(),
|
||||
types: Vec::new(),
|
||||
});
|
||||
field_info.multiple |= is_recursive;
|
||||
field_info.types.insert(child_type.clone());
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
|
||||
info.child_types.insert(child_type);
|
||||
if let Err(i) = info.child_types.binary_search(&child_type) {
|
||||
info.child_types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -810,23 +822,25 @@ pub(crate) fn get_variable_info(
|
|||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
// Move this variable's info out of the vector so it can be modified
|
||||
// while reading from other entries of the vector.
|
||||
let mut variable_info = VariableInfo {
|
||||
fields: HashMap::new(),
|
||||
child_types: HashSet::new(),
|
||||
};
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
if step.symbol.kind == SymbolType::NonTerminal
|
||||
&& !syntax_grammar.variables[step.symbol.index]
|
||||
let child_symbol = step.symbol;
|
||||
if child_symbol.kind == SymbolType::NonTerminal
|
||||
&& !syntax_grammar.variables[child_symbol.index]
|
||||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let production_info = &result[step.symbol.index];
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
|
||||
if child_variable_info.has_multi_step_production {
|
||||
variable_info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
// Inherit fields from this hidden child
|
||||
for (field_name, child_field_info) in &production_info.fields {
|
||||
for (field_name, child_field_info) in &child_variable_info.fields {
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
|
|
@ -843,15 +857,17 @@ pub(crate) fn get_variable_info(
|
|||
done = false;
|
||||
}
|
||||
for child_type in &child_field_info.types {
|
||||
if field_info.types.insert(child_type.clone()) {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Inherit child types from this hidden child
|
||||
for child_type in &production_info.child_types {
|
||||
if variable_info.child_types.insert(child_type.clone()) {
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = variable_info.child_types.binary_search(&child_type) {
|
||||
variable_info.child_types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -860,8 +876,9 @@ pub(crate) fn get_variable_info(
|
|||
// for the field.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = variable_info.fields.get_mut(field_name).unwrap();
|
||||
for child_type in &production_info.child_types {
|
||||
if field_info.types.insert(child_type.clone()) {
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
|
|
@ -875,27 +892,111 @@ pub(crate) fn get_variable_info(
|
|||
}
|
||||
}
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
if variable.kind != VariableType::Hidden {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must be hidden, but `{}` is not",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
|
||||
if result[supertype_symbol.index].has_multi_step_production {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let child_type_is_visible = |child_type: &ChildType| match child_type {
|
||||
ChildType::Aliased(_) => true,
|
||||
ChildType::Normal(symbol) => {
|
||||
let step_kind = match symbol.kind {
|
||||
let variable_kind = match symbol.kind {
|
||||
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
|
||||
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
|
||||
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
|
||||
_ => VariableType::Hidden,
|
||||
};
|
||||
step_kind.is_visible()
|
||||
variable_kind.is_visible()
|
||||
}
|
||||
};
|
||||
|
||||
for variable_info in result.iter_mut() {
|
||||
variable_info.child_types.retain(&child_type_is_visible);
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
result[supertype_symbol.index]
|
||||
.child_types
|
||||
.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
for i in 0..result.len() {
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
// For each field, make the `types` list more concise by replacing sets of
|
||||
// subtypes with a single supertype.
|
||||
for (_, field_info) in variable_info.fields.iter_mut() {
|
||||
field_info.types.retain(&child_type_is_visible);
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
if sorted_vec_replace(
|
||||
&mut field_info.types,
|
||||
&result[supertype_symbol.index].child_types,
|
||||
ChildType::Normal(*supertype_symbol),
|
||||
) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
field_info.types.retain(|t| {
|
||||
if let ChildType::Normal(symbol) = t {
|
||||
if syntax_grammar.supertype_symbols.contains(&symbol) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
child_type_is_visible(t)
|
||||
});
|
||||
}
|
||||
|
||||
result[i] = variable_info;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
|
||||
where
|
||||
T: Eq + Ord,
|
||||
{
|
||||
let mut i = 0;
|
||||
for right_elem in right.iter() {
|
||||
while left[i] < *right_elem {
|
||||
i += 1;
|
||||
if i == left.len() {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if left[i] != *right_elem {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
i = 0;
|
||||
left.retain(|left_elem| {
|
||||
if i == right.len() {
|
||||
return true;
|
||||
}
|
||||
while right[i] < *left_elem {
|
||||
i += 1;
|
||||
if i == right.len() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
right[i] != *left_elem
|
||||
});
|
||||
|
||||
if let Err(i) = left.binary_search(&value) {
|
||||
left.insert(i, value);
|
||||
}
|
||||
|
||||
true
|
||||
}
|
||||
|
||||
pub(crate) fn build_parse_table(
|
||||
|
|
@ -913,6 +1014,8 @@ pub(crate) fn build_parse_table(
|
|||
&item_set_builder,
|
||||
);
|
||||
|
||||
let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?;
|
||||
|
||||
let table = ParseTableBuilder {
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
|
|
@ -926,7 +1029,7 @@ pub(crate) fn build_parse_table(
|
|||
symbols: Vec::new(),
|
||||
production_infos: Vec::new(),
|
||||
max_aliased_production_length: 0,
|
||||
variable_info: get_variable_info(syntax_grammar, lexical_grammar),
|
||||
variable_info,
|
||||
},
|
||||
}
|
||||
.build()?;
|
||||
|
|
@ -944,56 +1047,63 @@ mod tests {
|
|||
#[test]
|
||||
fn test_get_variable_info() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(vec![
|
||||
// Required field `field1` has only one node type.
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"),
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
// Required field `field1` has only one node type.
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_field_name("field1"),
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(1))],
|
||||
}],
|
||||
},
|
||||
// Optional field `field2` can have two possible node types.
|
||||
SyntaxVariable {
|
||||
name: "rule2".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(0))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(2))
|
||||
.with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(3))
|
||||
.with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(1))],
|
||||
}],
|
||||
},
|
||||
// Optional field `field2` can have two possible node types.
|
||||
SyntaxVariable {
|
||||
name: "rule2".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(0))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"),
|
||||
],
|
||||
},
|
||||
],
|
||||
},
|
||||
]),
|
||||
},
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
|
|
@ -1002,9 +1112,7 @@ mod tests {
|
|||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::terminal(1))]
|
||||
.into_iter()
|
||||
.collect::<HashSet<_>>(),
|
||||
types: vec![ChildType::Normal(Symbol::terminal(1))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
|
|
@ -1021,9 +1129,7 @@ mod tests {
|
|||
types: vec![
|
||||
ChildType::Normal(Symbol::terminal(2)),
|
||||
ChildType::Normal(Symbol::terminal(3)),
|
||||
]
|
||||
.into_iter()
|
||||
.collect::<HashSet<_>>(),
|
||||
],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
|
|
@ -1034,34 +1140,38 @@ mod tests {
|
|||
#[test]
|
||||
fn test_get_variable_info_with_inherited_fields() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(vec![
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node with fields
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
|
||||
],
|
||||
}],
|
||||
},
|
||||
]),
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1)),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
],
|
||||
}],
|
||||
},
|
||||
// Hidden node with fields
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(2)),
|
||||
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
|
||||
],
|
||||
}],
|
||||
},
|
||||
],
|
||||
vec![],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
);
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
|
|
@ -1070,9 +1180,7 @@ mod tests {
|
|||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::terminal(3))]
|
||||
.into_iter()
|
||||
.collect::<HashSet<_>>(),
|
||||
types: vec![ChildType::Normal(Symbol::terminal(3))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
|
|
@ -1080,9 +1188,68 @@ mod tests {
|
|||
);
|
||||
}
|
||||
|
||||
fn build_syntax_grammar(variables: Vec<SyntaxVariable>) -> SyntaxGrammar {
|
||||
#[test]
|
||||
fn test_get_variable_info_with_supertypes() {
|
||||
let variable_info = get_variable_info(
|
||||
&build_syntax_grammar(
|
||||
vec![
|
||||
SyntaxVariable {
|
||||
name: "rule0".to_string(),
|
||||
kind: VariableType::Named,
|
||||
productions: vec![Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![
|
||||
ProductionStep::new(Symbol::terminal(0)),
|
||||
ProductionStep::new(Symbol::non_terminal(1))
|
||||
.with_field_name("field1"),
|
||||
ProductionStep::new(Symbol::terminal(1)),
|
||||
],
|
||||
}],
|
||||
},
|
||||
SyntaxVariable {
|
||||
name: "_rule1".to_string(),
|
||||
kind: VariableType::Hidden,
|
||||
productions: vec![
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(2))],
|
||||
},
|
||||
Production {
|
||||
dynamic_precedence: 0,
|
||||
steps: vec![ProductionStep::new(Symbol::terminal(3))],
|
||||
},
|
||||
],
|
||||
},
|
||||
],
|
||||
// _rule1 is a supertype
|
||||
vec![Symbol::non_terminal(1)],
|
||||
),
|
||||
&build_lexical_grammar(),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
variable_info[0].fields,
|
||||
vec![(
|
||||
"field1".to_string(),
|
||||
FieldInfo {
|
||||
required: true,
|
||||
multiple: false,
|
||||
types: vec![ChildType::Normal(Symbol::non_terminal(1))],
|
||||
}
|
||||
)]
|
||||
.into_iter()
|
||||
.collect::<HashMap<_, _>>()
|
||||
);
|
||||
}
|
||||
|
||||
fn build_syntax_grammar(
|
||||
variables: Vec<SyntaxVariable>,
|
||||
supertype_symbols: Vec<Symbol>,
|
||||
) -> SyntaxGrammar {
|
||||
let mut syntax_grammar = SyntaxGrammar::default();
|
||||
syntax_grammar.variables = variables;
|
||||
syntax_grammar.supertype_symbols = supertype_symbols;
|
||||
syntax_grammar
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue