node types: Preserve all supertypes in field type lists

This commit is contained in:
Max Brunsfeld 2019-03-27 16:17:02 -07:00
parent 451478c620
commit eb96dd6ddb
5 changed files with 526 additions and 521 deletions

View file

@ -4,10 +4,11 @@ use crate::error::{Error, Result};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Symbol, SymbolType};
use crate::generate::tables::{
ChildType, FieldInfo, FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable,
ParseTableEntry, ProductionInfo, ProductionInfoId, VariableInfo,
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use core::ops::Range;
use hashbrown::hash_map::Entry;
@ -16,7 +17,7 @@ use std::collections::hash_map::DefaultHasher;
use std::collections::{BTreeMap, VecDeque};
use std::fmt::Write;
use std::hash::Hasher;
use std::{mem, u32};
use std::u32;
#[derive(Clone)]
struct AuxiliarySymbolInfo {
@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder<'a>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
@ -670,7 +672,7 @@ impl<'a> ParseTableBuilder<'a> {
.kind
.is_visible()
{
let info = &self.parse_table.variable_info[step.symbol.index];
let info = &self.variable_info[step.symbol.index];
for (field_name, _) in &info.fields {
production_info
.field_map
@ -748,261 +750,11 @@ fn populate_following_tokens(
}
}
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> Result<Vec<VariableInfo>> {
let mut result = Vec::new();
// Determine which field names and child node types can appear directly
// within each type of node.
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut info = VariableInfo {
fields: HashMap::new(),
child_types: Vec::new(),
has_multi_step_production: false,
};
let is_recursive = variable
.productions
.iter()
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
for production in &variable.productions {
if production.steps.len() > 1 {
info.has_multi_step_production = true;
}
for step in &production.steps {
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(step.symbol)
};
if let Some(field_name) = &step.field_name {
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
});
field_info.multiple |= is_recursive;
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
}
}
if let Err(i) = info.child_types.binary_search(&child_type) {
info.child_types.insert(i, child_type.clone());
}
}
}
for production in &variable.productions {
let production_fields: Vec<&String> = production
.steps
.iter()
.filter_map(|s| s.field_name.as_ref())
.collect();
for (field_name, field_info) in info.fields.iter_mut() {
if !production_fields.contains(&field_name) {
field_info.required = false;
}
}
}
result.push(info);
}
// Expand each node type's information recursively to inherit the properties of
// hidden children.
let mut done = false;
while !done {
done = true;
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
// Move this variable's info out of the vector so it can be modified
// while reading from other entries of the vector.
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
for production in &variable.productions {
for step in &production.steps {
let child_symbol = step.symbol;
if step.alias.is_none()
&& child_symbol.kind == SymbolType::NonTerminal
&& !syntax_grammar.variables[child_symbol.index]
.kind
.is_visible()
{
let child_variable_info = &result[child_symbol.index];
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child
for (field_name, child_field_info) in &child_variable_info.fields {
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert_with(|| {
done = false;
child_field_info.clone()
});
if child_field_info.multiple && !field_info.multiple {
field_info.multiple = child_field_info.multiple;
done = false;
}
if !child_field_info.required && field_info.required {
field_info.required = child_field_info.required;
done = false;
}
for child_type in &child_field_info.types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
}
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
if let Err(i) = variable_info.child_types.binary_search(&child_type) {
variable_info.child_types.insert(i, child_type.clone());
done = false;
}
}
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
}
}
}
}
// Move this variable's info back into the vector.
result[i] = variable_info;
}
}
for supertype_symbol in &syntax_grammar.supertype_symbols {
let variable = &syntax_grammar.variables[supertype_symbol.index];
if variable.kind != VariableType::Hidden {
return Err(Error::grammar(&format!(
"Supertype symbols must be hidden, but `{}` is not",
variable.name
)));
}
if result[supertype_symbol.index].has_multi_step_production {
return Err(Error::grammar(&format!(
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
variable.name
)));
}
}
let child_type_is_visible = |child_type: &ChildType| match child_type {
ChildType::Aliased(_) => true,
ChildType::Normal(symbol) => {
let variable_kind = match symbol.kind {
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
_ => VariableType::Hidden,
};
variable_kind.is_visible()
}
};
for supertype_symbol in &syntax_grammar.supertype_symbols {
result[supertype_symbol.index]
.child_types
.retain(child_type_is_visible);
}
for i in 0..result.len() {
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
// For each field, make the `types` list more concise by replacing sets of
// subtypes with a single supertype.
for (_, field_info) in variable_info.fields.iter_mut() {
for supertype_symbol in &syntax_grammar.supertype_symbols {
if sorted_vec_replace(
&mut field_info.types,
&result[supertype_symbol.index].child_types,
ChildType::Normal(*supertype_symbol),
) {
break;
}
}
field_info.types.retain(|t| {
if let ChildType::Normal(symbol) = t {
if syntax_grammar.supertype_symbols.contains(&symbol) {
return true;
}
}
child_type_is_visible(t)
});
}
result[i] = variable_info;
}
Ok(result)
}
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
T: Eq + Ord,
{
let mut i = 0;
for right_elem in right.iter() {
while left[i] < *right_elem {
i += 1;
if i == left.len() {
return false;
}
}
if left[i] != *right_elem {
return false;
}
}
i = 0;
left.retain(|left_elem| {
if i == right.len() {
return true;
}
while right[i] < *left_elem {
i += 1;
if i == right.len() {
return true;
}
}
right[i] != *left_elem
});
if let Err(i) = left.binary_search(&value) {
left.insert(i, value);
}
true
}
pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
variable_info: &Vec<VariableInfo>,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, Vec<TokenSet>)> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
@ -1014,13 +766,12 @@ pub(crate) fn build_parse_table(
&item_set_builder,
);
let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?;
let table = ParseTableBuilder {
syntax_grammar,
lexical_grammar,
state_ids_to_log,
item_set_builder,
variable_info,
state_ids_by_item_set: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_queue: VecDeque::new(),
@ -1029,240 +780,9 @@ pub(crate) fn build_parse_table(
symbols: Vec::new(),
production_infos: Vec::new(),
max_aliased_production_length: 0,
variable_info,
},
}
.build()?;
Ok((table, following_tokens))
}
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
};
#[test]
fn test_get_variable_info() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
// Required field `field1` has only one node type.
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
],
}],
},
// Hidden node
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
}],
},
// Optional field `field2` can have two possible node types.
SyntaxVariable {
name: "rule2".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(2))
.with_field_name("field2"),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(3))
.with_field_name("field2"),
],
},
],
},
],
vec![],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
assert_eq!(
variable_info[2].fields,
vec![(
"field2".to_string(),
FieldInfo {
required: false,
multiple: false,
types: vec![
ChildType::Normal(Symbol::terminal(2)),
ChildType::Normal(Symbol::terminal(3)),
],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_inherited_fields() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::terminal(1)),
],
}],
},
// Hidden node with fields
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
],
}],
},
],
vec![],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(3))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_supertypes() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
ProductionStep::new(Symbol::terminal(1)),
],
}],
},
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(2))],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(3))],
},
],
},
],
// _rule1 is a supertype
vec![Symbol::non_terminal(1)],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::non_terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
fn build_syntax_grammar(
variables: Vec<SyntaxVariable>,
supertype_symbols: Vec<Symbol>,
) -> SyntaxGrammar {
let mut syntax_grammar = SyntaxGrammar::default();
syntax_grammar.variables = variables;
syntax_grammar.supertype_symbols = supertype_symbols;
syntax_grammar
}
fn build_lexical_grammar() -> LexicalGrammar {
let mut lexical_grammar = LexicalGrammar::default();
for i in 0..10 {
lexical_grammar.variables.push(LexicalVariable {
name: format!("token_{}", i),
kind: VariableType::Named,
implicit_precedence: 0,
start_state: 0,
});
}
lexical_grammar
}
}

View file

@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use log::info;
@ -23,12 +24,18 @@ pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
inlines: &InlinedProductionMap,
minimize: bool,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
let (mut parse_table, following_tokens) = build_parse_table(
syntax_grammar,
lexical_grammar,
inlines,
variable_info,
state_ids_to_log,
)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(

View file

@ -99,21 +99,23 @@ fn generate_parser_for_grammar_with_opts(
let input_grammar = parse_grammar(grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
);
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
&inlines,
minimize,
state_ids_to_log,
)?;
let name = input_grammar.name;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&parse_table.variable_info,
);
let c_code = render_c_code(
&name,
parse_table,

View file

@ -1,8 +1,30 @@
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use super::rules::{AliasMap, Symbol, SymbolType};
use super::tables::{ChildType, VariableInfo};
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::error::{Error, Result};
use hashbrown::HashMap;
use serde_derive::Serialize;
use std::collections::BTreeMap;
use std::mem;
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ChildType {
Normal(Symbol),
Aliased(Alias),
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldInfo {
pub required: bool,
pub multiple: bool,
pub types: Vec<ChildType>,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub child_types: Vec<ChildType>,
pub has_multi_step_production: bool,
}
#[derive(Debug, Serialize, PartialEq, Eq, Default)]
pub(crate) struct NodeInfoJSON {
@ -29,6 +51,258 @@ pub(crate) struct FieldInfoJSON {
types: Vec<NodeTypeJSON>,
}
pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> Result<Vec<VariableInfo>> {
let mut result = Vec::new();
// Determine which field names and child node types can appear directly
// within each type of node.
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut info = VariableInfo {
fields: HashMap::new(),
child_types: Vec::new(),
has_multi_step_production: false,
};
let is_recursive = variable
.productions
.iter()
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
for production in &variable.productions {
if production.steps.len() > 1 {
info.has_multi_step_production = true;
}
for step in &production.steps {
let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone())
} else {
ChildType::Normal(step.symbol)
};
if let Some(field_name) = &step.field_name {
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
multiple: false,
required: true,
types: Vec::new(),
});
field_info.multiple |= is_recursive;
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
}
}
if let Err(i) = info.child_types.binary_search(&child_type) {
info.child_types.insert(i, child_type.clone());
}
}
}
for production in &variable.productions {
let production_fields: Vec<&String> = production
.steps
.iter()
.filter_map(|s| s.field_name.as_ref())
.collect();
for (field_name, field_info) in info.fields.iter_mut() {
if !production_fields.contains(&field_name) {
field_info.required = false;
}
}
}
result.push(info);
}
// Expand each node type's information recursively to inherit the properties of
// hidden children.
let mut done = false;
while !done {
done = true;
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
// Move this variable's info out of the vector so it can be modified
// while reading from other entries of the vector.
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
for production in &variable.productions {
for step in &production.steps {
let child_symbol = step.symbol;
if step.alias.is_none()
&& child_symbol.kind == SymbolType::NonTerminal
&& !syntax_grammar.variables[child_symbol.index]
.kind
.is_visible()
{
let child_variable_info = &result[child_symbol.index];
// If a hidden child can have multiple children, then this
// node can appear to have multiple children.
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child
for (field_name, child_field_info) in &child_variable_info.fields {
let field_info = variable_info
.fields
.entry(field_name.clone())
.or_insert_with(|| {
done = false;
child_field_info.clone()
});
if child_field_info.multiple && !field_info.multiple {
field_info.multiple = child_field_info.multiple;
done = false;
}
if !child_field_info.required && field_info.required {
field_info.required = child_field_info.required;
done = false;
}
for child_type in &child_field_info.types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
}
if !syntax_grammar.supertype_symbols.contains(&child_symbol) {
// Inherit child types from this hidden child
for child_type in &child_variable_info.child_types {
if let Err(i) = variable_info.child_types.binary_search(&child_type)
{
variable_info.child_types.insert(i, child_type.clone());
done = false;
}
}
// If any field points to this hidden child, inherit child types
// for the field.
if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &child_variable_info.child_types {
if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false;
}
}
}
}
}
}
}
// Move this variable's info back into the vector.
result[i] = variable_info;
}
}
for supertype_symbol in &syntax_grammar.supertype_symbols {
let variable = &syntax_grammar.variables[supertype_symbol.index];
if variable.kind != VariableType::Hidden {
return Err(Error::grammar(&format!(
"Supertype symbols must be hidden, but `{}` is not",
variable.name
)));
}
if result[supertype_symbol.index].has_multi_step_production {
return Err(Error::grammar(&format!(
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
variable.name
)));
}
}
let child_type_is_visible = |child_type: &ChildType| match child_type {
ChildType::Aliased(_) => true,
ChildType::Normal(symbol) => {
if syntax_grammar.supertype_symbols.contains(&symbol) {
return true;
}
let variable_kind = match symbol.kind {
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
_ => VariableType::Hidden,
};
variable_kind.is_visible()
}
};
for supertype_symbol in &syntax_grammar.supertype_symbols {
result[supertype_symbol.index]
.child_types
.retain(child_type_is_visible);
}
for i in 0..result.len() {
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
// For each field, make the `types` list more concise by replacing sets of
// subtypes with a single supertype.
for (_, field_info) in variable_info.fields.iter_mut() {
for supertype_symbol in &syntax_grammar.supertype_symbols {
if sorted_vec_replace(
&mut field_info.types,
&result[supertype_symbol.index].child_types,
ChildType::Normal(*supertype_symbol),
) {
break;
}
}
field_info.types.retain(child_type_is_visible);
}
result[i] = variable_info;
}
Ok(result)
}
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
T: Eq + Ord,
{
let mut i = 0;
for right_elem in right.iter() {
while left[i] < *right_elem {
i += 1;
if i == left.len() {
return false;
}
}
if left[i] != *right_elem {
return false;
}
}
i = 0;
left.retain(|left_elem| {
if i == right.len() {
return true;
}
while right[i] < *left_elem {
i += 1;
if i == right.len() {
return true;
}
}
right[i] != *left_elem
});
if let Err(i) = left.binary_search(&value) {
left.insert(i, value);
}
true
}
pub(crate) fn generate_node_types_json(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -158,8 +432,9 @@ pub(crate) fn generate_node_types_json(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::build_tables::build_parse_table::get_variable_info;
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
use crate::generate::grammars::{
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
};
use crate::generate::prepare_grammar::prepare_grammar;
use crate::generate::rules::Rule;
@ -331,6 +606,205 @@ mod tests {
);
}
#[test]
fn test_get_variable_info() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
// Required field `field1` has only one node type.
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
],
}],
},
// Hidden node
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
}],
},
// Optional field `field2` can have two possible node types.
SyntaxVariable {
name: "rule2".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(2))
.with_field_name("field2"),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(3))
.with_field_name("field2"),
],
},
],
},
],
vec![],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
assert_eq!(
variable_info[2].fields,
vec![(
"field2".to_string(),
FieldInfo {
required: false,
multiple: false,
types: vec![
ChildType::Normal(Symbol::terminal(2)),
ChildType::Normal(Symbol::terminal(3)),
],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_inherited_fields() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1)),
ProductionStep::new(Symbol::terminal(1)),
],
}],
},
// Hidden node with fields
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
],
}],
},
],
vec![],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(3))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
#[test]
fn test_get_variable_info_with_supertypes() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
ProductionStep::new(Symbol::terminal(1)),
],
}],
},
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(2))],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(3))],
},
],
},
],
// _rule1 is a supertype
vec![Symbol::non_terminal(1)],
),
&build_lexical_grammar(),
)
.unwrap();
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::non_terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
let (syntax_grammar, lexical_grammar, _, simple_aliases) =
prepare_grammar(&grammar).unwrap();
@ -342,4 +816,27 @@ mod tests {
&variable_info,
)
}
fn build_syntax_grammar(
variables: Vec<SyntaxVariable>,
supertype_symbols: Vec<Symbol>,
) -> SyntaxGrammar {
let mut syntax_grammar = SyntaxGrammar::default();
syntax_grammar.variables = variables;
syntax_grammar.supertype_symbols = supertype_symbols;
syntax_grammar
}
fn build_lexical_grammar() -> LexicalGrammar {
let mut lexical_grammar = LexicalGrammar::default();
for i in 0..10 {
lexical_grammar.variables.push(LexicalVariable {
name: format!("token_{}", i),
kind: VariableType::Named,
implicit_precedence: 0,
start_state: 0,
});
}
lexical_grammar
}
}

View file

@ -52,31 +52,10 @@ pub(crate) struct ProductionInfo {
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ChildType {
Normal(Symbol),
Aliased(Alias),
}
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldInfo {
pub required: bool,
pub multiple: bool,
pub types: Vec<ChildType>,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>,
pub child_types: Vec<ChildType>,
pub has_multi_step_production: bool,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub variable_info: Vec<VariableInfo>,
pub production_infos: Vec<ProductionInfo>,
pub max_aliased_production_length: usize,
}