Start work on handling node supertypes

This commit is contained in:
Max Brunsfeld 2019-03-08 06:20:07 -05:00
parent 445dfda53e
commit b79bd8693b
13 changed files with 513 additions and 280 deletions

View file

@ -47,7 +47,9 @@ struct ParseTableBuilder<'a> {
impl<'a> ParseTableBuilder<'a> { impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<ParseTable> { fn build(mut self) -> Result<ParseTable> {
// Ensure that the empty alias sequence has index 0. // Ensure that the empty alias sequence has index 0.
self.parse_table.production_infos.push(ProductionInfo::default()); self.parse_table
.production_infos
.push(ProductionInfo::default());
// Add the error state at index 0. // Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default()); self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
@ -749,7 +751,7 @@ fn populate_following_tokens(
pub(crate) fn get_variable_info( pub(crate) fn get_variable_info(
syntax_grammar: &SyntaxGrammar, syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar, lexical_grammar: &LexicalGrammar,
) -> Vec<VariableInfo> { ) -> Result<Vec<VariableInfo>> {
let mut result = Vec::new(); let mut result = Vec::new();
// Determine which field names and child node types can appear directly // Determine which field names and child node types can appear directly
@ -757,7 +759,9 @@ pub(crate) fn get_variable_info(
for (i, variable) in syntax_grammar.variables.iter().enumerate() { for (i, variable) in syntax_grammar.variables.iter().enumerate() {
let mut info = VariableInfo { let mut info = VariableInfo {
fields: HashMap::new(), fields: HashMap::new(),
child_types: HashSet::new(), subclasses: Vec::new(),
child_types: Vec::new(),
has_multi_step_production: false,
}; };
let is_recursive = variable let is_recursive = variable
.productions .productions
@ -765,6 +769,10 @@ pub(crate) fn get_variable_info(
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i))); .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
for production in &variable.productions { for production in &variable.productions {
if production.steps.len() > 1 {
info.has_multi_step_production = true;
}
for step in &production.steps { for step in &production.steps {
let child_type = if let Some(alias) = &step.alias { let child_type = if let Some(alias) = &step.alias {
ChildType::Aliased(alias.clone()) ChildType::Aliased(alias.clone())
@ -776,13 +784,17 @@ pub(crate) fn get_variable_info(
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo { let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
multiple: false, multiple: false,
required: true, required: true,
types: HashSet::new(), types: Vec::new(),
}); });
field_info.multiple |= is_recursive; field_info.multiple |= is_recursive;
field_info.types.insert(child_type.clone()); if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
}
} }
info.child_types.insert(child_type); if let Err(i) = info.child_types.binary_search(&child_type) {
info.child_types.insert(i, child_type.clone());
}
} }
} }
@ -810,23 +822,25 @@ pub(crate) fn get_variable_info(
for (i, variable) in syntax_grammar.variables.iter().enumerate() { for (i, variable) in syntax_grammar.variables.iter().enumerate() {
// Move this variable's info out of the vector so it can be modified // Move this variable's info out of the vector so it can be modified
// while reading from other entries of the vector. // while reading from other entries of the vector.
let mut variable_info = VariableInfo { let mut variable_info = VariableInfo::default();
fields: HashMap::new(),
child_types: HashSet::new(),
};
mem::swap(&mut variable_info, &mut result[i]); mem::swap(&mut variable_info, &mut result[i]);
for production in &variable.productions { for production in &variable.productions {
for step in &production.steps { for step in &production.steps {
if step.symbol.kind == SymbolType::NonTerminal let child_symbol = step.symbol;
&& !syntax_grammar.variables[step.symbol.index] if child_symbol.kind == SymbolType::NonTerminal
&& !syntax_grammar.variables[child_symbol.index]
.kind .kind
.is_visible() .is_visible()
{ {
let production_info = &result[step.symbol.index]; let child_variable_info = &result[child_symbol.index];
if child_variable_info.has_multi_step_production {
variable_info.has_multi_step_production = true;
}
// Inherit fields from this hidden child // Inherit fields from this hidden child
for (field_name, child_field_info) in &production_info.fields { for (field_name, child_field_info) in &child_variable_info.fields {
let field_info = variable_info let field_info = variable_info
.fields .fields
.entry(field_name.clone()) .entry(field_name.clone())
@ -843,15 +857,17 @@ pub(crate) fn get_variable_info(
done = false; done = false;
} }
for child_type in &child_field_info.types { for child_type in &child_field_info.types {
if field_info.types.insert(child_type.clone()) { if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false; done = false;
} }
} }
} }
// Inherit child types from this hidden child // Inherit child types from this hidden child
for child_type in &production_info.child_types { for child_type in &child_variable_info.child_types {
if variable_info.child_types.insert(child_type.clone()) { if let Err(i) = variable_info.child_types.binary_search(&child_type) {
variable_info.child_types.insert(i, child_type.clone());
done = false; done = false;
} }
} }
@ -860,8 +876,9 @@ pub(crate) fn get_variable_info(
// for the field. // for the field.
if let Some(field_name) = &step.field_name { if let Some(field_name) = &step.field_name {
let field_info = variable_info.fields.get_mut(field_name).unwrap(); let field_info = variable_info.fields.get_mut(field_name).unwrap();
for child_type in &production_info.child_types { for child_type in &child_variable_info.child_types {
if field_info.types.insert(child_type.clone()) { if let Err(i) = field_info.types.binary_search(&child_type) {
field_info.types.insert(i, child_type.clone());
done = false; done = false;
} }
} }
@ -875,27 +892,111 @@ pub(crate) fn get_variable_info(
} }
} }
for supertype_symbol in &syntax_grammar.supertype_symbols {
let variable = &syntax_grammar.variables[supertype_symbol.index];
if variable.kind != VariableType::Hidden {
return Err(Error::grammar(&format!(
"Supertype symbols must be hidden, but `{}` is not",
variable.name
)));
}
if result[supertype_symbol.index].has_multi_step_production {
return Err(Error::grammar(&format!(
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
variable.name
)));
}
}
let child_type_is_visible = |child_type: &ChildType| match child_type { let child_type_is_visible = |child_type: &ChildType| match child_type {
ChildType::Aliased(_) => true, ChildType::Aliased(_) => true,
ChildType::Normal(symbol) => { ChildType::Normal(symbol) => {
let step_kind = match symbol.kind { let variable_kind = match symbol.kind {
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind, SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind, SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind, SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
_ => VariableType::Hidden, _ => VariableType::Hidden,
}; };
step_kind.is_visible() variable_kind.is_visible()
} }
}; };
for variable_info in result.iter_mut() { for supertype_symbol in &syntax_grammar.supertype_symbols {
variable_info.child_types.retain(&child_type_is_visible); result[supertype_symbol.index]
.child_types
.retain(child_type_is_visible);
}
for i in 0..result.len() {
let mut variable_info = VariableInfo::default();
mem::swap(&mut variable_info, &mut result[i]);
// For each field, make the `types` list more concise by replacing sets of
// subtypes with a single supertype.
for (_, field_info) in variable_info.fields.iter_mut() { for (_, field_info) in variable_info.fields.iter_mut() {
field_info.types.retain(&child_type_is_visible); for supertype_symbol in &syntax_grammar.supertype_symbols {
if sorted_vec_replace(
&mut field_info.types,
&result[supertype_symbol.index].child_types,
ChildType::Normal(*supertype_symbol),
) {
break;
}
}
field_info.types.retain(|t| {
if let ChildType::Normal(symbol) = t {
if syntax_grammar.supertype_symbols.contains(&symbol) {
return true;
}
}
child_type_is_visible(t)
});
}
result[i] = variable_info;
}
Ok(result)
}
/// Replaces a group of elements in a sorted vector with a single value.
///
/// Both `left` and `right` are expected to be sorted in ascending order; the
/// scan below and the final `binary_search` rely on that ordering.
///
/// If every element of `right` is present in `left`, those elements are
/// removed from `left`, `value` is inserted at its sorted position (unless an
/// equal element is already there), and `true` is returned. Otherwise `left`
/// is left untouched and `false` is returned.
///
/// Edge cases:
/// * An empty `left` never contains anything to replace, so the function
///   returns `false` without modifying it (previously this indexed `left[0]`
///   and panicked when `right` was non-empty).
/// * An empty `right` is trivially contained in a non-empty `left`; nothing is
///   removed, `value` is inserted, and `true` is returned.
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
    T: Eq + Ord,
{
    // Guard the indexing in the subset scan: with no elements there is
    // nothing that could be replaced.
    if left.is_empty() {
        return false;
    }

    // Phase 1: check that `right` is a subset of `left` using one forward
    // pass over both sorted sequences. `i` only ever moves forward in `left`.
    let mut i = 0;
    for right_elem in right.iter() {
        while left[i] < *right_elem {
            i += 1;
            if i == left.len() {
                // Ran off the end of `left` before finding `right_elem`.
                return false;
            }
        }
        if left[i] != *right_elem {
            // `left` skipped past `right_elem`, so it is missing.
            return false;
        }
    }

    // Phase 2: drop from `left` every element that also occurs in `right`,
    // again walking `right` forward in step with `retain`'s in-order visit.
    i = 0;
    left.retain(|left_elem| {
        if i == right.len() {
            return true;
        }
        while right[i] < *left_elem {
            i += 1;
            if i == right.len() {
                return true;
            }
        }
        right[i] != *left_elem
    });

    // Phase 3: insert the replacement while keeping `left` sorted and
    // free of duplicates.
    if let Err(i) = left.binary_search(&value) {
        left.insert(i, value);
    }

    true
}
pub(crate) fn build_parse_table( pub(crate) fn build_parse_table(
@ -913,6 +1014,8 @@ pub(crate) fn build_parse_table(
&item_set_builder, &item_set_builder,
); );
let variable_info = get_variable_info(syntax_grammar, lexical_grammar)?;
let table = ParseTableBuilder { let table = ParseTableBuilder {
syntax_grammar, syntax_grammar,
lexical_grammar, lexical_grammar,
@ -926,7 +1029,7 @@ pub(crate) fn build_parse_table(
symbols: Vec::new(), symbols: Vec::new(),
production_infos: Vec::new(), production_infos: Vec::new(),
max_aliased_production_length: 0, max_aliased_production_length: 0,
variable_info: get_variable_info(syntax_grammar, lexical_grammar), variable_info,
}, },
} }
.build()?; .build()?;
@ -944,56 +1047,63 @@ mod tests {
#[test] #[test]
fn test_get_variable_info() { fn test_get_variable_info() {
let variable_info = get_variable_info( let variable_info = get_variable_info(
&build_syntax_grammar(vec![ &build_syntax_grammar(
// Required field `field1` has only one node type. vec![
SyntaxVariable { // Required field `field1` has only one node type.
name: "rule0".to_string(), SyntaxVariable {
kind: VariableType::Named, name: "rule0".to_string(),
productions: vec![Production { kind: VariableType::Named,
dynamic_precedence: 0, productions: vec![Production {
steps: vec![ dynamic_precedence: 0,
ProductionStep::new(Symbol::terminal(0)), steps: vec![
ProductionStep::new(Symbol::non_terminal(1)).with_field_name("field1"), ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
],
}],
},
// Hidden node
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
}],
},
// Optional field `field2` can have two possible node types.
SyntaxVariable {
name: "rule2".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(2))
.with_field_name("field2"),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(3))
.with_field_name("field2"),
],
},
], ],
}], },
}, ],
// Hidden node vec![],
SyntaxVariable { ),
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(1))],
}],
},
// Optional field `field2` can have two possible node types.
SyntaxVariable {
name: "rule2".to_string(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(0))],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(2)).with_field_name("field2"),
],
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("field2"),
],
},
],
},
]),
&build_lexical_grammar(), &build_lexical_grammar(),
); )
.unwrap();
assert_eq!( assert_eq!(
variable_info[0].fields, variable_info[0].fields,
@ -1002,9 +1112,7 @@ mod tests {
FieldInfo { FieldInfo {
required: true, required: true,
multiple: false, multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(1))] types: vec![ChildType::Normal(Symbol::terminal(1))],
.into_iter()
.collect::<HashSet<_>>(),
} }
)] )]
.into_iter() .into_iter()
@ -1021,9 +1129,7 @@ mod tests {
types: vec![ types: vec![
ChildType::Normal(Symbol::terminal(2)), ChildType::Normal(Symbol::terminal(2)),
ChildType::Normal(Symbol::terminal(3)), ChildType::Normal(Symbol::terminal(3)),
] ],
.into_iter()
.collect::<HashSet<_>>(),
} }
)] )]
.into_iter() .into_iter()
@ -1034,34 +1140,38 @@ mod tests {
#[test] #[test]
fn test_get_variable_info_with_inherited_fields() { fn test_get_variable_info_with_inherited_fields() {
let variable_info = get_variable_info( let variable_info = get_variable_info(
&build_syntax_grammar(vec![ &build_syntax_grammar(
SyntaxVariable { vec![
name: "rule0".to_string(), SyntaxVariable {
kind: VariableType::Named, name: "rule0".to_string(),
productions: vec![Production { kind: VariableType::Named,
dynamic_precedence: 0, productions: vec![Production {
steps: vec![ dynamic_precedence: 0,
ProductionStep::new(Symbol::terminal(0)), steps: vec![
ProductionStep::new(Symbol::non_terminal(1)), ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)), ProductionStep::new(Symbol::non_terminal(1)),
], ProductionStep::new(Symbol::terminal(1)),
}], ],
}, }],
// Hidden node with fields },
SyntaxVariable { // Hidden node with fields
name: "_rule1".to_string(), SyntaxVariable {
kind: VariableType::Hidden, name: "_rule1".to_string(),
productions: vec![Production { kind: VariableType::Hidden,
dynamic_precedence: 0, productions: vec![Production {
steps: vec![ dynamic_precedence: 0,
ProductionStep::new(Symbol::terminal(2)), steps: vec![
ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"), ProductionStep::new(Symbol::terminal(2)),
], ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
}], ],
}, }],
]), },
],
vec![],
),
&build_lexical_grammar(), &build_lexical_grammar(),
); )
.unwrap();
assert_eq!( assert_eq!(
variable_info[0].fields, variable_info[0].fields,
@ -1070,9 +1180,7 @@ mod tests {
FieldInfo { FieldInfo {
required: true, required: true,
multiple: false, multiple: false,
types: vec![ChildType::Normal(Symbol::terminal(3))] types: vec![ChildType::Normal(Symbol::terminal(3))],
.into_iter()
.collect::<HashSet<_>>(),
} }
)] )]
.into_iter() .into_iter()
@ -1080,9 +1188,68 @@ mod tests {
); );
} }
fn build_syntax_grammar(variables: Vec<SyntaxVariable>) -> SyntaxGrammar { #[test]
// Checks how `get_variable_info` reports a field whose content is a hidden
// rule that has been declared as a supertype: the field's `types` list is
// expected to name the supertype symbol itself instead of the individual
// terminals that the hidden rule can produce.
fn test_get_variable_info_with_supertypes() {
let variable_info = get_variable_info(
&build_syntax_grammar(
vec![
// Visible rule whose middle step is the hidden rule, tagged `field1`.
SyntaxVariable {
name: "rule0".to_string(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::non_terminal(1))
.with_field_name("field1"),
ProductionStep::new(Symbol::terminal(1)),
],
}],
},
// Hidden rule with two alternative single-step productions
// (terminal 2 or terminal 3).
SyntaxVariable {
name: "_rule1".to_string(),
kind: VariableType::Hidden,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(2))],
},
Production {
dynamic_precedence: 0,
steps: vec![ProductionStep::new(Symbol::terminal(3))],
},
],
},
],
// _rule1 is a supertype
vec![Symbol::non_terminal(1)],
),
&build_lexical_grammar(),
)
.unwrap();
// `field1` on rule0 should list only the supertype (non-terminal 1), not
// terminals 2 and 3, and should remain required and non-repeated.
assert_eq!(
variable_info[0].fields,
vec![(
"field1".to_string(),
FieldInfo {
required: true,
multiple: false,
types: vec![ChildType::Normal(Symbol::non_terminal(1))],
}
)]
.into_iter()
.collect::<HashMap<_, _>>()
);
}
fn build_syntax_grammar(
variables: Vec<SyntaxVariable>,
supertype_symbols: Vec<Symbol>,
) -> SyntaxGrammar {
let mut syntax_grammar = SyntaxGrammar::default(); let mut syntax_grammar = SyntaxGrammar::default();
syntax_grammar.variables = variables; syntax_grammar.variables = variables;
syntax_grammar.supertype_symbols = supertype_symbols;
syntax_grammar syntax_grammar
} }

View file

@ -212,137 +212,154 @@ function RuleBuilder(ruleMap) {
} }
function grammar(baseGrammar, options) { function grammar(baseGrammar, options) {
if (!options) { if (!options) {
options = baseGrammar; options = baseGrammar;
baseGrammar = { baseGrammar = {
name: null, name: null,
rules: {}, rules: {},
extras: [normalize(/\s/)], extras: [normalize(/\s/)],
conflicts: [], conflicts: [],
externals: [], externals: [],
inline: [] inline: [],
}; supertypes: []
};
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
} }
let externals = baseGrammar.externals; const externalsRuleBuilder = RuleBuilder(null)
if (options.externals) { const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function."); if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
}
const ruleMap = {};
for (const key in options.rules) {
ruleMap[key] = true;
}
for (const key in baseGrammar.rules) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
const ruleBuilder = RuleBuilder(ruleMap);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
} }
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
const externalsRuleBuilder = RuleBuilder(null) let extras = baseGrammar.extras.slice();
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals); if (options.extras) {
if (typeof options.extras !== "function") {
if (!Array.isArray(externalRules)) { throw new Error("Grammar's 'extras' property must be a function.");
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
} }
const ruleMap = {}; extras = options.extras
for (const key in options.rules) { .call(ruleBuilder, ruleBuilder, baseGrammar.extras)
ruleMap[key] = true; .map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
} }
for (const key in baseGrammar.rules) { }
ruleMap[key] = true;
} let conflicts = baseGrammar.conflicts;
for (const external of externals) { if (options.conflicts) {
if (typeof external.name === 'string') { if (typeof options.conflicts !== "function") {
ruleMap[external.name] = true; throw new Error("Grammar's 'conflicts' property must be a function.");
}
} }
const ruleBuilder = RuleBuilder(ruleMap); const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
const name = options.name; if (!Array.isArray(conflictRules)) {
if (typeof name !== "string") { throw new Error("Grammar's conflicts must be an array of arrays of rules.");
throw new Error("Grammar's 'name' property must be a string.");
} }
if (!/^[a-zA-Z_]\w*$/.test(name)) { conflicts = conflictRules.map(conflictSet => {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters."); if (!Array.isArray(conflictSet)) {
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
if (!Array.isArray(conflictRules)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules."); throw new Error("Grammar's conflicts must be an array of arrays of rules.");
} }
conflicts = conflictRules.map(conflictSet => { return conflictSet.map(symbol => normalize(symbol).name);
if (!Array.isArray(conflictSet)) { });
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
return conflictSet.map(symbol => normalize(symbol).name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline};
} }
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
let supertypes = baseGrammar.supertypes;
if (options.supertypes) {
if (typeof options.supertypes !== "function") {
throw new Error("Grammar's 'supertypes' property must be a function.");
}
const baseSupertypeRules = baseGrammar.supertypes.map(sym);
const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);
if (!Array.isArray(supertypeRules)) {
throw new Error("Grammar's supertypes must be an array of rules.");
}
supertypes = supertypeRules.map(symbol => symbol.name);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline, supertypes};
}
function checkArguments(ruleCount, caller, callerName, suffix = '') { function checkArguments(ruleCount, caller, callerName, suffix = '') {
if (ruleCount > 1) { if (ruleCount > 1) {
const error = new Error([ const error = new Error([

View file

@ -27,6 +27,7 @@ pub(crate) struct InputGrammar {
pub expected_conflicts: Vec<Vec<String>>, pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>, pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>, pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
pub word_token: Option<String>, pub word_token: Option<String>,
} }
@ -88,6 +89,7 @@ pub(crate) struct SyntaxGrammar {
pub extra_tokens: Vec<Symbol>, pub extra_tokens: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>, pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>, pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>, pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>, pub word_token: Option<Symbol>,
} }

View file

@ -184,17 +184,23 @@ fn ensure_file<T: AsRef<[u8]>>(path: &PathBuf, f: impl Fn() -> T) -> Result<()>
} }
} }
// Serializable description of a single node type; instances are the values of
// the name-keyed map that `generate_field_info_json` turns into JSON text.
// `Default` yields an entry with both members set to `None`.
#[derive(Debug, Serialize, PartialEq, Eq, Default)]
struct NodeInfoJSON {
// Per-field information keyed by field name; set to `Some` by
// `generate_field_info_json` for variables that have at least one field.
fields: Option<BTreeMap<String, FieldInfoJSON>>,
// NOTE(review): nothing in the visible code assigns this member yet;
// presumably it will list the concrete node types of a supertype — confirm.
subtypes: Option<Vec<NodeTypeJSON>>,
}
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)] #[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
struct FieldTypeJSON { struct NodeTypeJSON {
kind: String, kind: String,
named: bool, named: bool,
} }
#[derive(Debug, Serialize)] #[derive(Debug, Serialize, PartialEq, Eq)]
struct FieldInfoJSON { struct FieldInfoJSON {
multiple: bool, multiple: bool,
required: bool, required: bool,
types: Vec<FieldTypeJSON>, types: Vec<NodeTypeJSON>,
} }
fn generate_field_info_json( fn generate_field_info_json(
@ -203,7 +209,7 @@ fn generate_field_info_json(
simple_aliases: &AliasMap, simple_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>, variable_info: &Vec<VariableInfo>,
) -> String { ) -> String {
let mut map = BTreeMap::new(); let mut node_types_json = BTreeMap::new();
for (i, info) in variable_info.iter().enumerate() { for (i, info) in variable_info.iter().enumerate() {
let variable = &syntax_grammar.variables[i]; let variable = &syntax_grammar.variables[i];
if !variable.kind.is_visible() || info.fields.is_empty() { if !variable.kind.is_visible() || info.fields.is_empty() {
@ -214,60 +220,68 @@ fn generate_field_info_json(
.get(&Symbol::non_terminal(i)) .get(&Symbol::non_terminal(i))
.map_or(&variable.name, |alias| &alias.value); .map_or(&variable.name, |alias| &alias.value);
let fields = map.entry(name.clone()).or_insert_with(|| BTreeMap::new()); let node_type_json = node_types_json
for (field, field_info) in info.fields.iter() { .entry(name.clone())
let field_info_json = fields.entry(field.clone()).or_insert(FieldInfoJSON { .or_insert_with(|| NodeInfoJSON::default());
multiple: false,
required: true,
types: Vec::new(),
});
field_info_json.multiple |= field_info.multiple; if info.fields.len() > 0 {
field_info_json.required &= field_info.required; let mut fields_json = BTreeMap::new();
field_info_json.types.extend(field_info.types.iter().map( for (field, field_info) in info.fields.iter() {
|child_type| match child_type { let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON {
ChildType::Aliased(alias) => FieldTypeJSON { multiple: false,
kind: alias.value.clone(), required: true,
named: alias.is_named, types: Vec::new(),
}, });
ChildType::Normal(symbol) => {
if let Some(alias) = simple_aliases.get(&symbol) { field_info_json.multiple |= field_info.multiple;
FieldTypeJSON { field_info_json.required &= field_info.required;
kind: alias.value.clone(), field_info_json.types.extend(field_info.types.iter().map(
named: alias.is_named, |child_type| match child_type {
} ChildType::Aliased(alias) => NodeTypeJSON {
} else { kind: alias.value.clone(),
match symbol.kind { named: alias.is_named,
SymbolType::NonTerminal => { },
let variable = &syntax_grammar.variables[symbol.index]; ChildType::Normal(symbol) => {
FieldTypeJSON { if let Some(alias) = simple_aliases.get(&symbol) {
kind: variable.name.clone(), NodeTypeJSON {
named: variable.kind == VariableType::Named, kind: alias.value.clone(),
} named: alias.is_named,
} }
SymbolType::Terminal => { } else {
let variable = &lexical_grammar.variables[symbol.index]; match symbol.kind {
FieldTypeJSON { SymbolType::NonTerminal => {
kind: variable.name.clone(), let variable = &syntax_grammar.variables[symbol.index];
named: variable.kind == VariableType::Named, NodeTypeJSON {
kind: variable.name.clone(),
named: variable.kind == VariableType::Named,
}
} }
} SymbolType::Terminal => {
SymbolType::External => { let variable = &lexical_grammar.variables[symbol.index];
let variable = &syntax_grammar.external_tokens[symbol.index]; NodeTypeJSON {
FieldTypeJSON { kind: variable.name.clone(),
kind: variable.name.clone(), named: variable.kind == VariableType::Named,
named: variable.kind == VariableType::Named, }
} }
SymbolType::External => {
let variable = &syntax_grammar.external_tokens[symbol.index];
NodeTypeJSON {
kind: variable.name.clone(),
named: variable.kind == VariableType::Named,
}
}
_ => panic!("Unexpected symbol type"),
} }
_ => panic!("Unexpected symbol type"),
} }
} }
} },
}, ));
)); field_info_json.types.sort_unstable();
field_info_json.types.sort_unstable(); field_info_json.types.dedup();
field_info_json.types.dedup(); }
node_type_json.fields = Some(fields_json);
} }
} }
serde_json::to_string_pretty(&map).unwrap() serde_json::to_string_pretty(&node_types_json).unwrap()
} }

View file

@ -71,6 +71,7 @@ struct GrammarJSON {
externals: Option<Vec<RuleJSON>>, externals: Option<Vec<RuleJSON>>,
extras: Option<Vec<RuleJSON>>, extras: Option<Vec<RuleJSON>>,
inline: Option<Vec<String>>, inline: Option<Vec<String>>,
supertypes: Option<Vec<String>>,
word: Option<String>, word: Option<String>,
} }
@ -100,6 +101,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
.collect(); .collect();
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new()); let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new()); let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new());
Ok(InputGrammar { Ok(InputGrammar {
name: grammar_json.name, name: grammar_json.name,
@ -108,6 +110,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
extra_tokens, extra_tokens,
expected_conflicts, expected_conflicts,
external_tokens, external_tokens,
supertype_symbols,
variables_to_inline, variables_to_inline,
}) })
} }

View file

@ -235,6 +235,7 @@ mod tests {
external_tokens: Vec::new(), external_tokens: Vec::new(),
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(), variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
} }
} }

View file

@ -149,6 +149,7 @@ mod tests {
extra_tokens: Vec::new(), extra_tokens: Vec::new(),
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(), variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
external_tokens: Vec::new(), external_tokens: Vec::new(),
word_token: None, word_token: None,
}; };

View file

@ -77,6 +77,12 @@ pub(super) fn extract_tokens(
}) })
.collect(); .collect();
let supertype_symbols = grammar
.supertype_symbols
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let variables_to_inline = grammar let variables_to_inline = grammar
.variables_to_inline .variables_to_inline
.into_iter() .into_iter()
@ -154,6 +160,7 @@ pub(super) fn extract_tokens(
expected_conflicts, expected_conflicts,
extra_tokens, extra_tokens,
variables_to_inline, variables_to_inline,
supertype_symbols,
external_tokens, external_tokens,
word_token, word_token,
}, },
@ -519,6 +526,7 @@ mod test {
external_tokens: Vec::new(), external_tokens: Vec::new(),
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(), variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
} }
} }

View file

@ -203,6 +203,7 @@ unless they are used only as the grammar's start rule.
expected_conflicts: grammar.expected_conflicts, expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline, variables_to_inline: grammar.variables_to_inline,
external_tokens: grammar.external_tokens, external_tokens: grammar.external_tokens,
supertype_symbols: grammar.supertype_symbols,
word_token: grammar.word_token, word_token: grammar.word_token,
variables, variables,
}) })

View file

@ -35,6 +35,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
extra_tokens.push(interner.intern_rule(extra_token)?); extra_tokens.push(interner.intern_rule(extra_token)?);
} }
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in grammar.supertype_symbols.iter() {
supertype_symbols.push(
interner
.intern_name(supertype_symbol_name)
.ok_or_else(|| Error::undefined_symbol(supertype_symbol_name))?,
);
}
let mut expected_conflicts = Vec::new(); let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() { for conflict in grammar.expected_conflicts.iter() {
let mut interned_conflict = Vec::with_capacity(conflict.len()); let mut interned_conflict = Vec::with_capacity(conflict.len());
@ -64,12 +73,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
); );
} }
eprintln!("supertype_symbols: {:?}", supertype_symbols);
Ok(InternedGrammar { Ok(InternedGrammar {
variables, variables,
external_tokens, external_tokens,
extra_tokens, extra_tokens,
expected_conflicts, expected_conflicts,
variables_to_inline, variables_to_inline,
supertype_symbols,
word_token, word_token,
}) })
} }
@ -230,6 +242,7 @@ mod tests {
external_tokens: Vec::new(), external_tokens: Vec::new(),
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(), variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
} }
} }

View file

@ -25,6 +25,7 @@ pub(crate) struct IntermediateGrammar<T, U> {
expected_conflicts: Vec<Vec<Symbol>>, expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<U>, external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>, variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
word_token: Option<Symbol>, word_token: Option<Symbol>,
} }

View file

@ -198,6 +198,7 @@ mod tests {
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
extra_tokens: Vec::new(), extra_tokens: Vec::new(),
external_tokens: Vec::new(), external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
variables_to_inline: vec![Symbol::non_terminal(1)], variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![ variables: vec![
@ -328,6 +329,7 @@ mod tests {
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
extra_tokens: Vec::new(), extra_tokens: Vec::new(),
external_tokens: Vec::new(), external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
}; };
let inline_map = process_inlines(&grammar); let inline_map = process_inlines(&grammar);
@ -429,6 +431,7 @@ mod tests {
expected_conflicts: Vec::new(), expected_conflicts: Vec::new(),
extra_tokens: Vec::new(), extra_tokens: Vec::new(),
external_tokens: Vec::new(), external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None, word_token: None,
}; };

View file

@ -1,6 +1,6 @@
use super::nfa::CharacterSet; use super::nfa::CharacterSet;
use super::rules::{Alias, Associativity, Symbol}; use super::rules::{Alias, Associativity, Symbol};
use hashbrown::{HashMap, HashSet}; use hashbrown::HashMap;
use std::collections::BTreeMap; use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize; pub(crate) type ProductionInfoId = usize;
@ -52,7 +52,7 @@ pub(crate) struct ProductionInfo {
pub field_map: BTreeMap<String, Vec<FieldLocation>>, pub field_map: BTreeMap<String, Vec<FieldLocation>>,
} }
#[derive(Clone, Debug, PartialEq, Eq, Hash)] #[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ChildType { pub(crate) enum ChildType {
Normal(Symbol), Normal(Symbol),
Aliased(Alias), Aliased(Alias),
@ -62,13 +62,15 @@ pub(crate) enum ChildType {
pub(crate) struct FieldInfo { pub(crate) struct FieldInfo {
pub required: bool, pub required: bool,
pub multiple: bool, pub multiple: bool,
pub types: HashSet<ChildType>, pub types: Vec<ChildType>,
} }
#[derive(Debug, Default, PartialEq, Eq)] #[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo { pub(crate) struct VariableInfo {
pub fields: HashMap<String, FieldInfo>, pub fields: HashMap<String, FieldInfo>,
pub child_types: HashSet<ChildType>, pub subclasses: Vec<Symbol>,
pub child_types: Vec<ChildType>,
pub has_multi_step_production: bool,
} }
#[derive(Debug, PartialEq, Eq)] #[derive(Debug, PartialEq, Eq)]