fix: intern a sequence or choice of a single element the same as the element itself
This commit is contained in:
parent
3da7deedd8
commit
252e2a4bc0
3 changed files with 130 additions and 13 deletions
|
|
@ -226,8 +226,8 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
|
|||
write!(
|
||||
js_stdin,
|
||||
"globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
|
||||
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
|
||||
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
|
||||
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
|
||||
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
|
||||
cli_version.major, cli_version.minor, cli_version.patch,
|
||||
)
|
||||
.with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;
|
||||
|
|
|
|||
|
|
@ -18,13 +18,13 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
variables.push(Variable {
|
||||
name: variable.name.clone(),
|
||||
kind: variable_type_for_name(&variable.name),
|
||||
rule: interner.intern_rule(&variable.rule)?,
|
||||
rule: interner.intern_rule(&variable.rule, Some(&variable.name))?,
|
||||
});
|
||||
}
|
||||
|
||||
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
|
||||
for external_token in &grammar.external_tokens {
|
||||
let rule = interner.intern_rule(external_token)?;
|
||||
let rule = interner.intern_rule(external_token, None)?;
|
||||
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
|
||||
(name.clone(), variable_type_for_name(name))
|
||||
} else {
|
||||
|
|
@ -35,7 +35,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
|
||||
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
|
||||
for extra_token in &grammar.extra_symbols {
|
||||
extra_symbols.push(interner.intern_rule(extra_token)?);
|
||||
extra_symbols.push(interner.intern_rule(extra_token, None)?);
|
||||
}
|
||||
|
||||
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
|
||||
|
|
@ -99,33 +99,37 @@ struct Interner<'a> {
|
|||
}
|
||||
|
||||
impl<'a> Interner<'a> {
|
||||
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
|
||||
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
|
||||
match rule {
|
||||
Rule::Choice(elements) => {
|
||||
if let Some(result) = self.intern_single(elements, name) {
|
||||
return result;
|
||||
}
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
result.push(self.intern_rule(element, name)?);
|
||||
}
|
||||
Ok(Rule::Choice(result))
|
||||
}
|
||||
Rule::Seq(elements) => {
|
||||
if let Some(result) = self.intern_single(elements, name) {
|
||||
return result;
|
||||
}
|
||||
let mut result = Vec::with_capacity(elements.len());
|
||||
for element in elements {
|
||||
result.push(self.intern_rule(element)?);
|
||||
result.push(self.intern_rule(element, name)?);
|
||||
}
|
||||
Ok(Rule::Seq(result))
|
||||
}
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
|
||||
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))),
|
||||
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
|
||||
rule: Box::new(self.intern_rule(rule)?),
|
||||
rule: Box::new(self.intern_rule(rule, name)?),
|
||||
params: params.clone(),
|
||||
}),
|
||||
|
||||
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|
||||
|| Err(anyhow!("Undefined symbol `{name}`")),
|
||||
|symbol| Ok(Rule::Symbol(symbol)),
|
||||
),
|
||||
|
||||
_ => Ok(rule.clone()),
|
||||
}
|
||||
}
|
||||
|
|
@ -147,6 +151,21 @@ impl<'a> Interner<'a> {
|
|||
|
||||
None
|
||||
}
|
||||
|
||||
// In the case of a seq or choice rule of 1 element in a hidden rule, weird
|
||||
// inconsistent behavior w/ queries can occur. So we should treat it as that single rule itself
|
||||
// in this case.
|
||||
fn intern_single(&self, elements: &[Rule], name: Option<&str>) -> Option<Result<Rule>> {
|
||||
if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
|
||||
eprintln!(
|
||||
"Warning: rule {} is just a `seq` or `choice` rule with a single element. This is unnecessary.",
|
||||
name.unwrap_or_default()
|
||||
);
|
||||
Some(self.intern_rule(&elements[0], name))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn variable_type_for_name(name: &str) -> VariableType {
|
||||
|
|
@ -239,6 +258,42 @@ mod tests {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_interning_a_seq_or_choice_of_one_rule() {
|
||||
let grammar = intern_symbols(&build_grammar(vec![
|
||||
Variable::named("w", Rule::choice(vec![Rule::string("a")])),
|
||||
Variable::named("x", Rule::seq(vec![Rule::pattern("b", "")])),
|
||||
Variable::named("y", Rule::string("a")),
|
||||
Variable::named("z", Rule::pattern("b", "")),
|
||||
// Hidden rules should not affect this.
|
||||
Variable::hidden("_a", Rule::choice(vec![Rule::string("a")])),
|
||||
Variable::hidden("_b", Rule::seq(vec![Rule::pattern("b", "")])),
|
||||
Variable::hidden("_c", Rule::string("a")),
|
||||
Variable::hidden("_d", Rule::pattern("b", "")),
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
grammar.variables,
|
||||
vec![
|
||||
Variable::named("w", Rule::string("a")),
|
||||
Variable::named("x", Rule::pattern("b", "")),
|
||||
Variable::named("y", Rule::string("a")),
|
||||
Variable::named("z", Rule::pattern("b", "")),
|
||||
// Hidden rules show no change.
|
||||
Variable::hidden("_a", Rule::string("a")),
|
||||
Variable::hidden("_b", Rule::pattern("b", "")),
|
||||
Variable::hidden("_c", Rule::string("a")),
|
||||
Variable::hidden("_d", Rule::pattern("b", "")),
|
||||
]
|
||||
);
|
||||
|
||||
assert_eq!(grammar.variables[0].rule, grammar.variables[2].rule);
|
||||
assert_eq!(grammar.variables[1].rule, grammar.variables[3].rule);
|
||||
assert_eq!(grammar.variables[4].rule, grammar.variables[6].rule);
|
||||
assert_eq!(grammar.variables[5].rule, grammar.variables[7].rule);
|
||||
}
|
||||
|
||||
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
|
||||
InputGrammar {
|
||||
variables,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue