fix: intern a sequence or choice of a single element the same as the element itself

This commit is contained in:
Amaan Qureshi 2024-05-25 00:48:13 -04:00
parent 3da7deedd8
commit 252e2a4bc0
3 changed files with 130 additions and 13 deletions

View file

@ -226,8 +226,8 @@ fn load_js_grammar_file(grammar_path: &Path, js_runtime: Option<&str>) -> Result
write!(
js_stdin,
"globalThis.TREE_SITTER_CLI_VERSION_MAJOR = {};
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
globalThis.TREE_SITTER_CLI_VERSION_MINOR = {};
globalThis.TREE_SITTER_CLI_VERSION_PATCH = {};",
cli_version.major, cli_version.minor, cli_version.patch,
)
.with_context(|| format!("Failed to write tree-sitter version to {js_runtime}'s stdin"))?;

View file

@ -18,13 +18,13 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
variables.push(Variable {
name: variable.name.clone(),
kind: variable_type_for_name(&variable.name),
rule: interner.intern_rule(&variable.rule)?,
rule: interner.intern_rule(&variable.rule, Some(&variable.name))?,
});
}
let mut external_tokens = Vec::with_capacity(grammar.external_tokens.len());
for external_token in &grammar.external_tokens {
let rule = interner.intern_rule(external_token)?;
let rule = interner.intern_rule(external_token, None)?;
let (name, kind) = if let Rule::NamedSymbol(name) = external_token {
(name.clone(), variable_type_for_name(name))
} else {
@ -35,7 +35,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
for extra_token in &grammar.extra_symbols {
extra_symbols.push(interner.intern_rule(extra_token)?);
extra_symbols.push(interner.intern_rule(extra_token, None)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
@ -99,33 +99,37 @@ struct Interner<'a> {
}
impl<'a> Interner<'a> {
fn intern_rule(&self, rule: &Rule) -> Result<Rule> {
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> Result<Rule> {
match rule {
Rule::Choice(elements) => {
if let Some(result) = self.intern_single(elements, name) {
return result;
}
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
result.push(self.intern_rule(element, name)?);
}
Ok(Rule::Choice(result))
}
Rule::Seq(elements) => {
if let Some(result) = self.intern_single(elements, name) {
return result;
}
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element)?);
result.push(self.intern_rule(element, name)?);
}
Ok(Rule::Seq(result))
}
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content)?))),
Rule::Repeat(content) => Ok(Rule::Repeat(Box::new(self.intern_rule(content, name)?))),
Rule::Metadata { rule, params } => Ok(Rule::Metadata {
rule: Box::new(self.intern_rule(rule)?),
rule: Box::new(self.intern_rule(rule, name)?),
params: params.clone(),
}),
Rule::NamedSymbol(name) => self.intern_name(name).map_or_else(
|| Err(anyhow!("Undefined symbol `{name}`")),
|symbol| Ok(Rule::Symbol(symbol)),
),
_ => Ok(rule.clone()),
}
}
@ -147,6 +151,21 @@ impl<'a> Interner<'a> {
None
}
// A `seq` or `choice` that wraps exactly one string or pattern is interned as
// that element itself. This applies to every rule, named or hidden: leaving the
// wrapper in place can cause weird, inconsistent behavior w/ queries.
//
// Returns `None` when `elements` is anything other than a lone string/pattern,
// in which case the caller interns the wrapper as usual. `name` is used only
// for the warning text and is forwarded unchanged to `intern_rule`.
fn intern_single(&self, elements: &[Rule], name: Option<&str>) -> Option<Result<Rule>> {
    match elements {
        [single @ (Rule::String(_) | Rule::Pattern(_, _))] => {
            // A name is not always available (it is optional), so avoid
            // printing an empty rule name in that case.
            match name {
                Some(name) => eprintln!(
                    "Warning: rule {name} is just a `seq` or `choice` rule with a single element. This is unnecessary."
                ),
                None => eprintln!(
                    "Warning: an unnamed rule is just a `seq` or `choice` rule with a single element. This is unnecessary."
                ),
            }
            Some(self.intern_rule(single, name))
        }
        _ => None,
    }
}
}
fn variable_type_for_name(name: &str) -> VariableType {
@ -239,6 +258,42 @@ mod tests {
}
}
#[test]
fn test_interning_a_seq_or_choice_of_one_rule() {
    // Four named and four hidden variables. In each group the first two wrap a
    // lone terminal in `choice` / `seq`, and the last two are the same
    // terminals without a wrapper.
    let input = vec![
        Variable::named("w", Rule::choice(vec![Rule::string("a")])),
        Variable::named("x", Rule::seq(vec![Rule::pattern("b", "")])),
        Variable::named("y", Rule::string("a")),
        Variable::named("z", Rule::pattern("b", "")),
        Variable::hidden("_a", Rule::choice(vec![Rule::string("a")])),
        Variable::hidden("_b", Rule::seq(vec![Rule::pattern("b", "")])),
        Variable::hidden("_c", Rule::string("a")),
        Variable::hidden("_d", Rule::pattern("b", "")),
    ];

    // After interning, every single-element wrapper is expected to be gone,
    // for named and hidden variables alike.
    let expected = vec![
        Variable::named("w", Rule::string("a")),
        Variable::named("x", Rule::pattern("b", "")),
        Variable::named("y", Rule::string("a")),
        Variable::named("z", Rule::pattern("b", "")),
        Variable::hidden("_a", Rule::string("a")),
        Variable::hidden("_b", Rule::pattern("b", "")),
        Variable::hidden("_c", Rule::string("a")),
        Variable::hidden("_d", Rule::pattern("b", "")),
    ];

    let interned = intern_symbols(&build_grammar(input)).unwrap();
    assert_eq!(interned.variables, expected);

    // Each wrapped variable must intern to the same rule as its bare twin.
    for (wrapped, bare) in [(0, 2), (1, 3), (4, 6), (5, 7)] {
        assert_eq!(
            interned.variables[wrapped].rule,
            interned.variables[bare].rule
        );
    }
}
fn build_grammar(variables: Vec<Variable>) -> InputGrammar {
InputGrammar {
variables,

View file

@ -5011,7 +5011,7 @@ fn test_grammar_with_aliased_literal_query() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test",
"name": "test_grammar_with_aliased_literal_query",
"rules": {
"source": {
"type": "REPEAT",
@ -5071,10 +5071,72 @@ fn test_grammar_with_aliased_literal_query() {
&language,
r#"
(compound_statement "}" @bracket1)
(expansion) @bracket2
"#,
);
assert!(query.is_ok());
let query = Query::new(
&language,
r#"
(expansion "}" @bracket2)
"#,
);
assert!(query.is_err());
}
#[test]
fn test_query_with_seq_or_choice_of_one_rule() {
    // The JSON below corresponds to this grammar, in which each hidden rule
    // wraps one string literal in a single-member `seq` / `choice`:
    //
    //   module.exports = grammar({
    //     name: 'test_query_with_seq_or_choice_of_one_rule',
    //
    //     rules: {
    //       source: $ => choice($._seq, $._choice),
    //
    //       _seq: $ => seq("hi"),
    //       _choice: $ => choice("bye"),
    //     },
    //   });
    //
    // The raw strings are kept flush-left so their contents are unchanged.
    let grammar_json = r#"
{
"name": "test_query_with_seq_or_choice_of_one_rule",
"rules": {
"source": {
"type": "CHOICE",
"members": [
{ "type": "SYMBOL", "name": "_seq" },
{ "type": "SYMBOL", "name": "_choice" }
]
},
"_seq": {
"type": "SEQ",
"members": [{ "type": "STRING", "value": "hi" }]
},
"_choice": {
"type": "CHOICE",
"members": [ { "type": "STRING", "value": "bye" } ]
}
},
"extras": [{ "type": "PATTERN", "value": "\\s" }]
}
"#;
    let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json).unwrap();
    let language = get_test_language(&parser_name, &parser_code, None);

    // The query refers to both string literals directly; it must compile.
    let query_source = r#"
"hi" @seq
"bye" @choice
"#;
    let compiled = Query::new(&language, query_source);
    assert!(compiled.is_ok());
}