refactor(generate): use the logger

Co-authored-by: Amaan Qureshi <git@amaanq.com>
This commit is contained in:
ObserverOfTime 2025-09-15 20:43:02 +03:00 committed by Amaan Qureshi
parent 804ef22075
commit d13657c40c
8 changed files with 89 additions and 62 deletions

View file

@@ -11,7 +11,7 @@ use std::collections::{BTreeSet, HashMap};
pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;
use build_parse_table::BuildTableResult;
pub use build_parse_table::ParseTableBuilderError;
use log::info;
use log::{debug, info};
use self::{
build_lex_table::build_lex_table,
@@ -172,7 +172,7 @@ fn populate_error_state(
if conflicts_with_other_tokens {
None
} else {
info!(
debug!(
"error recovery - token {} has no conflicts",
lexical_grammar.variables[i].name
);
@@ -198,14 +198,14 @@ fn populate_error_state(
!coincident_token_index.contains(symbol, *t)
&& token_conflict_map.does_conflict(symbol.index, t.index)
}) {
info!(
debug!(
"error recovery - exclude token {} because of conflict with {}",
lexical_grammar.variables[i].name, lexical_grammar.variables[t.index].name
);
continue;
}
}
info!(
debug!(
"error recovery - include token {}",
lexical_grammar.variables[i].name
);
@@ -338,7 +338,7 @@ fn identify_keywords(
&& token_conflict_map.does_match_same_string(i, word_token.index)
&& !token_conflict_map.does_match_different_string(i, word_token.index)
{
info!(
debug!(
"Keywords - add candidate {}",
lexical_grammar.variables[i].name
);
@@ -357,7 +357,7 @@ fn identify_keywords(
if other_token != *token
&& token_conflict_map.does_match_same_string(other_token.index, token.index)
{
info!(
debug!(
"Keywords - exclude {} because it matches the same string as {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_token.index].name
@@ -399,7 +399,7 @@ fn identify_keywords(
word_token.index,
other_index,
) {
info!(
debug!(
"Keywords - exclude {} because of conflict with {}",
lexical_grammar.variables[token.index].name,
lexical_grammar.variables[other_index].name
@@ -408,7 +408,7 @@ fn identify_keywords(
}
}
info!(
debug!(
"Keywords - include {}",
lexical_grammar.variables[token.index].name,
);
@@ -480,14 +480,14 @@ fn report_state_info<'a>(
.max()
.unwrap();
for (symbol, states) in &symbols_with_state_indices {
eprintln!(
info!(
"{:width$}\t{}",
syntax_grammar.variables[symbol.index].name,
states.len(),
width = max_symbol_name_length
);
}
eprintln!();
info!("");
let state_indices = if report_symbol_name == "*" {
Some(&all_state_indices)
@@ -510,20 +510,25 @@ fn report_state_info<'a>(
for state_index in state_indices {
let id = parse_table.states[state_index].id;
let (preceding_symbols, item_set) = &parse_state_info[id];
eprintln!("state index: {state_index}");
eprintln!("state id: {id}");
eprint!("symbol sequence:");
for symbol in preceding_symbols {
let name = if symbol.is_terminal() {
&lexical_grammar.variables[symbol.index].name
} else if symbol.is_external() {
&syntax_grammar.external_tokens[symbol.index].name
} else {
&syntax_grammar.variables[symbol.index].name
};
eprint!(" {name}");
}
eprintln!(
info!("state index: {state_index}");
info!("state id: {id}");
info!(
"symbol sequence: {}",
preceding_symbols
.iter()
.map(|symbol| {
if symbol.is_terminal() {
lexical_grammar.variables[symbol.index].name.clone()
} else if symbol.is_external() {
syntax_grammar.external_tokens[symbol.index].name.clone()
} else {
syntax_grammar.variables[symbol.index].name.clone()
}
})
.collect::<Vec<_>>()
.join(" ")
);
info!(
"\nitems:\n{}",
item::ParseItemSetDisplay(item_set, syntax_grammar, lexical_grammar),
);

View file

@@ -3,7 +3,7 @@ use std::{
mem,
};
use log::info;
use log::debug;
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
use crate::{
@@ -176,7 +176,7 @@ impl<'a> LexTableBuilder<'a> {
let (state_id, is_new) = self.add_state(nfa_states, eof_valid);
if is_new {
info!(
debug!(
"entry point state: {state_id}, tokens: {:?}",
tokens
.iter()

View file

@@ -5,6 +5,7 @@ use std::{
};
use indexmap::{map::Entry, IndexMap};
use log::warn;
use rustc_hash::FxHasher;
use serde::Serialize;
use thiserror::Error;
@@ -346,17 +347,21 @@ impl<'a> ParseTableBuilder<'a> {
}
if !self.actual_conflicts.is_empty() {
println!("Warning: unnecessary conflicts");
for conflict in &self.actual_conflicts {
println!(
" {}",
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
);
}
warn!(
"unnecessary conflicts:\n {}",
&self
.actual_conflicts
.iter()
.map(|conflict| {
conflict
.iter()
.map(|symbol| format!("`{}`", self.symbol_name(symbol)))
.collect::<Vec<_>>()
.join(", ")
})
.collect::<Vec<_>>()
.join("\n ")
);
}
Ok((self.parse_table, self.parse_state_info_by_id))

View file

@@ -3,7 +3,7 @@ use std::{
mem,
};
use log::info;
use log::debug;
use super::token_conflicts::TokenConflictMap;
use crate::{
@@ -244,7 +244,7 @@ impl Minimizer<'_> {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
debug!(
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
@@ -265,7 +265,7 @@ impl Minimizer<'_> {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
debug!(
"split states {} {} - successors for {} are split: {s1} {s2}",
state1.id,
state2.id,
@@ -295,7 +295,7 @@ impl Minimizer<'_> {
let actions1 = &entry1.actions;
let actions2 = &entry2.actions;
if actions1.len() != actions2.len() {
info!(
debug!(
"split states {state_id1} {state_id2} - differing action counts for token {}",
self.symbol_name(token)
);
@@ -322,13 +322,13 @@ impl Minimizer<'_> {
if group1 == group2 && is_repetition1 == is_repetition2 {
continue;
}
info!(
debug!(
"split states {state_id1} {state_id2} - successors for {} are split: {s1} {s2}",
self.symbol_name(token),
);
return true;
} else if action1 != action2 {
info!(
debug!(
"split states {state_id1} {state_id2} - unequal actions for {}",
self.symbol_name(token),
);
@@ -347,14 +347,14 @@ impl Minimizer<'_> {
new_token: Symbol,
) -> bool {
if new_token == Symbol::end_of_nonterminal_extra() {
info!("split states {left_id} {right_id} - end of non-terminal extra",);
debug!("split states {left_id} {right_id} - end of non-terminal extra",);
return true;
}
// Do not add external tokens; they could conflict lexically with any of the state's
// existing lookahead tokens.
if new_token.is_external() {
info!(
debug!(
"split states {left_id} {right_id} - external token {}",
self.symbol_name(&new_token),
);
@@ -373,7 +373,7 @@ impl Minimizer<'_> {
.iter()
.any(|external| external.corresponding_internal_token == Some(new_token))
{
info!(
debug!(
"split states {left_id} {right_id} - internal/external token {}",
self.symbol_name(&new_token),
);
@@ -399,7 +399,7 @@ impl Minimizer<'_> {
.token_conflict_map
.does_match_same_string(new_token.index, token.index)
{
info!(
debug!(
"split states {} {} - token {} conflicts with {}",
left_id,
right_id,

View file

@@ -8,6 +8,7 @@ use std::{
};
use anyhow::Result;
use log::warn;
use node_types::VariableInfo;
use regex::{Regex, RegexBuilder};
use rules::{Alias, Symbol};
@@ -255,8 +256,16 @@
let semantic_version = read_grammar_version(&repo_path)?;
if semantic_version.is_none() && abi_version > ABI_VERSION_MIN {
println!("Warning: No `tree-sitter.json` file found in your grammar, this file is required to generate with ABI {abi_version}. Using ABI version {ABI_VERSION_MIN} instead.");
println!("This file can be set up with `tree-sitter init`. For more information, see https://tree-sitter.github.io/tree-sitter/cli/init.");
warn!(
concat!(
"No `tree-sitter.json` file found in your grammar, ",
"this file is required to generate with ABI {}. ",
"Using ABI version {} instead.\n",
"This file can be set up with `tree-sitter init`. ",
"For more information, see https://tree-sitter.github.io/tree-sitter/cli/init."
),
abi_version, ABI_VERSION_MIN
);
abi_version = ABI_VERSION_MIN;
}

View file

@@ -1,16 +1,16 @@
use std::collections::HashSet;
use anyhow::Result;
use log::warn;
use regex::Regex;
use serde::{Deserialize, Serialize};
use serde_json::{Map, Value};
use thiserror::Error;
use super::{
grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
use crate::{
grammars::{InputGrammar, PrecedenceEntry, ReservedWordContext, Variable, VariableType},
rules::{Precedence, Rule},
};
use crate::grammars::ReservedWordContext;
#[derive(Deserialize)]
#[serde(tag = "type")]
@@ -281,7 +281,13 @@ pub(crate) fn parse_grammar(input: &str) -> ParseGrammarResult<InputGrammar> {
_ => false,
};
if matches_empty {
eprintln!("Warning: Named extra rule `{name}` matches the empty string. Inline this to avoid infinite loops while parsing.");
warn!(
concat!(
"Named extra rule `{}` matches the empty string. ",
"Inline this to avoid infinite loops while parsing."
),
name
);
}
}
variables.push(Variable {
@@ -342,7 +348,7 @@ fn parse_rule(json: RuleJSON, is_token: bool) -> ParseGrammarResult<Rule> {
} else {
// silently ignore unicode flags
if c != 'u' && c != 'v' {
eprintln!("Warning: unsupported flag {c}");
warn!("unsupported flag {c}");
}
false
}

View file

@@ -1,4 +1,5 @@
use anyhow::Result;
use log::warn;
use serde::Serialize;
use thiserror::Error;
@@ -132,7 +133,7 @@ impl Interner<'_> {
fn intern_rule(&self, rule: &Rule, name: Option<&str>) -> InternSymbolsResult<Rule> {
match rule {
Rule::Choice(elements) => {
self.check_single(elements, name);
self.check_single(elements, name, "choice");
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element, name)?);
@@ -140,7 +141,7 @@ impl Interner<'_> {
Ok(Rule::Choice(result))
}
Rule::Seq(elements) => {
self.check_single(elements, name);
self.check_single(elements, name, "seq");
let mut result = Vec::with_capacity(elements.len());
for element in elements {
result.push(self.intern_rule(element, name)?);
@@ -184,10 +185,10 @@ impl Interner<'_> {
// In the case of a seq or choice rule of 1 element in a hidden rule, weird
// inconsistent behavior with queries can occur. So we should warn the user about it.
fn check_single(&self, elements: &[Rule], name: Option<&str>) {
fn check_single(&self, elements: &[Rule], name: Option<&str>, kind: &str) {
if elements.len() == 1 && matches!(elements[0], Rule::String(_) | Rule::Pattern(_, _)) {
eprintln!(
"Warning: rule {} contains a `seq` or `choice` rule with a single element. This is unnecessary.",
warn!(
"rule {} contains a `{kind}` rule with a single element. This is unnecessary.",
name.unwrap_or_default()
);
}

View file

@@ -4,6 +4,7 @@ use std::{
sync::{LazyLock, Mutex},
};
use log::{error, info, warn};
use rquickjs::{
loader::{FileResolver, ScriptLoader},
Context, Ctx, Function, Module, Object, Runtime, Type, Value,
@@ -116,19 +117,19 @@ impl Console {
#[allow(clippy::needless_pass_by_value)]
pub fn log(&self, args: rquickjs::function::Rest<Value<'_>>) -> rquickjs::Result<()> {
println!("{}", Self::format_args(&args));
info!("{}", Self::format_args(&args));
Ok(())
}
#[allow(clippy::needless_pass_by_value)]
pub fn warn(&self, args: rquickjs::function::Rest<Value<'_>>) -> rquickjs::Result<()> {
eprintln!("Warning: {}", Self::format_args(&args));
warn!("{}", Self::format_args(&args));
Ok(())
}
#[allow(clippy::needless_pass_by_value)]
pub fn error(&self, args: rquickjs::function::Rest<Value<'_>>) -> rquickjs::Result<()> {
eprintln!("Error: {}", Self::format_args(&args));
error!("Error: {}", Self::format_args(&args));
Ok(())
}
}