feat: move generate logic to its own crate

This commit is contained in:
Amaan Qureshi 2024-09-27 16:28:50 -04:00
parent 90efa34608
commit 31f24395b4
47 changed files with 103 additions and 57 deletions

23
Cargo.lock generated
View file

@ -1505,12 +1505,12 @@ dependencies = [
"tiny_http",
"tree-sitter",
"tree-sitter-config",
"tree-sitter-generate",
"tree-sitter-highlight",
"tree-sitter-loader",
"tree-sitter-tags",
"tree-sitter-tests-proc-macro",
"unindent",
"url",
"walkdir",
"wasmparser",
"webbrowser",
@ -1526,6 +1526,27 @@ dependencies = [
"serde_json",
]
[[package]]
name = "tree-sitter-generate"
version = "0.23.0"
dependencies = [
"anyhow",
"heck 0.5.0",
"indexmap",
"indoc",
"lazy_static",
"log",
"regex",
"regex-syntax",
"rustc-hash",
"semver",
"serde",
"serde_json",
"smallbitvec",
"tree-sitter",
"url",
]
[[package]]
name = "tree-sitter-highlight"
version = "0.23.0"

View file

@ -89,6 +89,7 @@ wasmparser = "0.215.0"
webbrowser = "1.0.2"
tree-sitter = { version = "0.23.0", path = "./lib" }
tree-sitter-generate = { version = "0.23.0", path = "./cli/generate" }
tree-sitter-loader = { version = "0.23.0", path = "./cli/loader" }
tree-sitter-config = { version = "0.23.0", path = "./cli/config" }
tree-sitter-highlight = { version = "0.23.0", path = "./highlight" }

View file

@ -58,14 +58,12 @@ wasmparser.workspace = true
webbrowser.workspace = true
tree-sitter.workspace = true
tree-sitter-generate.workspace = true
tree-sitter-config.workspace = true
tree-sitter-highlight.workspace = true
tree-sitter-loader.workspace = true
tree-sitter-tags.workspace = true
[target."cfg(windows)".dependencies]
url = "2.5.2"
[dev-dependencies]
tree_sitter_proc_macro = { path = "src/tests/proc_macro", package = "tree-sitter-tests-proc-macro" }

33
cli/generate/Cargo.toml Normal file
View file

@ -0,0 +1,33 @@
[package]
name = "tree-sitter-generate"
version.workspace = true
description = "Library for generating C source code from a tree-sitter grammar"
authors.workspace = true
edition.workspace = true
rust-version.workspace = true
readme = "README.md"
homepage.workspace = true
repository.workspace = true
license.workspace = true
keywords.workspace = true
categories.workspace = true
[dependencies]
anyhow.workspace = true
heck.workspace = true
indexmap.workspace = true
indoc.workspace = true
lazy_static.workspace = true
log.workspace = true
regex.workspace = true
regex-syntax.workspace = true
rustc-hash.workspace = true
semver.workspace = true
serde.workspace = true
serde_json.workspace = true
smallbitvec.workspace = true
tree-sitter.workspace = true
[target."cfg(windows)".dependencies]
url = "2.5.2"

4
cli/generate/README.md Normal file
View file

@ -0,0 +1,4 @@
# Tree-sitter Generate
This helper crate implements the logic for the `tree-sitter generate` command,
and can be used by external tools to generate a parser from a grammar file.

View file

@ -6,7 +6,7 @@ use std::{
use log::info;
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
use crate::generate::{
use crate::{
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor},

View file

@ -13,7 +13,7 @@ use super::{
item::{ParseItem, ParseItemSet, ParseItemSetCore},
item_set_builder::ParseItemSetBuilder,
};
use crate::generate::{
use crate::{
grammars::{
InlinedProductionMap, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, VariableType,
},

View file

@ -1,6 +1,6 @@
use std::fmt;
use crate::generate::{
use crate::{
grammars::LexicalGrammar,
rules::Symbol,
tables::{ParseStateId, ParseTable},

View file

@ -6,7 +6,7 @@ use std::{
use lazy_static::lazy_static;
use crate::generate::{
use crate::{
grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
};

View file

@ -4,7 +4,7 @@ use std::{
};
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
use crate::generate::{
use crate::{
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
rules::{Symbol, SymbolType, TokenSet},
};

View file

@ -6,7 +6,7 @@ use std::{
use log::info;
use super::token_conflicts::TokenConflictMap;
use crate::generate::{
use crate::{
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
rules::{AliasMap, Symbol, TokenSet},

View file

@ -19,7 +19,7 @@ use self::{
minimize_parse_table::minimize_parse_table,
token_conflicts::TokenConflictMap,
};
use crate::generate::{
use crate::{
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor},
node_types::VariableInfo,

View file

@ -1,6 +1,6 @@
use std::{cmp::Ordering, collections::HashSet, fmt};
use crate::generate::{
use crate::{
build_tables::item::TokenSetDisplay,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor, NfaTransition},
@ -373,7 +373,7 @@ fn compute_conflict_status(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
use crate::{
grammars::{Variable, VariableType},
prepare_grammar::{expand_tokens, ExtractedLexicalGrammar},
rules::{Precedence, Rule, Symbol},

View file

@ -39,8 +39,8 @@ struct GeneratedParser {
node_types_json: String,
}
pub const ALLOC_HEADER: &str = include_str!("../templates/alloc.h");
pub const ARRAY_HEADER: &str = include_str!("../templates/array.h");
pub const ALLOC_HEADER: &str = include_str!("../../src/templates/alloc.h");
pub const ARRAY_HEADER: &str = include_str!("../../src/templates/array.h");
pub fn generate_parser_in_directory(
repo_path: &Path,

View file

@ -730,7 +730,7 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
use crate::{
grammars::{
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
},

View file

@ -76,8 +76,8 @@ enum PrecedenceValueJSON {
}
#[derive(Deserialize)]
pub(crate) struct GrammarJSON {
pub(crate) name: String,
pub struct GrammarJSON {
pub name: String,
rules: Map<String, Value>,
#[serde(default)]
precedences: Vec<Vec<RuleJSON>>,

View file

@ -1,7 +1,7 @@
use std::{collections::HashMap, mem};
use super::ExtractedSyntaxGrammar;
use crate::generate::{
use crate::{
grammars::{Variable, VariableType},
rules::{Rule, Symbol},
};

View file

@ -8,7 +8,7 @@ use regex_syntax::ast::{
};
use super::ExtractedLexicalGrammar;
use crate::generate::{
use crate::{
grammars::{LexicalGrammar, LexicalVariable},
nfa::{CharacterSet, Nfa, NfaState},
rules::{Precedence, Rule},
@ -542,7 +542,7 @@ impl NfaBuilder {
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
use crate::{
grammars::Variable,
nfa::{NfaCursor, NfaTransition},
};

View file

@ -1,4 +1,4 @@
use crate::generate::{
use crate::{
grammars::{LexicalGrammar, SyntaxGrammar},
rules::{Alias, AliasMap, Symbol, SymbolType},
};
@ -164,7 +164,7 @@ pub(super) fn extract_default_aliases(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
use crate::{
grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType},
nfa::Nfa,
};

View file

@ -3,7 +3,7 @@ use std::{collections::HashMap, mem};
use anyhow::{anyhow, Result};
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::generate::{
use crate::{
grammars::{ExternalToken, Variable, VariableType},
rules::{MetadataParams, Rule, Symbol, SymbolType},
};

View file

@ -1,7 +1,7 @@
use anyhow::{anyhow, Result};
use super::ExtractedSyntaxGrammar;
use crate::generate::{
use crate::{
grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
rules::{Alias, Associativity, Precedence, Rule, Symbol},
};
@ -231,7 +231,7 @@ unless they are used only as the grammar's start rule.
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::VariableType;
use crate::grammars::VariableType;
#[test]
fn test_flatten_grammar() {

View file

@ -1,7 +1,7 @@
use anyhow::{anyhow, Result};
use super::InternedGrammar;
use crate::generate::{
use crate::{
grammars::{InputGrammar, Variable, VariableType},
rules::{Rule, Symbol},
};

View file

@ -164,7 +164,7 @@ fn validate_precedences(grammar: &InputGrammar) -> Result<()> {
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::VariableType;
use crate::grammars::VariableType;
#[test]
fn test_validate_precedences_with_undeclared_precedence() {

View file

@ -2,7 +2,7 @@ use std::collections::HashMap;
use anyhow::{anyhow, Result};
use crate::generate::{
use crate::{
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::SymbolType,
};
@ -225,7 +225,7 @@ pub(super) fn process_inlines(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::{
use crate::{
grammars::{LexicalVariable, SyntaxVariable, VariableType},
rules::{Associativity, Precedence, Symbol},
};

View file

@ -11,8 +11,7 @@ use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::indoc;
use serde::Deserialize;
use serde_json::{json, Map, Value};
use crate::generate::write_file;
use tree_sitter_generate::write_file;
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";

View file

@ -1,7 +1,6 @@
#![doc = include_str!("../README.md")]
pub mod fuzz;
pub mod generate;
pub mod highlight;
pub mod init;
pub mod logger;

View file

@ -16,7 +16,7 @@ use tree_sitter_cli::{
fuzz_language_corpus, FuzzOptions, EDIT_COUNT, ITERATION_COUNT, LOG_ENABLED,
LOG_GRAPH_ENABLED, START_SEED,
},
generate, highlight,
highlight,
init::{generate_grammar_files, lookup_package_json_for_path},
logger,
parse::{self, ParseFileOptions, ParseOutput},
@ -461,7 +461,7 @@ impl Generate {
version.parse().expect("invalid abi version flag")
}
});
generate::generate_parser_in_directory(
tree_sitter_generate::generate_parser_in_directory(
current_dir,
self.grammar_path.as_deref(),
abi_version,

View file

@ -14,7 +14,6 @@ use crate::{
EDIT_COUNT, EXAMPLE_EXCLUDE, EXAMPLE_INCLUDE, ITERATION_COUNT, LANGUAGE_FILTER,
LOG_GRAPH_ENABLED, START_SEED,
},
generate,
parse::perform_edit,
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields},
tests::{
@ -353,8 +352,8 @@ fn test_feature_corpus_files() {
grammar_path = test_path.join("grammar.json");
}
let error_message_path = test_path.join("expected_error.txt");
let grammar_json = generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = generate::generate_parser_for_grammar(&grammar_json);
let grammar_json = tree_sitter_generate::load_grammar_file(&grammar_path, None).unwrap();
let generate_result = tree_sitter_generate::generate_parser_for_grammar(&grammar_json);
if error_message_path.exists() {
if EXAMPLE_INCLUDE.is_some() || EXAMPLE_EXCLUDE.is_some() {

View file

@ -6,12 +6,11 @@ use std::{
use anyhow::Context;
use lazy_static::lazy_static;
use tree_sitter::Language;
use tree_sitter_generate::{ALLOC_HEADER, ARRAY_HEADER};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};
use tree_sitter_tags::TagsConfiguration;
use crate::generate::{ALLOC_HEADER, ARRAY_HEADER};
include!("./dirs.rs");
lazy_static! {

View file

@ -1,14 +1,12 @@
use tree_sitter::{Node, Parser, Point, Tree};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use super::{
get_random_edit,
helpers::fixtures::{fixtures_dir, get_language, get_test_language},
Rand,
};
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
parse::perform_edit,
};
use crate::parse::perform_edit;
const JSON_EXAMPLE: &str = r#"

View file

@ -7,11 +7,9 @@ use std::{
};
use tree_sitter::Parser;
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use crate::tests::helpers::fixtures::{fixtures_dir, get_test_language};
// The `sanitizing` cfg is required to not run tests under a specific sanitizer,
// because they don't work well with subprocesses _(it's an assumption)_.

View file

@ -4,6 +4,7 @@ use std::{
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_generate::{generate_parser_for_grammar, load_grammar_file};
use tree_sitter_proc_macro::retry;
use super::helpers::{
@ -13,7 +14,6 @@ use super::helpers::{
};
use crate::{
fuzz::edits::Edit,
generate::{generate_parser_for_grammar, load_grammar_file},
parse::perform_edit,
tests::{helpers::fixtures::fixtures_dir, invert_edit},
};

View file

@ -7,6 +7,7 @@ use tree_sitter::{
CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError,
QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty,
};
use tree_sitter_generate::generate_parser_for_grammar;
use unindent::Unindent;
use super::helpers::{
@ -14,12 +15,9 @@ use super::helpers::{
fixtures::{get_language, get_test_language},
query_helpers::{assert_query_matches, Match, Pattern},
};
use crate::{
generate::generate_parser_for_grammar,
tests::{
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
},
use crate::tests::{
helpers::query_helpers::{collect_captures, collect_matches},
ITERATION_COUNT,
};
lazy_static! {

View file

@ -5,11 +5,10 @@ use std::{
use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_generate::parse_grammar::GrammarJSON;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
use super::generate::parse_grammar::GrammarJSON;
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(language_dir)
.with_context(|| "Failed to get wasm filename")