Identify large char sets for lexer using NFA transitions

This commit is contained in:
Max Brunsfeld 2024-04-09 17:53:37 -07:00
parent 39be6972fe
commit be34bc9430
5 changed files with 313 additions and 317 deletions

View file

@ -1,21 +1,18 @@
use self::grammars::InputGrammar;
use anyhow::{anyhow, Context, Result};
use build_tables::build_tables;
use grammar_files::path_in_ignore;
use lazy_static::lazy_static;
use parse_grammar::parse_grammar;
use prepare_grammar::prepare_grammar;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use semver::Version;
use build_tables::build_tables;
use grammar_files::path_in_ignore;
use grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use parse_grammar::parse_grammar;
use prepare_grammar::prepare_grammar;
use render::render_c_code;
use rules::AliasMap;
mod build_tables;
mod dedup;
mod grammar_files;
@ -103,23 +100,12 @@ pub fn generate_parser_in_directory(
// Parse and preprocess the grammar.
let input_grammar = parse_grammar(&grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
// Generate the parser and related files.
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
&inlines,
simple_aliases,
abi_version,
report_symbol_name,
)?;
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
@ -128,7 +114,7 @@ pub fn generate_parser_in_directory(
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
if !path_in_ignore(&repo_path) {
grammar_files::generate_grammar_files(&repo_path, &language_name, generate_bindings)?;
grammar_files::generate_grammar_files(&repo_path, &input_grammar.name, generate_bindings)?;
}
Ok(())
@ -137,29 +123,18 @@ pub fn generate_parser_in_directory(
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
let input_grammar = parse_grammar(&grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let parser = generate_parser_for_grammar_with_opts(
&input_grammar.name,
syntax_grammar,
lexical_grammar,
&inlines,
simple_aliases,
tree_sitter::LANGUAGE_VERSION,
None,
)?;
Ok((input_grammar.name, parser.c_code))
let parser =
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
Ok((input_grammar.name.clone(), parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
name: &str,
syntax_grammar: SyntaxGrammar,
lexical_grammar: LexicalGrammar,
inlines: &InlinedProductionMap,
simple_aliases: AliasMap,
input_grammar: &InputGrammar,
abi_version: usize,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(input_grammar)?;
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
let node_types_json = node_types::generate_node_types_json(
@ -168,20 +143,17 @@ fn generate_parser_for_grammar_with_opts(
&simple_aliases,
&variable_info,
);
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
let tables = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
inlines,
&inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
name,
parse_table,
main_lex_table,
keyword_lex_table,
keyword_capture_token,
&input_grammar.name,
tables,
syntax_grammar,
lexical_grammar,
simple_aliases,