Identify large char sets for lexer using NFA transitions
This commit is contained in:
parent
39be6972fe
commit
be34bc9430
5 changed files with 313 additions and 317 deletions
|
|
@ -1,21 +1,18 @@
|
|||
use self::grammars::InputGrammar;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use build_tables::build_tables;
|
||||
use grammar_files::path_in_ignore;
|
||||
use lazy_static::lazy_static;
|
||||
use parse_grammar::parse_grammar;
|
||||
use prepare_grammar::prepare_grammar;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use render::render_c_code;
|
||||
use semver::Version;
|
||||
use std::io::Write;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::process::{Command, Stdio};
|
||||
use std::{env, fs};
|
||||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use lazy_static::lazy_static;
|
||||
use regex::{Regex, RegexBuilder};
|
||||
use semver::Version;
|
||||
|
||||
use build_tables::build_tables;
|
||||
use grammar_files::path_in_ignore;
|
||||
use grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use parse_grammar::parse_grammar;
|
||||
use prepare_grammar::prepare_grammar;
|
||||
use render::render_c_code;
|
||||
use rules::AliasMap;
|
||||
|
||||
mod build_tables;
|
||||
mod dedup;
|
||||
mod grammar_files;
|
||||
|
|
@ -103,23 +100,12 @@ pub fn generate_parser_in_directory(
|
|||
|
||||
// Parse and preprocess the grammar.
|
||||
let input_grammar = parse_grammar(&grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let language_name = input_grammar.name;
|
||||
|
||||
// Generate the parser and related files.
|
||||
let GeneratedParser {
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(
|
||||
&language_name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&inlines,
|
||||
simple_aliases,
|
||||
abi_version,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
|
||||
|
||||
write_file(&src_path.join("parser.c"), c_code)?;
|
||||
write_file(&src_path.join("node-types.json"), node_types_json)?;
|
||||
|
|
@ -128,7 +114,7 @@ pub fn generate_parser_in_directory(
|
|||
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
|
||||
|
||||
if !path_in_ignore(&repo_path) {
|
||||
grammar_files::generate_grammar_files(&repo_path, &language_name, generate_bindings)?;
|
||||
grammar_files::generate_grammar_files(&repo_path, &input_grammar.name, generate_bindings)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
|
|
@ -137,29 +123,18 @@ pub fn generate_parser_in_directory(
|
|||
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
|
||||
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
|
||||
let input_grammar = parse_grammar(&grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let parser = generate_parser_for_grammar_with_opts(
|
||||
&input_grammar.name,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
&inlines,
|
||||
simple_aliases,
|
||||
tree_sitter::LANGUAGE_VERSION,
|
||||
None,
|
||||
)?;
|
||||
Ok((input_grammar.name, parser.c_code))
|
||||
let parser =
|
||||
generate_parser_for_grammar_with_opts(&input_grammar, tree_sitter::LANGUAGE_VERSION, None)?;
|
||||
Ok((input_grammar.name.clone(), parser.c_code))
|
||||
}
|
||||
|
||||
fn generate_parser_for_grammar_with_opts(
|
||||
name: &str,
|
||||
syntax_grammar: SyntaxGrammar,
|
||||
lexical_grammar: LexicalGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
simple_aliases: AliasMap,
|
||||
input_grammar: &InputGrammar,
|
||||
abi_version: usize,
|
||||
report_symbol_name: Option<&str>,
|
||||
) -> Result<GeneratedParser> {
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(input_grammar)?;
|
||||
let variable_info =
|
||||
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
|
|
@ -168,20 +143,17 @@ fn generate_parser_for_grammar_with_opts(
|
|||
&simple_aliases,
|
||||
&variable_info,
|
||||
);
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
let tables = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
inlines,
|
||||
&inlines,
|
||||
report_symbol_name,
|
||||
)?;
|
||||
let c_code = render_c_code(
|
||||
name,
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
keyword_capture_token,
|
||||
&input_grammar.name,
|
||||
tables,
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
simple_aliases,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue