From 588c1a5610d07aff34412541f1096947eae2bf38 Mon Sep 17 00:00:00 2001 From: Amaan Qureshi Date: Sun, 24 Nov 2024 17:45:47 -0500 Subject: [PATCH] feat: allow external scanners to reuse the internal character ranges --- cli/generate/src/lib.rs | 10 ++++++++-- cli/generate/src/render.rs | 25 ++++++++++++++++++++----- lib/src/parser.h | 6 +++--- 3 files changed, 31 insertions(+), 10 deletions(-) diff --git a/cli/generate/src/lib.rs b/cli/generate/src/lib.rs index 14f20672..5be41ace 100644 --- a/cli/generate/src/lib.rs +++ b/cli/generate/src/lib.rs @@ -37,6 +37,7 @@ lazy_static! { struct GeneratedParser { c_code: String, node_types_json: String, + header_tail: String, } pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h"); @@ -90,13 +91,17 @@ pub fn generate_parser_in_directory( let GeneratedParser { c_code, node_types_json, + header_tail, } = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?; write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("node-types.json"), node_types_json)?; write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; write_file(&header_path.join("array.h"), ARRAY_HEADER)?; - write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; + write_file( + &header_path.join("parser.h"), + format!("{}\n{header_tail}", tree_sitter::PARSER_HEADER), + )?; Ok(()) } @@ -132,7 +137,7 @@ fn generate_parser_for_grammar_with_opts( &inlines, report_symbol_name, )?; - let c_code = render_c_code( + let (c_code, header_tail) = render_c_code( &input_grammar.name, tables, syntax_grammar, @@ -143,6 +148,7 @@ fn generate_parser_for_grammar_with_opts( Ok(GeneratedParser { c_code, node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), + header_tail, }) } diff --git a/cli/generate/src/render.rs b/cli/generate/src/render.rs index 62993d55..c689be5b 100644 --- a/cli/generate/src/render.rs +++ b/cli/generate/src/render.rs @@ -60,6 +60,7 @@ macro_rules! dedent { struct Generator { buffer: String, + header_buffer: String, indent_level: usize, language_name: String, parse_table: ParseTable, @@ -89,7 +90,7 @@ struct LargeCharacterSetInfo { } impl Generator { - fn generate(mut self) -> String { + fn generate(mut self) -> (String, String) { self.init(); self.add_header(); self.add_includes(); @@ -146,7 +147,7 @@ impl Generator { self.add_parser_export(); - self.buffer + (self.buffer, self.header_buffer) } fn init(&mut self) { @@ -985,8 +986,21 @@ impl Generator { add_line!( self, - "static TSCharacterRange {}[] = {{", - info.constant_name + "const TSCharacterRange {}[{}] = {{", + info.constant_name, + characters.range_count() + ); + + self.header_buffer += &format!( + "extern const TSCharacterRange {}[{}];\n", + info.constant_name, + characters.range_count() + ); + + self.header_buffer += &format!( + "static const uint32_t {}_length = {};\n\n", + info.constant_name, + characters.range_count() ); indent!(self); @@ -1713,7 +1727,7 @@ pub fn render_c_code( lexical_grammar: LexicalGrammar, default_aliases: AliasMap, abi_version: usize, -) -> String { +) -> (String, String) { assert!( (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version), "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}", @@ -1721,6 +1735,7 @@ pub fn render_c_code( Generator { buffer: String::new(), + header_buffer: String::new(), indent_level: 0, language_name: name.to_string(), large_state_count: 0, diff --git a/lib/src/parser.h b/lib/src/parser.h index 2338b4a2..13aa49b5 100644 --- a/lib/src/parser.h +++ b/lib/src/parser.h @@ -132,13 +132,13 @@ struct TSLanguage { const char *name; }; -static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { +static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { uint32_t index = 0; uint32_t size = len - index; while (size > 1) { uint32_t half_size = size / 2; uint32_t mid_index = index + half_size; - TSCharacterRange *range = &ranges[mid_index]; + const TSCharacterRange *range = &ranges[mid_index]; if (lookahead >= range->start && lookahead <= range->end) { return true; } else if (lookahead > range->end) { @@ -146,7 +146,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t } size -= half_size; } - TSCharacterRange *range = &ranges[index]; + const TSCharacterRange *range = &ranges[index]; return (lookahead >= range->start && lookahead <= range->end); }