Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Amaan Qureshi
588c1a5610
feat: allow external scanners to reuse the internal character ranges 2024-11-24 17:45:50 -05:00
3 changed files with 31 additions and 10 deletions

View file

@ -37,6 +37,7 @@ lazy_static! {
struct GeneratedParser { struct GeneratedParser {
c_code: String, c_code: String,
node_types_json: String, node_types_json: String,
header_tail: String,
} }
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h"); pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
@ -90,13 +91,17 @@ pub fn generate_parser_in_directory(
let GeneratedParser { let GeneratedParser {
c_code, c_code,
node_types_json, node_types_json,
header_tail,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?; } = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
write_file(&src_path.join("parser.c"), c_code)?; write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?; write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?; write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
write_file(&header_path.join("array.h"), ARRAY_HEADER)?; write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; write_file(
&header_path.join("parser.h"),
format!("{}\n{header_tail}", tree_sitter::PARSER_HEADER),
)?;
Ok(()) Ok(())
} }
@ -132,7 +137,7 @@ fn generate_parser_for_grammar_with_opts(
&inlines, &inlines,
report_symbol_name, report_symbol_name,
)?; )?;
let c_code = render_c_code( let (c_code, header_tail) = render_c_code(
&input_grammar.name, &input_grammar.name,
tables, tables,
syntax_grammar, syntax_grammar,
@ -143,6 +148,7 @@ fn generate_parser_for_grammar_with_opts(
Ok(GeneratedParser { Ok(GeneratedParser {
c_code, c_code,
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
header_tail,
}) })
} }

View file

@ -60,6 +60,7 @@ macro_rules! dedent {
struct Generator { struct Generator {
buffer: String, buffer: String,
header_buffer: String,
indent_level: usize, indent_level: usize,
language_name: String, language_name: String,
parse_table: ParseTable, parse_table: ParseTable,
@ -89,7 +90,7 @@ struct LargeCharacterSetInfo {
} }
impl Generator { impl Generator {
fn generate(mut self) -> String { fn generate(mut self) -> (String, String) {
self.init(); self.init();
self.add_header(); self.add_header();
self.add_includes(); self.add_includes();
@ -146,7 +147,7 @@ impl Generator {
self.add_parser_export(); self.add_parser_export();
self.buffer (self.buffer, self.header_buffer)
} }
fn init(&mut self) { fn init(&mut self) {
@ -985,8 +986,21 @@ impl Generator {
add_line!( add_line!(
self, self,
"static TSCharacterRange {}[] = {{", "const TSCharacterRange {}[{}] = {{",
info.constant_name info.constant_name,
characters.range_count()
);
self.header_buffer += &format!(
"extern const TSCharacterRange {}[{}];\n",
info.constant_name,
characters.range_count()
);
self.header_buffer += &format!(
"static const uint32_t {}_length = {};\n\n",
info.constant_name,
characters.range_count()
); );
indent!(self); indent!(self);
@ -1713,7 +1727,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar, lexical_grammar: LexicalGrammar,
default_aliases: AliasMap, default_aliases: AliasMap,
abi_version: usize, abi_version: usize,
) -> String { ) -> (String, String) {
assert!( assert!(
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version), (ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
"This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}", "This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
@ -1721,6 +1735,7 @@ pub fn render_c_code(
Generator { Generator {
buffer: String::new(), buffer: String::new(),
header_buffer: String::new(),
indent_level: 0, indent_level: 0,
language_name: name.to_string(), language_name: name.to_string(),
large_state_count: 0, large_state_count: 0,

View file

@ -132,13 +132,13 @@ struct TSLanguage {
const char *name; const char *name;
}; };
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) { static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0; uint32_t index = 0;
uint32_t size = len - index; uint32_t size = len - index;
while (size > 1) { while (size > 1) {
uint32_t half_size = size / 2; uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size; uint32_t mid_index = index + half_size;
TSCharacterRange *range = &ranges[mid_index]; const TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) { if (lookahead >= range->start && lookahead <= range->end) {
return true; return true;
} else if (lookahead > range->end) { } else if (lookahead > range->end) {
@ -146,7 +146,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
} }
size -= half_size; size -= half_size;
} }
TSCharacterRange *range = &ranges[index]; const TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end); return (lookahead >= range->start && lookahead <= range->end);
} }