Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Amaan Qureshi
588c1a5610
feat: allow external scanners to reuse the internal character ranges 2024-11-24 17:45:50 -05:00
3 changed files with 31 additions and 10 deletions

View file

@ -37,6 +37,7 @@ lazy_static! {
/// Bundle of text artifacts produced by one parser-generation run.
///
/// Built by `generate_parser_for_grammar_with_opts` and destructured by
/// `generate_parser_in_directory`, which writes each piece to disk.
struct GeneratedParser {
/// Rendered C source; written to `parser.c` in the source directory.
c_code: String,
/// Pretty-printed JSON for the node types; written to `node-types.json`.
node_types_json: String,
/// Extra header text returned by `render_c_code` alongside the C source;
/// appended after `tree_sitter::PARSER_HEADER` (separated by a newline)
/// when `parser.h` is written.
header_tail: String,
}
pub const ALLOC_HEADER: &str = include_str!("templates/alloc.h");
@ -90,13 +91,17 @@ pub fn generate_parser_in_directory(
let GeneratedParser {
c_code,
node_types_json,
header_tail,
} = generate_parser_for_grammar_with_opts(&input_grammar, abi_version, report_symbol_name)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&header_path.join("alloc.h"), ALLOC_HEADER)?;
write_file(&header_path.join("array.h"), ARRAY_HEADER)?;
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
write_file(
&header_path.join("parser.h"),
format!("{}\n{header_tail}", tree_sitter::PARSER_HEADER),
)?;
Ok(())
}
@ -132,7 +137,7 @@ fn generate_parser_for_grammar_with_opts(
&inlines,
report_symbol_name,
)?;
let c_code = render_c_code(
let (c_code, header_tail) = render_c_code(
&input_grammar.name,
tables,
syntax_grammar,
@ -143,6 +148,7 @@ fn generate_parser_for_grammar_with_opts(
Ok(GeneratedParser {
c_code,
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
header_tail,
})
}

View file

@ -60,6 +60,7 @@ macro_rules! dedent {
struct Generator {
buffer: String,
header_buffer: String,
indent_level: usize,
language_name: String,
parse_table: ParseTable,
@ -89,7 +90,7 @@ struct LargeCharacterSetInfo {
}
impl Generator {
fn generate(mut self) -> String {
fn generate(mut self) -> (String, String) {
self.init();
self.add_header();
self.add_includes();
@ -146,7 +147,7 @@ impl Generator {
self.add_parser_export();
self.buffer
(self.buffer, self.header_buffer)
}
fn init(&mut self) {
@ -985,8 +986,21 @@ impl Generator {
add_line!(
self,
"static TSCharacterRange {}[] = {{",
info.constant_name
"const TSCharacterRange {}[{}] = {{",
info.constant_name,
characters.range_count()
);
self.header_buffer += &format!(
"extern const TSCharacterRange {}[{}];\n",
info.constant_name,
characters.range_count()
);
self.header_buffer += &format!(
"static const uint32_t {}_length = {};\n\n",
info.constant_name,
characters.range_count()
);
indent!(self);
@ -1713,7 +1727,7 @@ pub fn render_c_code(
lexical_grammar: LexicalGrammar,
default_aliases: AliasMap,
abi_version: usize,
) -> String {
) -> (String, String) {
assert!(
(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version),
"This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {abi_version}",
@ -1721,6 +1735,7 @@ pub fn render_c_code(
Generator {
buffer: String::new(),
header_buffer: String::new(),
indent_level: 0,
language_name: name.to_string(),
large_state_count: 0,

View file

@ -132,13 +132,13 @@ struct TSLanguage {
const char *name;
};
static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
static inline bool set_contains(const TSCharacterRange *ranges, uint32_t len, int32_t lookahead) {
uint32_t index = 0;
uint32_t size = len - index;
while (size > 1) {
uint32_t half_size = size / 2;
uint32_t mid_index = index + half_size;
TSCharacterRange *range = &ranges[mid_index];
const TSCharacterRange *range = &ranges[mid_index];
if (lookahead >= range->start && lookahead <= range->end) {
return true;
} else if (lookahead > range->end) {
@ -146,7 +146,7 @@ static inline bool set_contains(TSCharacterRange *ranges, uint32_t len, int32_t
}
size -= half_size;
}
TSCharacterRange *range = &ranges[index];
const TSCharacterRange *range = &ranges[index];
return (lookahead >= range->start && lookahead <= range->end);
}