fix(wasm): fix alias map size computation
This fixes a crash that could occur when parsing with certain languages, caused by how the alias map was allocated and laid out in wasm memory.
This commit is contained in:
parent
f222db57ce
commit
f09dc3cf46
7 changed files with 80 additions and 26 deletions
|
|
@ -19,7 +19,7 @@ use tree_sitter_cli::{
|
|||
LOG_GRAPH_ENABLED, START_SEED,
|
||||
},
|
||||
highlight::{self, HighlightOptions},
|
||||
init::{generate_grammar_files, get_root_path, JsonConfigOpts},
|
||||
init::{generate_grammar_files, JsonConfigOpts},
|
||||
input::{get_input, get_tmp_source_file, CliInput},
|
||||
logger,
|
||||
parse::{self, ParseDebugType, ParseFileOptions, ParseOutput, ParseTheme},
|
||||
|
|
@ -895,14 +895,7 @@ impl Build {
|
|||
|
||||
if self.wasm {
|
||||
let output_path = self.output.map(|path| current_dir.join(path));
|
||||
let root_path = get_root_path(&grammar_path.join("tree-sitter.json"))?;
|
||||
wasm::compile_language_to_wasm(
|
||||
&loader,
|
||||
Some(&root_path),
|
||||
&grammar_path,
|
||||
current_dir,
|
||||
output_path,
|
||||
)?;
|
||||
wasm::compile_language_to_wasm(&loader, &grammar_path, current_dir, output_path)?;
|
||||
} else {
|
||||
let output_path = if let Some(ref path) = self.output {
|
||||
let path = Path::new(path);
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ static TEST_LOADER: LazyLock<Loader> = LazyLock::new(|| {
|
|||
loader
|
||||
});
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
pub static ENGINE: LazyLock<tree_sitter::wasmtime::Engine> = LazyLock::new(Default::default);
|
||||
|
||||
pub fn test_loader() -> &'static Loader {
|
||||
&TEST_LOADER
|
||||
}
|
||||
|
|
@ -43,10 +46,19 @@ pub fn get_language(name: &str) -> Language {
|
|||
}
|
||||
|
||||
pub fn get_test_fixture_language(name: &str) -> Language {
|
||||
get_test_fixture_language_internal(name, false)
|
||||
}
|
||||
|
||||
#[cfg(feature = "wasm")]
|
||||
pub fn get_test_fixture_language_wasm(name: &str) -> Language {
|
||||
get_test_fixture_language_internal(name, true)
|
||||
}
|
||||
|
||||
fn get_test_fixture_language_internal(name: &str, wasm: bool) -> Language {
|
||||
let grammar_dir_path = fixtures_dir().join("test_grammars").join(name);
|
||||
let grammar_json = load_grammar_file(&grammar_dir_path.join("grammar.js"), None).unwrap();
|
||||
let (parser_name, parser_code) = generate_parser(&grammar_json).unwrap();
|
||||
get_test_language(&parser_name, &parser_code, Some(&grammar_dir_path))
|
||||
get_test_language_internal(&parser_name, &parser_code, Some(&grammar_dir_path), wasm)
|
||||
}
|
||||
|
||||
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
|
||||
|
|
@ -87,6 +99,15 @@ pub fn get_tags_config(language_name: &str) -> TagsConfiguration {
|
|||
}
|
||||
|
||||
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {
|
||||
get_test_language_internal(name, parser_code, path, false)
|
||||
}
|
||||
|
||||
fn get_test_language_internal(
|
||||
name: &str,
|
||||
parser_code: &str,
|
||||
path: Option<&Path>,
|
||||
wasm: bool,
|
||||
) -> Language {
|
||||
let src_dir = scratch_dir().join("src").join(name);
|
||||
fs::create_dir_all(&src_dir).unwrap();
|
||||
|
||||
|
|
@ -136,5 +157,21 @@ pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) ->
|
|||
config.header_paths = vec![&HEADER_DIR];
|
||||
config.name = name.to_string();
|
||||
|
||||
TEST_LOADER.load_language_at_path_with_name(config).unwrap()
|
||||
if wasm {
|
||||
#[cfg(feature = "wasm")]
|
||||
{
|
||||
let mut loader = Loader::with_parser_lib_path(SCRATCH_DIR.clone());
|
||||
loader.use_wasm(&ENGINE);
|
||||
if env::var("TREE_SITTER_GRAMMAR_DEBUG").is_ok() {
|
||||
loader.debug_build(true);
|
||||
}
|
||||
loader.load_language_at_path_with_name(config).unwrap()
|
||||
}
|
||||
#[cfg(not(feature = "wasm"))]
|
||||
{
|
||||
unimplemented!("Wasm feature is not enabled")
|
||||
}
|
||||
} else {
|
||||
TEST_LOADER.load_language_at_path_with_name(config).unwrap()
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,14 +1,13 @@
|
|||
use std::{fs, sync::LazyLock};
|
||||
use std::fs;
|
||||
|
||||
use streaming_iterator::StreamingIterator;
|
||||
use tree_sitter::{
|
||||
wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
|
||||
use tree_sitter::{Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore};
|
||||
|
||||
use crate::tests::helpers::{
|
||||
allocations,
|
||||
fixtures::{get_test_fixture_language_wasm, ENGINE, WASM_DIR},
|
||||
};
|
||||
|
||||
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
|
||||
|
||||
static ENGINE: LazyLock<Engine> = LazyLock::new(Engine::default);
|
||||
|
||||
#[test]
|
||||
fn test_wasm_stdlib_symbols() {
|
||||
let symbols = tree_sitter::wasm_stdlib_symbols().collect::<Vec<_>>();
|
||||
|
|
@ -92,6 +91,33 @@ fn test_load_wasm_javascript_language() {
|
|||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_wasm_python_language() {
|
||||
allocations::record(|| {
|
||||
let mut store = WasmStore::new(&ENGINE).unwrap();
|
||||
let mut parser = Parser::new();
|
||||
let wasm = fs::read(WASM_DIR.join("tree-sitter-python.wasm")).unwrap();
|
||||
let language = store.load_language("python", &wasm).unwrap();
|
||||
parser.set_wasm_store(store).unwrap();
|
||||
parser.set_language(&language).unwrap();
|
||||
let tree = parser.parse("a = b\nc = d", None).unwrap();
|
||||
assert_eq!(tree.root_node().to_sexp(), "(module (expression_statement (assignment left: (identifier) right: (identifier))) (expression_statement (assignment left: (identifier) right: (identifier))))");
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_fixture_language_wasm() {
|
||||
allocations::record(|| {
|
||||
let store = WasmStore::new(&ENGINE).unwrap();
|
||||
let mut parser = Parser::new();
|
||||
let language = get_test_fixture_language_wasm("epsilon_external_tokens");
|
||||
parser.set_wasm_store(store).unwrap();
|
||||
parser.set_language(&language).unwrap();
|
||||
let tree = parser.parse("hello", None).unwrap();
|
||||
assert_eq!(tree.root_node().to_sexp(), "(document (zero_width))");
|
||||
});
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_load_multiple_wasm_languages() {
|
||||
allocations::record(|| {
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@ use std::{
|
|||
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use tree_sitter::wasm_stdlib_symbols;
|
||||
use tree_sitter_generate::parse_grammar::GrammarJSON;
|
||||
use tree_sitter_generate::{load_grammar_file, parse_grammar::GrammarJSON};
|
||||
use tree_sitter_loader::Loader;
|
||||
use wasmparser::Parser;
|
||||
|
||||
|
|
@ -40,19 +40,18 @@ pub fn get_grammar_name(language_dir: &Path) -> Result<String> {
|
|||
|
||||
pub fn compile_language_to_wasm(
|
||||
loader: &Loader,
|
||||
root_dir: Option<&Path>,
|
||||
language_dir: &Path,
|
||||
output_dir: &Path,
|
||||
output_file: Option<PathBuf>,
|
||||
) -> Result<()> {
|
||||
let grammar_name = get_grammar_name(language_dir)?;
|
||||
let grammar_name = get_grammar_name(language_dir)
|
||||
.or_else(|_| load_grammar_file(&language_dir.join("grammar.js"), None))?;
|
||||
let output_filename =
|
||||
output_file.unwrap_or_else(|| output_dir.join(format!("tree-sitter-{grammar_name}.wasm")));
|
||||
let src_path = language_dir.join("src");
|
||||
let scanner_path = loader.get_scanner_path(&src_path);
|
||||
loader.compile_parser_to_wasm(
|
||||
&grammar_name,
|
||||
root_dir,
|
||||
&src_path,
|
||||
scanner_path
|
||||
.as_ref()
|
||||
|
|
|
|||
|
|
@ -773,7 +773,6 @@ impl Loader {
|
|||
if recompile {
|
||||
self.compile_parser_to_wasm(
|
||||
&config.name,
|
||||
None,
|
||||
config.src_path,
|
||||
config
|
||||
.scanner_path
|
||||
|
|
@ -1026,7 +1025,6 @@ impl Loader {
|
|||
pub fn compile_parser_to_wasm(
|
||||
&self,
|
||||
language_name: &str,
|
||||
_root_path: Option<&Path>,
|
||||
src_path: &Path,
|
||||
scanner_filename: Option<&Path>,
|
||||
output_path: &Path,
|
||||
|
|
|
|||
|
|
@ -1377,11 +1377,12 @@ const TSLanguage *ts_wasm_store_load_language(
|
|||
if (symbol == 0) break;
|
||||
uint16_t value_count;
|
||||
memcpy(&value_count, &memory[wasm_language.alias_map + alias_map_size], sizeof(value_count));
|
||||
alias_map_size += sizeof(uint16_t);
|
||||
alias_map_size += value_count * sizeof(TSSymbol);
|
||||
}
|
||||
language->alias_map = copy(
|
||||
&memory[wasm_language.alias_map],
|
||||
alias_map_size * sizeof(TSSymbol)
|
||||
alias_map_size
|
||||
);
|
||||
language->alias_sequences = copy(
|
||||
&memory[wasm_language.alias_sequences],
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
|
|||
unsigned tree_sitter_external_tokens_external_scanner_serialize(
|
||||
void *payload,
|
||||
char *buffer
|
||||
) { return true; }
|
||||
) { return 0; }
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_deserialize(
|
||||
void *payload,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue