diff --git a/.gitignore b/.gitignore
index 360390b1..7bffbb8c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -15,3 +15,4 @@ test/fixtures/grammars/*
 *.obj
 *.exp
 *.lib
+*.wasm
diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs
index 14c13aa4..48e747c9 100644
--- a/cli/src/generate/mod.rs
+++ b/cli/src/generate/mod.rs
@@ -15,7 +15,7 @@ mod grammars;
 mod nfa;
 mod node_types;
 mod npm_files;
-mod parse_grammar;
+pub mod parse_grammar;
 mod prepare_grammar;
 mod render;
 mod rules;
diff --git a/cli/src/generate/parse_grammar.rs b/cli/src/generate/parse_grammar.rs
index ce4b881a..feb560a9 100644
--- a/cli/src/generate/parse_grammar.rs
+++ b/cli/src/generate/parse_grammar.rs
@@ -64,8 +64,8 @@ enum RuleJSON {
 }
 
 #[derive(Deserialize)]
-struct GrammarJSON {
-    name: String,
+pub(crate) struct GrammarJSON {
+    pub(crate) name: String,
     rules: Map<String, Value>,
     conflicts: Option<Vec<Vec<String>>>,
     externals: Option<Vec<RuleJSON>>,
diff --git a/cli/src/lib.rs b/cli/src/lib.rs
index 19b82194..5d026cde 100644
--- a/cli/src/lib.rs
+++ b/cli/src/lib.rs
@@ -8,6 +8,7 @@ pub mod parse;
 pub mod properties;
 pub mod test;
 pub mod util;
+pub mod wasm;
 
 #[cfg(test)]
 mod tests;
diff --git a/cli/src/main.rs b/cli/src/main.rs
index a8b4fd8b..9decd720 100644
--- a/cli/src/main.rs
+++ b/cli/src/main.rs
@@ -5,7 +5,7 @@ use std::path::Path;
 use std::process::exit;
 use std::{u64, usize};
 use tree_sitter_cli::{
-    config, error, generate, highlight, loader, logger, parse, properties, test,
+    config, error, generate, highlight, loader, logger, parse, properties, test, wasm,
 };
 
 fn main() {
@@ -90,6 +90,11 @@ fn run() -> error::Result<()> {
                 .arg(Arg::with_name("html").long("html").short("h"))
                 .arg(Arg::with_name("time").long("time").short("t")),
         )
+        .subcommand(
+            SubCommand::with_name("build-wasm")
+                .about("Compile a parser to WASM")
+                .arg(Arg::with_name("path").index(1).multiple(true)),
+        )
         .get_matches();
 
     let home_dir = dirs::home_dir().expect("Failed to read home directory");
@@ -237,6 +242,9 @@ fn run() -> error::Result<()> {
                 )));
             }
         }
+    } else if let Some(matches) = matches.subcommand_matches("build-wasm") {
+        let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
+        wasm::compile_language_to_wasm(&grammar_path)?;
     }
 
     Ok(())
diff --git a/cli/src/wasm.rs b/cli/src/wasm.rs
new file mode 100644
index 00000000..1177391e
--- /dev/null
+++ b/cli/src/wasm.rs
@@ -0,0 +1,84 @@
+use super::error::{Error, Result};
+use super::generate::parse_grammar::GrammarJSON;
+use std::fs;
+use std::path::Path;
+use std::process::Command;
+
+/// Compiles the generated parser in `language_dir/src` to a WASM side module.
+///
+/// Reads the grammar name from `src/grammar.json`, then runs `emcc` on every
+/// `.c`, `.cc` and `.cpp` file in `src` (except files whose name starts with
+/// `binding`), asking it to write `tree-sitter-<name>.wasm` and to export the
+/// `tree_sitter_<name>` function.
+///
+/// # Errors
+///
+/// Returns an error if the grammar file cannot be read or parsed, if the
+/// source directory cannot be listed, or if `emcc` cannot be started or exits
+/// unsuccessfully (its stderr is included in the message).
+pub fn compile_language_to_wasm(language_dir: &Path) -> Result<()> {
+    let src_dir = language_dir.join("src");
+    let grammar_json_path = src_dir.join("grammar.json");
+    let grammar_json = fs::read_to_string(&grammar_json_path).map_err(|e| {
+        format!(
+            "Failed to read grammar file {:?} - {}",
+            grammar_json_path, e
+        )
+    })?;
+    let grammar: GrammarJSON = serde_json::from_str(&grammar_json).map_err(|e| {
+        format!(
+            "Failed to parse grammar file {:?} - {}",
+            grammar_json_path, e
+        )
+    })?;
+
+    let mut command = Command::new("emcc");
+    command.args(&[
+        "-o",
+        &format!("tree-sitter-{}.wasm", grammar.name),
+        "-Os",
+        "-s",
+        "WASM=1",
+        "-s",
+        "SIDE_MODULE=1",
+        "-s",
+        &format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar.name),
+    ]);
+    command.arg("-I").arg(&src_dir);
+
+    // Find source files to compile
+    let entries = fs::read_dir(&src_dir)
+        .map_err(|e| format!("Failed to read source directory {:?} - {}", src_dir, e))?;
+
+    for entry in entries {
+        let entry = entry?;
+        let file_name = entry.file_name();
+
+        // Do not compile the node.js binding file.
+        if file_name
+            .to_str()
+            .map_or(false, |s| s.starts_with("binding"))
+        {
+            continue;
+        }
+
+        // Compile any .c, .cc, or .cpp files
+        if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) {
+            if extension == "c" || extension == "cc" || extension == "cpp" {
+                command.arg(entry.path());
+            }
+        }
+    }
+
+    let output = command
+        .output()
+        .map_err(|e| format!("Failed to run emcc command - {}", e))?;
+    if output.status.success() {
+        Ok(())
+    } else {
+        Err(Error::from(format!(
+            "emcc command failed - {}",
+            String::from_utf8_lossy(&output.stderr)
+        )))
+    }
+}
diff --git a/lib/web/binding.c b/lib/web/binding.c
new file mode 100644
index 00000000..79fbd04d
--- /dev/null
+++ b/lib/web/binding.c
@@ -0,0 +1,256 @@
+#include <emscripten.h>
+#include <tree_sitter/api.h>
+#include <stdio.h>
+
+/*****************************/
+/* Section - Data marshaling */
+/*****************************/
+
+// Size in bytes of the input buffer allocated by ts_parser_new_wasm. The
+// JavaScript parse callback in imports.js uses the same value.
+static const uint32_t INPUT_BUFFER_SIZE = 10 * 1024;
+
+// Scratch area shared with JavaScript. Nodes, points and edits are passed in
+// both directions through consecutive slots of this buffer.
+const void *TRANSFER_BUFFER[12] = {
+  NULL, NULL, NULL, NULL,
+  NULL, NULL, NULL, NULL,
+  NULL, NULL, NULL, NULL,
+};
+
+// Returns the address of the transfer buffer so that JavaScript can use it.
+void *ts_init() {
+  return TRANSFER_BUFFER;
+}
+
+// Converts a count of 2-byte code units into a count of bytes.
+static uint32_t code_unit_to_byte(uint32_t unit) {
+  return unit << 1;
+}
+
+// Converts a count of bytes into a count of 2-byte code units.
+static uint32_t byte_to_code_unit(uint32_t byte) {
+  return byte >> 1;
+}
+
+// Writes a node's id and four context words to transfer slots 0-4.
+static void marshal_node(TSNode node) {
+  TRANSFER_BUFFER[0] = (const void *)node.id;
+  TRANSFER_BUFFER[1] = (const void *)node.context[0];
+  TRANSFER_BUFFER[2] = (const void *)node.context[1];
+  TRANSFER_BUFFER[3] = (const void *)node.context[2];
+  TRANSFER_BUFFER[4] = (const void *)node.context[3];
+}
+
+// Rebuilds a node belonging to `tree` from transfer slots 0-4.
+static TSNode unmarshal_node(const TSTree *tree) {
+  TSNode node;
+  node.id = TRANSFER_BUFFER[0];
+  node.context[0] = (uint32_t)TRANSFER_BUFFER[1];
+  node.context[1] = (uint32_t)TRANSFER_BUFFER[2];
+  node.context[2] = (uint32_t)TRANSFER_BUFFER[3];
+  node.context[3] = (uint32_t)TRANSFER_BUFFER[4];
+  node.tree = tree;
+  return node;
+}
+
+// Writes a point to transfer slots 0-1, with its column in code units.
+static void marshal_point(TSPoint point) {
+  TRANSFER_BUFFER[0] = (const void *)point.row;
+  TRANSFER_BUFFER[1] = (const void *)byte_to_code_unit(point.column);
+}
+
+// Reads a point from two slots at `address`; the column becomes a byte count.
+static TSPoint unmarshal_point(const void **address) {
+  TSPoint point;
+  point.row = (uint32_t)address[0];
+  point.column = code_unit_to_byte((uint32_t)address[1]);
+  return point;
+}
+
+// Reads an edit from transfer slots 0-8: three points, then three indices.
+// Columns and indices arrive in code units and are converted to bytes.
+static TSInputEdit unmarshal_edit() {
+  TSInputEdit edit;
+  const void **address = TRANSFER_BUFFER;
+  edit.start_point = unmarshal_point(address); address += 2;
+  edit.old_end_point = unmarshal_point(address); address += 2;
+  edit.new_end_point = unmarshal_point(address); address += 2;
+  edit.start_byte = code_unit_to_byte((uint32_t)*address); address += 1;
+  edit.old_end_byte = code_unit_to_byte((uint32_t)*address); address += 1;
+  edit.new_end_byte = code_unit_to_byte((uint32_t)*address); address += 1;
+  return edit;
+}
+
+/********************/
+/* Section - Parser */
+/********************/
+
+// Implemented in JavaScript; see imports.js.
+extern void tree_sitter_parse_callback(
+  char *input_buffer,
+  uint32_t index,
+  uint32_t row,
+  uint32_t column,
+  uint32_t *length_read
+);
+
+// Implemented in JavaScript; see imports.js.
+extern void tree_sitter_log_callback(
+  void *payload,
+  TSLogType log_type,
+  const char *message
+);
+
+// Creates a parser and its input buffer, returning both addresses to
+// JavaScript through transfer slots 0 and 1.
+void ts_parser_new_wasm() {
+  TSParser *parser = ts_parser_new();
+  char *input_buffer = calloc(INPUT_BUFFER_SIZE, sizeof(char));
+  TRANSFER_BUFFER[0] = parser;
+  TRANSFER_BUFFER[1] = input_buffer;
+}
+
+// Read function given to the parser. Asks JavaScript to fill the input buffer
+// (passed as `payload`) and reports how many bytes of text it now holds.
+static const char *call_parse_callback(
+  void *payload,
+  uint32_t byte,
+  TSPoint position,
+  uint32_t *bytes_read
+) {
+  char *buffer = (char *)payload;
+  tree_sitter_parse_callback(
+    buffer,
+    byte_to_code_unit(byte),
+    position.row,
+    byte_to_code_unit(position.column),
+    bytes_read
+  );
+  *bytes_read = code_unit_to_byte(*bytes_read);
+
+  // The callback reports the length of the whole chunk, but the buffer only
+  // has room for INPUT_BUFFER_SIZE - 2 bytes of text plus a 2-byte terminator.
+  // Clamp only chunks that did not fit; always subtracting 2 dropped the last
+  // code unit of every shorter chunk and reported a one-unit chunk as empty.
+  if (*bytes_read >= INPUT_BUFFER_SIZE) {
+    *bytes_read = INPUT_BUFFER_SIZE - 2;
+  }
+  return buffer;
+}
+
+// Installs or removes the JavaScript log callback as the parser's logger.
+void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) {
+  TSLogger logger = {self, should_log ? tree_sitter_log_callback : NULL};
+  ts_parser_set_logger(self, logger);
+}
+
+// Parses UTF-16 text obtained through the JavaScript parse callback, using
+// `input_buffer` to hold each chunk.
+TSTree *ts_parser_parse_wasm(
+  TSParser *self,
+  char *input_buffer,
+  const TSTree *old_tree
+) {
+  TSInput input = {
+    input_buffer,
+    call_parse_callback,
+    TSInputEncodingUTF16
+  };
+  return ts_parser_parse(self, old_tree, input);
+}
+
+/******************/
+/* Section - Tree */
+/******************/
+
+void ts_tree_root_node_wasm(const TSTree *tree) {
+  marshal_node(ts_tree_root_node(tree));
+}
+
+void ts_tree_edit_wasm(TSTree *tree) {
+  TSInputEdit edit = unmarshal_edit();
+  ts_tree_edit(tree, &edit);
+}
+
+/******************/
+/* Section - Node */
+/******************/
+
+// Every function in this section reads its node from the transfer buffer.
+// Those that produce a node or point write it back to the transfer buffer.
+
+uint16_t ts_node_symbol_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return ts_node_symbol(node);
+}
+
+uint32_t ts_node_child_count_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return ts_node_child_count(node);
+}
+
+uint32_t ts_node_named_child_count_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return ts_node_named_child_count(node);
+}
+
+void ts_node_child_wasm(const TSTree *tree, uint32_t index) {
+  TSNode node = unmarshal_node(tree);
+  marshal_node(ts_node_child(node, index));
+}
+
+void ts_node_named_child_wasm(const TSTree *tree, uint32_t index) {
+  TSNode node = unmarshal_node(tree);
+  marshal_node(ts_node_named_child(node, index));
+}
+
+void ts_node_parent_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  marshal_node(ts_node_parent(node));
+}
+
+// The start and end points follow the node, in transfer slots 5-8.
+void ts_node_descendant_for_position_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  const void **address = TRANSFER_BUFFER + 5;
+  TSPoint start = unmarshal_point(address); address += 2;
+  TSPoint end = unmarshal_point(address);
+  marshal_node(ts_node_descendant_for_point_range(node, start, end));
+}
+
+// The start and end points follow the node, in transfer slots 5-8.
+void ts_node_named_descendant_for_position_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  const void **address = TRANSFER_BUFFER + 5;
+  TSPoint start = unmarshal_point(address); address += 2;
+  TSPoint end = unmarshal_point(address);
+  marshal_node(ts_node_named_descendant_for_point_range(node, start, end));
+}
+
+void ts_node_start_point_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  marshal_point(ts_node_start_point(node));
+}
+
+void ts_node_end_point_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  marshal_point(ts_node_end_point(node));
+}
+
+uint32_t ts_node_start_index_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return byte_to_code_unit(ts_node_start_byte(node));
+}
+
+uint32_t ts_node_end_index_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return byte_to_code_unit(ts_node_end_byte(node));
+}
+
+// Returns the string produced by ts_node_string; the JavaScript caller frees
+// it.
+char *ts_node_to_string_wasm(const TSTree *tree) {
+  TSNode node = unmarshal_node(tree);
+  return ts_node_string(node);
+}
diff --git a/lib/web/binding.js b/lib/web/binding.js
new file mode 100644
index 00000000..96d54407
--- /dev/null
+++ b/lib/web/binding.js
@@ -0,0 +1,393 @@
+const C = Module;
+const INTERNAL = {};
+const SIZE_OF_INT = 4;
+const SIZE_OF_NODE = 5 * SIZE_OF_INT;
+const SIZE_OF_POINT = 2 * SIZE_OF_INT;
+const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT;
+
+// Address of the scratch buffer shared with binding.c; set by Parser.init.
+var TRANSFER_BUFFER;
+// Callbacks in effect while a parse is running; read by imports.js.
+var currentParseCallback;
+var currentLogCallback;
+
+class Parser {
+  // Resolves once the runtime is initialized and the transfer buffer is known.
+  static init() {
+    return new Promise(resolve => {
+      Module.onRuntimeInitialized = resolve
+    }).then(() => {
+      TRANSFER_BUFFER = C._ts_init();
+    });
+  }
+
+  constructor() {
+    C._ts_parser_new_wasm();
+    this[0] = getValue(TRANSFER_BUFFER, 'i32');
+    this[1] = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
+  }
+
+  // Frees the native parser (this[0]) and its input buffer (this[1]).
+  delete() {
+    C._ts_parser_delete(this[0]);
+    C._free(this[1]);
+  }
+
+  setLanguage(language) {
+    if (language.constructor !== Language) {
+      throw new Error('Argument must be a Language');
+    }
+    C._ts_parser_set_language(this[0], language[0]);
+    if (C._ts_parser_language(this[0]) !== language[0]) {
+      throw new Error('Incompatible language');
+    }
+    // Only remember the language once the parser has accepted it.
+    this.language = language;
+  }
+
+  getLanguage() {
+    return this.language
+  }
+
+  // NOTE(review): marshalRange must write fields in the order and units of
+  // the C TSRange struct, which is not visible in this patch - confirm.
+  setIncludedRanges(ranges) {
+    const buffer = C._calloc(ranges.length, SIZE_OF_RANGE);
+    let address = buffer;
+    for (let i = 0, n = ranges.length; i < n; i++) {
+      marshalRange(address, ranges[i]);
+      address += SIZE_OF_RANGE;
+    }
+    C._ts_parser_set_included_ranges(this[0], buffer, ranges.length);
+    C._free(buffer);
+  }
+
+  getIncludedRanges() {
+    const buffer = C._ts_parser_included_ranges(this[0], TRANSFER_BUFFER);
+    const length = getValue(TRANSFER_BUFFER, 'i32');
+    const result = new Array(length);
+    let address = buffer;
+    for (let i = 0; i < length; i++) {
+      result[i] = unmarshalRange(address);
+      address += SIZE_OF_RANGE;
+    }
+    return result;
+  }
+
+  // Parses text supplied either as a string or as a callback that returns the
+  // text starting at a given index. Throws if no tree is produced.
+  parse(oldTree, callback) {
+    if (typeof callback === 'string') {
+      return this.parse(oldTree, index => callback.slice(index))
+    }
+
+    if (this.logCallback) {
+      currentLogCallback = this.logCallback;
+      C._ts_parser_enable_logger_wasm(this[0], 1);
+    } else {
+      C._ts_parser_enable_logger_wasm(this[0], 0);
+    }
+
+    currentParseCallback = callback;
+    let treeAddress;
+    try {
+      treeAddress = C._ts_parser_parse_wasm(
+        this[0],
+        this[1],
+        oldTree ? oldTree[0] : 0
+      );
+    } finally {
+      // Clear the callbacks even if one of them throws during the parse.
+      currentParseCallback = null;
+      currentLogCallback = null;
+    }
+
+    if (!treeAddress) {
+      throw new Error('Parsing failed');
+    }
+
+    return new Tree(INTERNAL, treeAddress, this.language, callback);
+  }
+
+  reset() {
+    C._ts_parser_reset(this[0]);
+  }
+
+  setTimeoutMicros(timeout) {
+    C._ts_parser_set_timeout_micros(this[0], timeout);
+  }
+
+  getTimeoutMicros() {
+    return C._ts_parser_timeout_micros(this[0]);
+  }
+
+  setLogger(callback) {
+    this.logCallback = callback;
+  }
+
+  getLogger() {
+    return this.logCallback;
+  }
+}
+
+class Tree {
+  constructor(internal, address, language, textCallback) {
+    if (internal !== INTERNAL) {
+      throw new Error('Illegal constructor')
+    }
+    this[0] = address;
+    this.language = language;
+    this.textCallback = textCallback;
+  }
+
+  copy() {
+    const address = C._ts_tree_copy(this[0]);
+    return new Tree(INTERNAL, address, this.language, this.textCallback);
+  }
+
+  delete() {
+    C._ts_tree_delete(this[0]);
+  }
+
+  // `edit` holds startPosition, oldEndPosition, newEndPosition, startIndex,
+  // oldEndIndex and newEndIndex.
+  edit(edit) {
+    marshalEdit(edit);
+    C._ts_tree_edit_wasm(this[0]);
+  }
+
+  get rootNode() {
+    C._ts_tree_root_node_wasm(this[0]);
+    return unmarshalNode(this);
+  }
+
+  getLanguage() {
+    return this.language;
+  }
+}
+
+class Node {
+  constructor(internal, tree) {
+    if (internal !== INTERNAL) {
+      throw new Error('Illegal constructor')
+    }
+    this.tree = tree;
+  }
+
+  get typeId() {
+    marshalNode(this);
+    return C._ts_node_symbol_wasm(this.tree[0]);
+  }
+
+  get type() {
+    return this.tree.language.types[this.typeId] || 'ERROR';
+  }
+
+  get startPosition() {
+    marshalNode(this);
+    C._ts_node_start_point_wasm(this.tree[0]);
+    return unmarshalPoint(TRANSFER_BUFFER);
+  }
+
+  get endPosition() {
+    marshalNode(this);
+    C._ts_node_end_point_wasm(this.tree[0]);
+    return unmarshalPoint(TRANSFER_BUFFER);
+  }
+
+  get startIndex() {
+    marshalNode(this);
+    return C._ts_node_start_index_wasm(this.tree[0]);
+  }
+
+  get endIndex() {
+    marshalNode(this);
+    return C._ts_node_end_index_wasm(this.tree[0]);
+  }
+
+  // Reassembles the node's text by calling the tree's text callback until
+  // enough characters have been collected.
+  get text() {
+    const startIndex = this.startIndex;
+    const length = this.endIndex - startIndex;
+    let result = this.tree.textCallback(startIndex) || '';
+    while (result.length < length) {
+      const chunk = this.tree.textCallback(startIndex + result.length);
+      // Stop if the callback has no more text, rather than looping forever.
+      if (!chunk) break;
+      result += chunk;
+    }
+    return result.slice(0, length);
+  }
+
+  equals(other) {
+    if (this === other) return true;
+    for (let i = 0; i < 5; i++) {
+      if (this[i] !== other[i]) return false;
+    }
+    return true;
+  }
+
+  get childCount() {
+    marshalNode(this);
+    return C._ts_node_child_count_wasm(this.tree[0]);
+  }
+
+  child(index) {
+    marshalNode(this);
+    C._ts_node_child_wasm(this.tree[0], index);
+    return unmarshalNode(this.tree);
+  }
+
+  get namedChildCount() {
+    marshalNode(this);
+    return C._ts_node_named_child_count_wasm(this.tree[0]);
+  }
+
+  namedChild(index) {
+    marshalNode(this);
+    C._ts_node_named_child_wasm(this.tree[0], index);
+    return unmarshalNode(this.tree);
+  }
+
+  get parent() {
+    marshalNode(this);
+    C._ts_node_parent_wasm(this.tree[0]);
+    return unmarshalNode(this.tree);
+  }
+
+  descendantForPosition(start, end = start) {
+    marshalNode(this);
+    let address = TRANSFER_BUFFER + SIZE_OF_NODE;
+    marshalPoint(address, start);
+    marshalPoint(address + SIZE_OF_POINT, end);
+    C._ts_node_descendant_for_position_wasm(this.tree[0]);
+    return unmarshalNode(this.tree);
+  }
+
+  namedDescendantForPosition(start, end = start) {
+    marshalNode(this);
+    let address = TRANSFER_BUFFER + SIZE_OF_NODE;
+    marshalPoint(address, start);
+    marshalPoint(address + SIZE_OF_POINT, end);
+    C._ts_node_named_descendant_for_position_wasm(this.tree[0]);
+    return unmarshalNode(this.tree);
+  }
+
+  toString() {
+    marshalNode(this);
+    const address = C._ts_node_to_string_wasm(this.tree[0]);
+    const result = AsciiToString(address);
+    C._free(address);
+    return result;
+  }
+}
+
+class Language {
+  constructor(internal, address) {
+    if (internal !== INTERNAL) {
+      throw new Error('Illegal constructor')
+    }
+    this[0] = address;
+    this.types = new Array(C._ts_language_symbol_count(this[0]));
+    // Cache the name of every symbol whose symbol type is below 2, by id.
+    for (let i = 0, n = this.types.length; i < n; i++) {
+      if (C._ts_language_symbol_type(this[0], i) < 2) {
+        this.types[i] = UTF8ToString(C._ts_language_symbol_name(this[0], i));
+      }
+    }
+  }
+
+  get version() {
+    return C._ts_language_version(this[0]);
+  }
+
+  // Fetches and loads a language module from `url`, then calls its exported
+  // `tree_sitter_*` function to obtain the address of the language.
+  static load(url) {
+    return fetch(url)
+      .then(response => response.arrayBuffer()
+        .then(buffer => {
+          if (response.ok) {
+            return loadWebAssemblyModule(new Uint8Array(buffer), {loadAsync: true});
+          } else {
+            const body = new TextDecoder('utf-8').decode(buffer);
+            throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
+          }
+        }))
+      .then(exports => {
+        const functionName = Object.keys(exports).find(key => key.includes("tree_sitter_"));
+        const languageAddress = exports[functionName]();
+        return new Language(INTERNAL, languageAddress);
+      });
+  }
+}
+
+// Writes the five words of `node` to the start of the transfer buffer.
+function marshalNode(node) {
+  let address = TRANSFER_BUFFER;
+  for (let i = 0; i < 5; i++) {
+    setValue(address, node[i], 'i32');
+    address += SIZE_OF_INT;
+  }
+}
+
+// Reads a node from the transfer buffer; returns null when its id is 0.
+function unmarshalNode(tree) {
+  let address = TRANSFER_BUFFER;
+  const id = getValue(address, 'i32');
+  if (id === 0) return null;
+  const result = new Node(INTERNAL, tree);
+  result[0] = id;
+  address += SIZE_OF_INT;
+  for (let i = 1; i < 5; i++) {
+    result[i] = getValue(address, 'i32');
+    address += SIZE_OF_INT;
+  }
+  return result;
+}
+
+function marshalPoint(address, point) {
+  setValue(address, point.row, 'i32')
+  setValue(address + SIZE_OF_INT, point.column, 'i32')
+}
+
+function unmarshalPoint(address) {
+  return {
+    row: getValue(address, 'i32'),
+    column: getValue(address + SIZE_OF_INT, 'i32')
+  }
+}
+
+function marshalRange(address, range) {
+  setValue(address, range.startIndex, 'i32'); address += SIZE_OF_INT;
+  setValue(address, range.endIndex, 'i32'); address += SIZE_OF_INT;
+  marshalPoint(address, range.startPosition); address += SIZE_OF_POINT;
+  marshalPoint(address, range.endPosition); address += SIZE_OF_POINT;
+}
+
+function unmarshalRange(address) {
+  const result = {};
+  result.startIndex = getValue(address, 'i32'); address += SIZE_OF_INT;
+  result.endIndex = getValue(address, 'i32'); address += SIZE_OF_INT;
+  result.startPosition = unmarshalPoint(address); address += SIZE_OF_POINT;
+  result.endPosition = unmarshalPoint(address);
+  return result;
+}
+
+// Writes `edit` to the transfer buffer in the order binding.c reads it back.
+function marshalEdit(edit) {
+  let address = TRANSFER_BUFFER;
+  marshalPoint(address, edit.startPosition); address += SIZE_OF_POINT;
+  marshalPoint(address, edit.oldEndPosition); address += SIZE_OF_POINT;
+  marshalPoint(address, edit.newEndPosition); address += SIZE_OF_POINT;
+  setValue(address, edit.startIndex, 'i32'); address += SIZE_OF_INT;
+  setValue(address, edit.oldEndIndex, 'i32'); address += SIZE_OF_INT;
+  setValue(address, edit.newEndIndex, 'i32'); address += SIZE_OF_INT;
+}
+
+Parser.Language = Language;
+
+return Parser;
+
+}));
diff --git a/lib/web/imports.js b/lib/web/imports.js
new file mode 100644
index 00000000..b9bca74e
--- /dev/null
+++ b/lib/web/imports.js
@@ -0,0 +1,30 @@
+mergeInto(LibraryManager.library, {
+  // Called from binding.c to fetch the text starting at `index`. Writes the
+  // text into the input buffer as UTF-16 and stores its length, in code units,
+  // at `lengthAddress`.
+  tree_sitter_parse_callback: function(
+    inputBufferAddress,
+    index,
+    row,
+    column,
+    lengthAddress
+  ) {
+    var INPUT_BUFFER_SIZE = 10 * 1024;
+    var string = currentParseCallback(index, {row: row, column: column});
+    if (typeof string === 'string') {
+      setValue(lengthAddress, string.length, 'i32');
+      stringToUTF16(string, inputBufferAddress, INPUT_BUFFER_SIZE);
+    } else {
+      // A callback that returns no string has no more text to offer.
+      setValue(lengthAddress, 0, 'i32');
+    }
+  },
+
+  // Forwards a parser log message to the logger of the active parse, if any.
+  tree_sitter_log_callback: function(_payload, isLexMessage, messageAddress) {
+    if (currentLogCallback) {
+      const message = UTF8ToString(messageAddress);
+      currentLogCallback(isLexMessage, message);
+    }
+  }
+});
diff --git a/lib/web/prefix.js b/lib/web/prefix.js
new file mode 100644
index 00000000..b380d814
--- /dev/null
+++ b/lib/web/prefix.js
@@ -0,0 +1,12 @@
+// Opens the module wrapper; the factory body is closed at the end of binding.js.
+(function (root, factory) {
+  if (typeof define === 'function' && define.amd) {
+    define([], factory);
+  } else if (typeof exports === 'object') {
+    module.exports = factory();
+    module.exports.init();
+    delete module.exports.init;
+  } else {
+    window.TreeSitter = factory();
+  }
+}(this, function () {
diff --git a/script/build-wasm b/script/build-wasm
new file mode 100755
index 00000000..09860e5d
--- /dev/null
+++ b/script/build-wasm
@@ -0,0 +1,37 @@
+#!/usr/bin/env bash
+
+# Builds the tree-sitter library and its web binding into
+# target/release/tree-sitter.js, or into target/debug when given --debug.
+
+set -e
+
+export EMCC_FORCE_STDLIBS=libc++
+
+args="-Os"
+target_dir="target/release"
+if [[ "$1" == "--debug" ]]; then
+  args="-s SAFE_HEAP=1 -O0"
+  target_dir="target/debug"
+fi
+
+mkdir -p "$target_dir"
+
+# $args is intentionally unquoted so that it splits into separate flags.
+emcc \
+  -s WASM=1 \
+  -s ALLOW_MEMORY_GROWTH \
+  -s MAIN_MODULE=1 \
+  -s ASSERTIONS=1 \
+  -s EXPORT_ALL=1 \
+  $args \
+  -std=c99 \
+  -D 'fprintf(...)=' \
+  -I lib/src \
+  -I lib/include \
+  -I lib/utf8proc \
+  --js-library lib/web/imports.js \
+  --pre-js lib/web/prefix.js \
+  --post-js lib/web/binding.js \
+  lib/src/lib.c \
+  lib/web/binding.c \
+  -o "$target_dir/tree-sitter.js"