Start work on a WASM binding

This commit is contained in:
Max Brunsfeld 2019-04-23 14:29:46 -07:00
parent 82ccc0e56d
commit 1fc0525940
11 changed files with 737 additions and 4 deletions

1
.gitignore vendored
View file

@ -15,3 +15,4 @@ test/fixtures/grammars/*
*.obj
*.exp
*.lib
*.wasm

View file

@ -15,7 +15,7 @@ mod grammars;
mod nfa;
mod node_types;
mod npm_files;
mod parse_grammar;
pub mod parse_grammar;
mod prepare_grammar;
mod render;
mod rules;

View file

@ -64,8 +64,8 @@ enum RuleJSON {
}
#[derive(Deserialize)]
struct GrammarJSON {
name: String,
pub(crate) struct GrammarJSON {
pub(crate) name: String,
rules: Map<String, Value>,
conflicts: Option<Vec<Vec<String>>>,
externals: Option<Vec<RuleJSON>>,

View file

@ -8,6 +8,7 @@ pub mod parse;
pub mod properties;
pub mod test;
pub mod util;
pub mod wasm;
#[cfg(test)]
mod tests;

View file

@ -5,7 +5,7 @@ use std::path::Path;
use std::process::exit;
use std::{u64, usize};
use tree_sitter_cli::{
config, error, generate, highlight, loader, logger, parse, properties, test,
config, error, generate, highlight, loader, logger, parse, properties, test, wasm,
};
fn main() {
@ -90,6 +90,11 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("html").long("html").short("h"))
.arg(Arg::with_name("time").long("time").short("t")),
)
.subcommand(
SubCommand::with_name("build-wasm")
.about("Compile a parser to WASM")
.arg(Arg::with_name("path").index(1).multiple(true)),
)
.get_matches();
let home_dir = dirs::home_dir().expect("Failed to read home directory");
@ -237,6 +242,9 @@ fn run() -> error::Result<()> {
)));
}
}
} else if let Some(matches) = matches.subcommand_matches("build-wasm") {
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
wasm::compile_language_to_wasm(&grammar_path)?;
}
Ok(())

72
cli/src/wasm.rs Normal file
View file

@ -0,0 +1,72 @@
use super::error::{Error, Result};
use super::generate::parse_grammar::GrammarJSON;
use std::fs;
use std::path::Path;
use std::process::Command;
pub fn compile_language_to_wasm(language_dir: &Path) -> Result<()> {
let src_dir = language_dir.join("src");
let grammar_json_path = src_dir.join("grammar.json");
let grammar_json = fs::read_to_string(&grammar_json_path).map_err(|e| {
format!(
"Failed to read grammar file {:?} - {}",
grammar_json_path, e
)
})?;
let grammar: GrammarJSON = serde_json::from_str(&grammar_json).map_err(|e| {
format!(
"Failed to parse grammar file {:?} - {}",
grammar_json_path, e
)
})?;
let mut command = Command::new("emcc");
command.args(&[
"-o",
&format!("tree-sitter-{}.wasm", grammar.name),
"-Os",
"-s",
"WASM=1",
"-s",
"SIDE_MODULE=1",
"-s",
&format!("EXPORTED_FUNCTIONS=[\"_tree_sitter_{}\"]", grammar.name),
]);
command.arg("-I").arg(&src_dir);
// Find source files to compile
let entries = fs::read_dir(&src_dir)
.map_err(|e| format!("Failed to read source directory {:?} - {}", src_dir, e))?;
for entry in entries {
let entry = entry?;
let file_name = entry.file_name();
// Do not compile the node.js binding file.
if file_name
.to_str()
.map_or(false, |s| s.starts_with("binding"))
{
continue;
}
// Compile any .c, .cc, or .cpp files
if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) {
if extension == "c" || extension == "cc" || extension == "cpp" {
command.arg(entry.path());
}
}
}
let output = command
.output()
.map_err(|e| format!("Failed to run emcc command - {}", e))?;
if output.status.success() {
Ok(())
} else {
Err(Error::from(format!(
"emcc command failed - {}",
String::from_utf8_lossy(&output.stderr)
)))
}
}

221
lib/web/binding.c Normal file
View file

@ -0,0 +1,221 @@
#include <emscripten.h>
#include <tree_sitter/api.h>
#include <stdio.h>
/*****************************/
/* Section - Data marshaling */
/*****************************/
/* Size, in bytes, of the input buffer that ts_parser_new_wasm allocates for each parser. */
static const uint32_t INPUT_BUFFER_SIZE = 10 * 1024;
/*
 * Scratch area of 12 pointer-sized slots used to pass nodes, points and edits
 * between this file and its caller. Its address is handed out by ts_init.
 */
const void *TRANSFER_BUFFER[12] = {
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
NULL, NULL, NULL, NULL,
};
/* Returns the address of the shared transfer buffer so the caller can read and write its slots. */
void *ts_init() {
  return (void *)TRANSFER_BUFFER;
}
/* Converts a count of 2-byte code units into a byte count (wraps modulo 2^32). */
static uint32_t code_unit_to_byte(uint32_t unit) {
  return unit * 2u;
}
/* Converts a byte count into a count of 2-byte code units, rounding down. */
static uint32_t byte_to_code_unit(uint32_t byte) {
  return byte / 2u;
}
/*
 * Writes a node into transfer buffer slots 0-4: the node id in slot 0,
 * followed by its four context words in slots 1-4.
 */
static void marshal_node(TSNode node) {
  TRANSFER_BUFFER[0] = (const void *)node.id;
  for (unsigned i = 0; i < 4; i++) {
    TRANSFER_BUFFER[i + 1] = (const void *)node.context[i];
  }
}
/*
 * Rebuilds a node from transfer buffer slots 0-4 (id, then four context
 * words) and attaches it to the given tree.
 */
static TSNode unmarshal_node(const TSTree *tree) {
  TSNode result;
  result.tree = tree;
  result.id = TRANSFER_BUFFER[0];
  for (unsigned i = 0; i < 4; i++) {
    result.context[i] = (uint32_t)TRANSFER_BUFFER[i + 1];
  }
  return result;
}
/*
 * Writes a point into transfer buffer slots 0-1: the row unchanged and the
 * column converted from bytes to code units.
 */
static void marshal_point(TSPoint point) {
  uint32_t column_units = byte_to_code_unit(point.column);
  TRANSFER_BUFFER[0] = (const void *)point.row;
  TRANSFER_BUFFER[1] = (const void *)column_units;
}
/*
 * Reads a point from two consecutive slots starting at `address`: the row
 * unchanged and the column converted from code units to bytes.
 */
static TSPoint unmarshal_point(const void **address) {
  uint32_t row = (uint32_t)address[0];
  uint32_t column_units = (uint32_t)address[1];
  TSPoint result;
  result.row = row;
  result.column = code_unit_to_byte(column_units);
  return result;
}
/*
 * Reads an edit from the transfer buffer. Slots 0-5 hold the start, old end
 * and new end points (two slots each); slots 6-8 hold the start, old end and
 * new end offsets in code units, which are converted to bytes.
 */
static TSInputEdit unmarshal_edit() {
  const void **slots = TRANSFER_BUFFER;
  TSInputEdit result;
  result.start_point = unmarshal_point(slots + 0);
  result.old_end_point = unmarshal_point(slots + 2);
  result.new_end_point = unmarshal_point(slots + 4);
  result.start_byte = code_unit_to_byte((uint32_t)slots[6]);
  result.old_end_byte = code_unit_to_byte((uint32_t)slots[7]);
  result.new_end_byte = code_unit_to_byte((uint32_t)slots[8]);
  return result;
}
/********************/
/* Section - Parser */
/********************/
/*
 * Defined outside this file. Called by call_parse_callback to fetch the next
 * chunk of input into `input_buffer`; the chunk length is stored through
 * `length_read`. `index` and `column` are passed in code units.
 */
extern void tree_sitter_parse_callback(
char *input_buffer,
uint32_t index,
uint32_t row,
uint32_t column,
uint32_t *length_read
);
/*
 * Defined outside this file. Installed as the parser's log function by
 * ts_parser_enable_logger_wasm when logging is enabled.
 */
extern void tree_sitter_log_callback(
void *payload,
TSLogType log_type,
const char *message
);
/*
 * Creates a parser together with a zeroed input buffer of INPUT_BUFFER_SIZE
 * bytes. The parser pointer is returned in transfer buffer slot 0 and the
 * input buffer pointer in slot 1.
 */
void ts_parser_new_wasm() {
  TRANSFER_BUFFER[0] = ts_parser_new();
  TRANSFER_BUFFER[1] = calloc(INPUT_BUFFER_SIZE, sizeof(char));
}
/*
 * Input-read function handed to the parser. `payload` is the parser's input
 * buffer; the external parse callback fills it with the text starting at the
 * requested position and reports the text length in code units.
 *
 * On return, `*bytes_read` holds the number of valid bytes in the buffer and
 * the buffer itself is returned.
 */
static const char *call_parse_callback(
  void *payload,
  uint32_t byte,
  TSPoint position,
  uint32_t *bytes_read
) {
  char *buffer = (char *)payload;
  tree_sitter_parse_callback(
    buffer,
    byte_to_code_unit(byte),
    position.row,
    byte_to_code_unit(position.column),
    bytes_read
  );
  *bytes_read = code_unit_to_byte(*bytes_read);

  // The buffer always ends with a 2-byte null terminator, so at most
  // INPUT_BUFFER_SIZE - 2 bytes of text fit. Only clamp when the reported
  // length reaches the buffer size; a shorter chunk is fully present and must
  // not lose its final code unit.
  if (*bytes_read >= INPUT_BUFFER_SIZE) {
    *bytes_read = INPUT_BUFFER_SIZE - 2;
  }
  return buffer;
}
/*
 * Installs the external log callback on the parser when `should_log` is true,
 * or a logger with a NULL callback otherwise. The parser itself is the
 * logger payload in both cases.
 */
void ts_parser_enable_logger_wasm(TSParser *self, bool should_log) {
  if (should_log) {
    TSLogger enabled = {self, tree_sitter_log_callback};
    ts_parser_set_logger(self, enabled);
  } else {
    TSLogger disabled = {self, NULL};
    ts_parser_set_logger(self, disabled);
  }
}
/*
 * Parses UTF-16 input that is pulled chunk by chunk through
 * call_parse_callback, using `input_buffer` as the chunk storage.
 * `old_tree` is forwarded to the parser unchanged.
 */
TSTree *ts_parser_parse_wasm(
  TSParser *self,
  char *input_buffer,
  const TSTree *old_tree
) {
  return ts_parser_parse(
    self,
    old_tree,
    (TSInput){input_buffer, call_parse_callback, TSInputEncodingUTF16}
  );
}
/******************/
/* Section - Tree */
/******************/
void ts_tree_root_node_wasm(const TSTree *tree) {
marshal_node(ts_tree_root_node(tree));
}
/* Applies the edit currently stored in the transfer buffer to the tree. */
void ts_tree_edit_wasm(TSTree *tree) {
  TSInputEdit pending = unmarshal_edit();
  ts_tree_edit(tree, &pending);
}
/******************/
/* Section - Node */
/******************/
/* Returns the symbol of the node stored in the transfer buffer. */
uint16_t ts_node_symbol_wasm(const TSTree *tree) {
  return ts_node_symbol(unmarshal_node(tree));
}
/* Returns the child count of the node stored in the transfer buffer. */
uint32_t ts_node_child_count_wasm(const TSTree *tree) {
  return ts_node_child_count(unmarshal_node(tree));
}
/* Returns the named-child count of the node stored in the transfer buffer. */
uint32_t ts_node_named_child_count_wasm(const TSTree *tree) {
  return ts_node_named_child_count(unmarshal_node(tree));
}
/*
 * Replaces the node in the transfer buffer with its child at `index`.
 */
void ts_node_child_wasm(const TSTree *tree, uint32_t index) {
  TSNode parent = unmarshal_node(tree);
  TSNode child = ts_node_child(parent, index);
  marshal_node(child);
}
/*
 * Replaces the node in the transfer buffer with its named child at `index`.
 */
void ts_node_named_child_wasm(const TSTree *tree, uint32_t index) {
  TSNode parent = unmarshal_node(tree);
  TSNode child = ts_node_named_child(parent, index);
  marshal_node(child);
}
/* Replaces the node in the transfer buffer with its parent. */
void ts_node_parent_wasm(const TSTree *tree) {
  TSNode child = unmarshal_node(tree);
  TSNode parent = ts_node_parent(child);
  marshal_node(parent);
}
/*
 * Looks up the descendant of the buffered node that spans a point range.
 * Input: node in slots 0-4, start point in slots 5-6, end point in slots 7-8.
 * Output: the resulting node, written back to slots 0-4.
 */
void ts_node_descendant_for_position_wasm(const TSTree *tree) {
  TSNode ancestor = unmarshal_node(tree);
  // Both points are read before the result overwrites the buffer.
  TSPoint range_start = unmarshal_point(TRANSFER_BUFFER + 5);
  TSPoint range_end = unmarshal_point(TRANSFER_BUFFER + 7);
  marshal_node(ts_node_descendant_for_point_range(ancestor, range_start, range_end));
}
/*
 * Looks up the named descendant of the buffered node that spans a point range.
 * Input: node in slots 0-4, start point in slots 5-6, end point in slots 7-8.
 * Output: the resulting node, written back to slots 0-4.
 */
void ts_node_named_descendant_for_position_wasm(const TSTree *tree) {
  TSNode ancestor = unmarshal_node(tree);
  // Both points are read before the result overwrites the buffer.
  TSPoint range_start = unmarshal_point(TRANSFER_BUFFER + 5);
  TSPoint range_end = unmarshal_point(TRANSFER_BUFFER + 7);
  marshal_node(ts_node_named_descendant_for_point_range(ancestor, range_start, range_end));
}
/* Writes the start point of the buffered node into transfer buffer slots 0-1. */
void ts_node_start_point_wasm(const TSTree *tree) {
  marshal_point(ts_node_start_point(unmarshal_node(tree)));
}
/* Writes the end point of the buffered node into transfer buffer slots 0-1. */
void ts_node_end_point_wasm(const TSTree *tree) {
  marshal_point(ts_node_end_point(unmarshal_node(tree)));
}
/* Returns the start offset of the buffered node, converted from bytes to code units. */
uint32_t ts_node_start_index_wasm(const TSTree *tree) {
  uint32_t start_byte = ts_node_start_byte(unmarshal_node(tree));
  return byte_to_code_unit(start_byte);
}
/* Returns the end offset of the buffered node, converted from bytes to code units. */
uint32_t ts_node_end_index_wasm(const TSTree *tree) {
  uint32_t end_byte = ts_node_end_byte(unmarshal_node(tree));
  return byte_to_code_unit(end_byte);
}
/*
 * Returns the string form of the buffered node as produced by ts_node_string.
 * The JavaScript caller releases the returned pointer with free.
 */
char *ts_node_to_string_wasm(const TSTree *tree) {
  return ts_node_string(unmarshal_node(tree));
}

367
lib/web/binding.js Normal file
View file

@ -0,0 +1,367 @@
// Shorthand for the module object whose exported C functions are called below.
const C = Module;
// Private token: the Tree, Node and Language constructors throw unless they receive it.
const INTERNAL = {};
// Byte sizes used to compute offsets when reading and writing marshaled values.
const SIZE_OF_INT = 4;
const SIZE_OF_NODE = 5 * SIZE_OF_INT;
const SIZE_OF_POINT = 2 * SIZE_OF_INT;
const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT;
// Address of the shared transfer buffer; assigned in Parser.init() from C._ts_init().
var TRANSFER_BUFFER;
// Callbacks for the parse call in progress; Parser.parse() sets and then clears them.
var currentParseCallback;
var currentLogCallback;
/**
 * Wrapper around a native parser.
 *
 * `this[0]` holds the address of the native parser and `this[1]` the address
 * of the input buffer used to hand text chunks to it. Call `delete()` to
 * release both.
 */
class Parser {
  /**
   * Waits for the runtime to finish initializing, then records the address of
   * the shared transfer buffer.
   * @returns {Promise<void>}
   */
  static init() {
    return new Promise(resolve => {
      Module.onRuntimeInitialized = resolve
    }).then(() => {
      TRANSFER_BUFFER = C._ts_init();
    });
  }

  constructor() {
    // The native constructor returns its two pointers through the transfer buffer.
    C._ts_parser_new_wasm();
    this[0] = getValue(TRANSFER_BUFFER, 'i32');
    this[1] = getValue(TRANSFER_BUFFER + SIZE_OF_INT, 'i32');
  }

  /** Frees the native parser and its input buffer. */
  delete() {
    C._ts_parser_delete(this[0]);
    C._free(this[1]);
  }

  /**
   * Assigns the language used for parsing.
   * @param {Language} language
   * @throws {Error} If the argument is not a Language, or the parser rejects it.
   */
  setLanguage(language) {
    if (language.constructor !== Language) {
      throw new Error('Argument must be a Language');
    }
    C._ts_parser_set_language(this[0], language[0]);
    if (C._ts_parser_language(this[0]) !== language[0]) {
      throw new Error('Incompatible language');
    }
    // Only remember the language once the parser has accepted it, so that a
    // failed call does not leave a stale value behind.
    this.language = language;
  }

  /** @returns {Language|undefined} The language most recently accepted by setLanguage. */
  getLanguage() {
    return this.language
  }

  /**
   * Restricts parsing to the given ranges.
   * @param {Array<{startIndex, endIndex, startPosition, endPosition}>} ranges
   */
  setIncludedRanges(ranges) {
    const buffer = C._calloc(ranges.length, SIZE_OF_RANGE);
    let address = buffer;
    for (let i = 0, n = ranges.length; i < n; i++) {
      marshalRange(address, ranges[i]);
      address += SIZE_OF_RANGE;
    }
    // Must use `this`: `self` is the global scope, not the parser.
    C._ts_parser_set_included_ranges(this[0], buffer, ranges.length);
    C._free(buffer);
  }

  /** @returns {Array<Object>} The ranges currently included by the parser. */
  getIncludedRanges() {
    // The native call writes the range count into the transfer buffer and
    // returns the address of the first range.
    const buffer = C._ts_parser_included_ranges(this[0], TRANSFER_BUFFER);
    const length = getValue(TRANSFER_BUFFER, 'i32');
    const result = new Array(length);
    let address = buffer;
    for (let i = 0; i < length; i++) {
      result[i] = unmarshalRange(address);
      address += SIZE_OF_RANGE;
    }
    return result;
  }

  /**
   * Parses text and returns the resulting Tree.
   * @param {Tree|null} oldTree A previous tree to pass to the parser, if any.
   * @param {string|function(number, {row: number, column: number}): string} callback
   *   Either the full text, or a function returning the text that starts at a
   *   given index.
   * @returns {Tree}
   * @throws {Error} If the native parser returns no tree.
   */
  parse(oldTree, callback) {
    if (typeof callback === 'string') {
      return this.parse(oldTree, index => callback.slice(index))
    }
    if (this.logCallback) {
      currentLogCallback = this.logCallback;
      C._ts_parser_enable_logger_wasm(this[0], 1);
    } else {
      C._ts_parser_enable_logger_wasm(this[0], 0);
    }
    currentParseCallback = callback;
    let treeAddress;
    try {
      treeAddress = C._ts_parser_parse_wasm(
        this[0],
        this[1],
        oldTree ? oldTree[0] : 0
      );
    } finally {
      // Clear the globals even when a callback throws, so they never leak
      // into a later parse.
      currentParseCallback = null;
      currentLogCallback = null;
    }
    if (!treeAddress) {
      throw new Error('Parsing failed');
    }
    return new Tree(INTERNAL, treeAddress, this.language, callback);
  }

  /** Resets the native parser's state. */
  reset() {
    C._ts_parser_reset(this[0]);
  }

  /** @param {number} timeout Parse timeout in microseconds. */
  setTimeoutMicros(timeout) {
    C._ts_parser_set_timeout_micros(this[0], timeout);
  }

  /** @returns {number} The parse timeout in microseconds. */
  getTimeoutMicros() {
    return C._ts_parser_timeout_micros(this[0]);
  }

  /** @param {function(boolean, string)|null} callback Receives log messages during parse(). */
  setLogger(callback) {
    this.logCallback = callback;
  }

  /** @returns {function|null|undefined} The callback given to setLogger. */
  getLogger() {
    return this.logCallback;
  }
}
/**
 * Wrapper around a native syntax tree. `this[0]` holds the tree's address.
 * Instances are created internally (by Parser.parse and Tree.copy); call
 * `delete()` to release the native tree.
 */
class Tree {
  constructor(internal, address, language, textCallback) {
    // Only code holding the private INTERNAL token may construct a Tree.
    if (internal !== INTERNAL) {
      throw new Error('Illegal constructor')
    }
    this[0] = address;
    this.language = language;
    this.textCallback = textCallback;
  }

  /** Returns a new Tree wrapping a native copy of this tree. */
  copy() {
    const copiedAddress = C._ts_tree_copy(this[0]);
    return new Tree(INTERNAL, copiedAddress, this.language, this.textCallback);
  }

  /** Frees the native tree. */
  delete() {
    C._ts_tree_delete(this[0]);
  }

  /** Applies an edit description to the tree. */
  edit(edit) {
    // The edit travels to native code through the transfer buffer.
    marshalEdit(edit);
    C._ts_tree_edit_wasm(this[0]);
  }

  /** The root Node of this tree, or null if the native node id is zero. */
  get rootNode() {
    C._ts_tree_root_node_wasm(this[0]);
    const root = unmarshalNode(this);
    return root;
  }

  /** Returns the Language this tree was parsed with. */
  getLanguage() {
    return this.language;
  }
}
/**
 * A node of a syntax tree.
 *
 * The native node is stored as five integers in `this[0]` .. `this[4]`.
 * Every accessor first copies them into the transfer buffer (`marshalNode`)
 * and then calls the matching native function with the tree's address.
 */
class Node {
  constructor(internal, tree) {
    // Only code holding the private INTERNAL token may construct a Node.
    if (internal !== INTERNAL) {
      throw new Error('Illegal constructor')
    }
    this.tree = tree;
  }

  /** Numeric symbol id of this node. */
  get typeId() {
    marshalNode(this);
    // Pass the tree's address, like every other accessor, not the Tree object.
    return C._ts_node_symbol_wasm(this.tree[0]);
  }

  /** Name of this node's type, or 'ERROR' when the language has no name for it. */
  get type() {
    return this.tree.language.types[this.typeId] || 'ERROR';
  }

  /** Start position as `{row, column}`. */
  get startPosition() {
    marshalNode(this);
    C._ts_node_start_point_wasm(this.tree[0]);
    return unmarshalPoint(TRANSFER_BUFFER);
  }

  /** End position as `{row, column}`. */
  get endPosition() {
    marshalNode(this);
    C._ts_node_end_point_wasm(this.tree[0]);
    return unmarshalPoint(TRANSFER_BUFFER);
  }

  /** Start offset of this node in the source text. */
  get startIndex() {
    marshalNode(this);
    return C._ts_node_start_index_wasm(this.tree[0]);
  }

  /** End offset of this node in the source text. */
  get endIndex() {
    marshalNode(this);
    return C._ts_node_end_index_wasm(this.tree[0]);
  }

  /** Source text covered by this node, obtained through the tree's text callback. */
  get text() {
    const startIndex = this.startIndex;
    const length = this.endIndex - startIndex;
    let result = this.tree.textCallback(startIndex);
    // The callback may return the text in chunks; keep asking until enough is collected.
    while (result.length < length) {
      result += this.tree.textCallback(startIndex + result.length);
    }
    return result.slice(0, length);
  }

  /** Returns true when `other` holds the same five native node values. */
  equals(other) {
    if (this === other) return true;
    for (let i = 0; i < 5; i++) {
      if (this[i] !== other[i]) return false;
    }
    return true;
  }

  /** Number of children. */
  get childCount() {
    marshalNode(this);
    return C._ts_node_child_count_wasm(this.tree[0]);
  }

  /** Child at `index`, or null. */
  child(index) {
    marshalNode(this);
    C._ts_node_child_wasm(this.tree[0], index);
    return unmarshalNode(this.tree);
  }

  /** Number of named children. */
  get namedChildCount() {
    marshalNode(this);
    return C._ts_node_named_child_count_wasm(this.tree[0]);
  }

  /** Named child at `index`, or null. */
  namedChild(index) {
    marshalNode(this);
    C._ts_node_named_child_wasm(this.tree[0], index);
    return unmarshalNode(this.tree);
  }

  /** Parent node, or null. */
  get parent() {
    marshalNode(this);
    C._ts_node_parent_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Descendant spanning the given position range, or null.
   * @param {{row: number, column: number}} start
   * @param {{row: number, column: number}} [end=start]
   */
  descendantForPosition(start, end = start) {
    marshalNode(this);
    // The two points are placed directly after the node in the transfer buffer.
    let address = TRANSFER_BUFFER + SIZE_OF_NODE;
    marshalPoint(address, start);
    marshalPoint(address + SIZE_OF_POINT, end);
    C._ts_node_descendant_for_position_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /**
   * Named descendant spanning the given position range, or null.
   * @param {{row: number, column: number}} start
   * @param {{row: number, column: number}} [end=start]
   */
  namedDescendantForPosition(start, end = start) {
    marshalNode(this);
    let address = TRANSFER_BUFFER + SIZE_OF_NODE;
    marshalPoint(address, start);
    marshalPoint(address + SIZE_OF_POINT, end);
    C._ts_node_named_descendant_for_position_wasm(this.tree[0]);
    return unmarshalNode(this.tree);
  }

  /** String form of this node as produced by the native library. */
  toString() {
    marshalNode(this);
    const address = C._ts_node_to_string_wasm(this.tree[0]);
    const result = AsciiToString(address);
    // The native string is released here once it has been copied.
    C._free(address);
    return result;
  }
}
/**
 * Wrapper around a native language object. `this[0]` holds its address and
 * `this.types` maps symbol ids to symbol names.
 */
class Language {
  constructor(internal, address) {
    // Only code holding the private INTERNAL token may construct a Language.
    if (internal !== INTERNAL) {
      throw new Error('Illegal constructor')
    }
    this[0] = address;
    const symbolCount = C._ts_language_symbol_count(address);
    this.types = new Array(symbolCount);
    for (let id = 0; id < symbolCount; id++) {
      // Names are recorded only for symbol types 0 and 1; other entries stay empty.
      // NOTE(review): presumably these are the regular and anonymous symbol types - confirm.
      const symbolType = C._ts_language_symbol_type(address, id);
      if (symbolType >= 2) continue;
      this.types[id] = UTF8ToString(C._ts_language_symbol_name(address, id));
    }
  }

  /** Version number reported by the native language object. */
  get version() {
    return C._ts_language_version(this[0]);
  }

  /**
   * Fetches a compiled language module from `url`, loads it, and wraps the
   * language it exports.
   * @param {string} url
   * @returns {Promise<Language>} Rejects if the HTTP response is not OK.
   */
  static load(url) {
    return fetch(url)
      .then(response => response.arrayBuffer().then(buffer => {
        if (!response.ok) {
          // Surface the response body to make failures easier to diagnose.
          const body = new TextDecoder('utf-8').decode(buffer);
          throw new Error(`Language.load failed with status ${response.status}.\n\n${body}`)
        }
        return loadWebAssemblyModule(new Uint8Array(buffer), {loadAsync: true});
      }))
      .then(moduleExports => {
        // The language constructor is the export whose name contains "tree_sitter_".
        const entryPoint = Object.keys(moduleExports).find(name => name.includes("tree_sitter_"));
        const languageAddress = moduleExports[entryPoint]();
        return new Language(INTERNAL, languageAddress);
      });
  }
}
/** Copies the five integers stored on `node` into the start of the transfer buffer. */
function marshalNode(node) {
  for (let slot = 0; slot < 5; slot++) {
    setValue(TRANSFER_BUFFER + slot * SIZE_OF_INT, node[slot], 'i32');
  }
}
/**
 * Builds a Node for `tree` from the five integers at the start of the
 * transfer buffer. Returns null when the first integer (the node id) is zero.
 */
function unmarshalNode(tree) {
  const id = getValue(TRANSFER_BUFFER, 'i32');
  if (id === 0) return null;

  const node = new Node(INTERNAL, tree);
  node[0] = id;
  for (let slot = 1; slot < 5; slot++) {
    node[slot] = getValue(TRANSFER_BUFFER + slot * SIZE_OF_INT, 'i32');
  }
  return node;
}
/** Writes `point.row` and `point.column` as two consecutive 32-bit integers at `address`. */
function marshalPoint(address, point) {
  const columnAddress = address + SIZE_OF_INT;
  setValue(address, point.row, 'i32');
  setValue(columnAddress, point.column, 'i32');
}
/** Reads two consecutive 32-bit integers at `address` as a `{row, column}` point. */
function unmarshalPoint(address) {
  const row = getValue(address, 'i32');
  const column = getValue(address + SIZE_OF_INT, 'i32');
  return {row, column};
}
/**
 * Writes a range at `address` as: startIndex, endIndex, startPosition,
 * endPosition (SIZE_OF_RANGE bytes in total).
 *
 * NOTE(review): this memory is handed straight to the native parser, so the
 * field order and units (code units here vs. bytes natively) must match the
 * native range struct - verify against the C header.
 */
function marshalRange(address, range) {
  const endIndexAddress = address + SIZE_OF_INT;
  const startPositionAddress = endIndexAddress + SIZE_OF_INT;
  const endPositionAddress = startPositionAddress + SIZE_OF_POINT;

  setValue(address, range.startIndex, 'i32');
  setValue(endIndexAddress, range.endIndex, 'i32');
  marshalPoint(startPositionAddress, range.startPosition);
  marshalPoint(endPositionAddress, range.endPosition);
}
/**
 * Reads a range at `address` laid out as: startIndex, endIndex,
 * startPosition, endPosition.
 *
 * NOTE(review): the memory read here is produced by the native parser, so
 * the field order and units must match the native range struct - verify
 * against the C header.
 */
function unmarshalRange(address) {
  const endIndexAddress = address + SIZE_OF_INT;
  const startPositionAddress = endIndexAddress + SIZE_OF_INT;
  const endPositionAddress = startPositionAddress + SIZE_OF_POINT;

  const range = {};
  range.startIndex = getValue(address, 'i32');
  range.endIndex = getValue(endIndexAddress, 'i32');
  range.startPosition = unmarshalPoint(startPositionAddress);
  range.endPosition = unmarshalPoint(endPositionAddress);
  return range;
}
/**
 * Writes an edit into the transfer buffer: the start, old end and new end
 * positions (one point each), followed by the start, old end and new end
 * indices (one integer each).
 */
function marshalEdit(edit) {
  const pointsBase = TRANSFER_BUFFER;
  marshalPoint(pointsBase, edit.startPosition);
  marshalPoint(pointsBase + SIZE_OF_POINT, edit.oldEndPosition);
  marshalPoint(pointsBase + 2 * SIZE_OF_POINT, edit.newEndPosition);

  const indicesBase = pointsBase + 3 * SIZE_OF_POINT;
  setValue(indicesBase, edit.startIndex, 'i32');
  setValue(indicesBase + SIZE_OF_INT, edit.oldEndIndex, 'i32');
  setValue(indicesBase + 2 * SIZE_OF_INT, edit.newEndIndex, 'i32');
}
// Expose the Language class as a property of the Parser class.
Parser.Language = Language;
// Parser is the value produced by the enclosing factory function.
return Parser;
// Closes the factory function and wrapper call that are opened in prefix.js.
}));

21
lib/web/imports.js Normal file
View file

@ -0,0 +1,21 @@
// JavaScript implementations of the functions that binding.c declares `extern`.
mergeInto(LibraryManager.library, {
  // Asks the active parse callback for the text starting at `index` and copies
  // it into the input buffer as UTF-16. The text length (in code units) is
  // written to `lengthAddress`.
  tree_sitter_parse_callback: function(
    inputBufferAddress,
    index,
    row,
    column,
    lengthAddress
  ) {
    // Must match INPUT_BUFFER_SIZE in binding.c.
    var INPUT_BUFFER_SIZE = 10 * 1024;
    var string = currentParseCallback(index, {row: row, column: column});
    if (typeof string === 'string') {
      setValue(lengthAddress, string.length, 'i32');
      stringToUTF16(string, inputBufferAddress, INPUT_BUFFER_SIZE);
    } else {
      // A callback that returns nothing (e.g. null/undefined past the end of
      // the text) is reported as an empty chunk instead of throwing.
      setValue(lengthAddress, 0, 'i32');
    }
  },

  // Forwards a parser log message to the active log callback, if there is one.
  tree_sitter_log_callback: function(_payload, isLexMessage, messageAddress) {
    if (currentLogCallback) {
      const message = UTF8ToString(messageAddress);
      currentLogCallback(isLexMessage, message);
    }
  }
});

11
lib/web/prefix.js Normal file
View file

@ -0,0 +1,11 @@
// Module wrapper that picks how to publish the value returned by `factory`:
// via `define` when an AMD loader is present, via `module.exports` when
// `exports` is an object, and otherwise as `window.TreeSitter`.
//
// The factory function opened on the last line is intentionally left
// unterminated: the build script prepends this file (--pre-js) and appends
// binding.js (--post-js), which supplies the closing `}));`.
(function (root, factory) {
if (typeof define === 'function' && define.amd) {
define([], factory);
} else if (typeof exports === 'object') {
module.exports = factory();
// In this branch initialization is started immediately and `init` is then
// removed from the exported object.
module.exports.init();
delete module.exports.init;
} else {
window.TreeSitter = factory();
}
}(this, function () {

31
script/build-wasm Executable file
View file

@ -0,0 +1,31 @@
#!/usr/bin/env bash

# Builds the library and its web binding into $target_dir/tree-sitter.js.
#
# Usage: script/build-wasm [--debug]
#   --debug   build unoptimized with SAFE_HEAP checks into target/debug
#             (default: -Os into target/release)

export EMCC_FORCE_STDLIBS=libc++

if [[ "$1" == "--debug" ]]; then
  args="-s SAFE_HEAP=1 -O0"
  target_dir="target/debug"
else
  args="-Os"
  target_dir="target/release"
fi

mkdir -p $target_dir

# $args is deliberately unquoted so that it expands to separate flags.
# prefix.js opens the module wrapper that binding.js closes, so the two must
# stay paired as --pre-js / --post-js.
emcc                               \
  -s WASM=1                        \
  -s ALLOW_MEMORY_GROWTH           \
  -s MAIN_MODULE=1                 \
  -s ASSERTIONS=1                  \
  -s EXPORT_ALL=1                  \
  $args                            \
  -std=c99                         \
  -D 'fprintf(...)='               \
  -I lib/src                       \
  -I lib/include                   \
  -I lib/utf8proc                  \
  --js-library lib/web/imports.js  \
  --pre-js lib/web/prefix.js       \
  --post-js lib/web/binding.js     \
  lib/src/lib.c                    \
  lib/web/binding.c                \
  -o $target_dir/tree-sitter.js