Merge branch 'master' into simpler-large-char-set-code
This commit is contained in:
commit
be6e6d3708
18 changed files with 248 additions and 160 deletions
|
|
@ -1,6 +1,7 @@
|
|||
use super::write_file;
|
||||
use anyhow::{anyhow, Context, Result};
|
||||
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
|
||||
use indoc::indoc;
|
||||
use serde::Deserialize;
|
||||
use serde_json::{json, Map, Value};
|
||||
use std::fs::File;
|
||||
|
|
@ -213,6 +214,19 @@ pub fn generate_grammar_files(
|
|||
updated = true;
|
||||
}
|
||||
|
||||
// insert `tree-sitter` at the end
|
||||
if !package_json.contains_key("tree-sitter") {
|
||||
eprintln!("Adding a `tree-sitter` section to package.json");
|
||||
package_json.insert(
|
||||
"tree-sitter".to_string(),
|
||||
json!([{
|
||||
"scope": format!("source.{language_name}"),
|
||||
"injection-regex": format!("^{language_name}$"),
|
||||
}]),
|
||||
);
|
||||
updated = true;
|
||||
}
|
||||
|
||||
if updated {
|
||||
let mut package_json_str = serde_json::to_string_pretty(&package_json)?;
|
||||
package_json_str.push('\n');
|
||||
|
|
@ -261,9 +275,33 @@ pub fn generate_grammar_files(
|
|||
generate_file(path, LIB_RS_TEMPLATE, language_name)
|
||||
})?;
|
||||
|
||||
missing_path(path.join("build.rs"), |path| {
|
||||
generate_file(path, BUILD_RS_TEMPLATE, language_name)
|
||||
})?;
|
||||
missing_path_else(
|
||||
path.join("build.rs"),
|
||||
|path| generate_file(path, BUILD_RS_TEMPLATE, language_name),
|
||||
|path| {
|
||||
let build_rs =
|
||||
fs::read_to_string(path).with_context(|| "Failed to read build.rs")?;
|
||||
if !build_rs.contains("/utf-8") {
|
||||
let index = build_rs
|
||||
.find(" let parser_path = src_dir.join(\"parser.c\")")
|
||||
.ok_or_else(|| anyhow!(indoc!{
|
||||
"Failed to auto-update build.rs with the `/utf-8` flag for windows.
|
||||
To fix this, remove `bindings/rust/build.rs` and re-run `tree-sitter generate`"}))?;
|
||||
|
||||
let build_rs = format!(
|
||||
"{}{}{}\n{}",
|
||||
&build_rs[..index],
|
||||
" #[cfg(target_env = \"msvc\")]\n",
|
||||
" c_config.flag(\"-utf-8\");\n",
|
||||
&build_rs[index..]
|
||||
);
|
||||
|
||||
write_file(path, build_rs)?;
|
||||
eprintln!("Updated build.rs with the /utf-8 flag for Windows compilation");
|
||||
}
|
||||
Ok(())
|
||||
},
|
||||
)?;
|
||||
|
||||
missing_path(repo_path.join("Cargo.toml"), |path| {
|
||||
generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name.as_str())
|
||||
|
|
@ -425,8 +463,7 @@ fn lookup_package_json_for_path(path: &Path) -> Result<(PathBuf, PackageJSON)> {
|
|||
.then(|| -> Result<PackageJSON> {
|
||||
let file =
|
||||
File::open(pathbuf.as_path()).with_context(|| "Failed to open package.json")?;
|
||||
let package_json: PackageJSON = serde_json::from_reader(BufReader::new(file))?;
|
||||
Ok(package_json)
|
||||
Ok(serde_json::from_reader(BufReader::new(file))?)
|
||||
})
|
||||
.transpose()?;
|
||||
if let Some(package_json) = package_json {
|
||||
|
|
|
|||
|
|
@ -472,7 +472,7 @@ impl NfaBuilder {
|
|||
)
|
||||
})?;
|
||||
for c in code_points {
|
||||
if let Some(c) = std::char::from_u32(*c) {
|
||||
if let Some(c) = char::from_u32(*c) {
|
||||
chars = chars.add_char(c);
|
||||
}
|
||||
}
|
||||
|
|
@ -490,7 +490,7 @@ impl NfaBuilder {
|
|||
for (category, code_points) in UNICODE_CATEGORIES.iter() {
|
||||
if category.starts_with(&category_letter) {
|
||||
for c in code_points {
|
||||
if let Some(c) = std::char::from_u32(*c) {
|
||||
if let Some(c) = char::from_u32(*c) {
|
||||
chars = chars.add_char(c);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -802,9 +802,6 @@ impl Generator {
|
|||
is_included: bool,
|
||||
line_break: &str,
|
||||
) {
|
||||
// parenthesis needed if we add the `!eof` condition to explicitly avoid confusion with
|
||||
// precedence of `&&` and `||`
|
||||
let (mut need_open_paren, mut need_close_paren) = (false, false);
|
||||
for (i, range) in characters.ranges().enumerate() {
|
||||
let start = *range.start();
|
||||
let end = *range.end();
|
||||
|
|
@ -812,21 +809,20 @@ impl Generator {
|
|||
if i > 0 {
|
||||
add!(self, " ||{line_break}");
|
||||
}
|
||||
|
||||
if start == '\0' {
|
||||
add!(self, "!eof && ");
|
||||
(need_open_paren, need_close_paren) = (true, true);
|
||||
}
|
||||
if end == start {
|
||||
if need_open_paren {
|
||||
add!(self, "(");
|
||||
need_open_paren = false;
|
||||
add!(self, "(!eof && ");
|
||||
if end == '\0' {
|
||||
add!(self, "lookahead == 0");
|
||||
} else {
|
||||
add!(self, "lookahead <= ");
|
||||
}
|
||||
self.add_character(end);
|
||||
add!(self, ")");
|
||||
continue;
|
||||
} else if end == start {
|
||||
add!(self, "lookahead == ");
|
||||
self.add_character(start);
|
||||
if need_close_paren && i == characters.range_count() - 1 {
|
||||
add!(self, ")");
|
||||
need_close_paren = false;
|
||||
}
|
||||
} else if end as u32 == start as u32 + 1 {
|
||||
add!(self, "lookahead == ");
|
||||
self.add_character(start);
|
||||
|
|
@ -908,7 +904,7 @@ impl Generator {
|
|||
if action.in_main_token {
|
||||
add!(self, "ADVANCE({});", action.state);
|
||||
} else {
|
||||
add!(self, "SKIP({})", action.state);
|
||||
add!(self, "SKIP({});", action.state);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -1209,14 +1205,11 @@ impl Generator {
|
|||
production_id,
|
||||
..
|
||||
} => {
|
||||
add!(self, "REDUCE({}, {child_count}", self.symbol_ids[&symbol]);
|
||||
if dynamic_precedence != 0 {
|
||||
add!(self, ", .dynamic_precedence = {dynamic_precedence}");
|
||||
}
|
||||
if production_id != 0 {
|
||||
add!(self, ", .production_id = {production_id}");
|
||||
}
|
||||
add!(self, ")");
|
||||
add!(
|
||||
self,
|
||||
"REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
|
||||
self.symbol_ids[&symbol]
|
||||
);
|
||||
}
|
||||
}
|
||||
add!(self, ",");
|
||||
|
|
@ -1268,7 +1261,7 @@ impl Generator {
|
|||
|
||||
add_line!(
|
||||
self,
|
||||
"TS_PUBLIC const TSLanguage *{language_function_name}() {{",
|
||||
"TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
|
||||
);
|
||||
indent!(self);
|
||||
add_line!(self, "static const TSLanguage language = {{");
|
||||
|
|
|
|||
|
|
@ -3,7 +3,6 @@ import PackageDescription
|
|||
|
||||
let package = Package(
|
||||
name: "TreeSitterCAMEL_PARSER_NAME",
|
||||
platforms: [.macOS(.v10_13), .iOS(.v11)],
|
||||
products: [
|
||||
.library(name: "TreeSitterCAMEL_PARSER_NAME", targets: ["TreeSitterCAMEL_PARSER_NAME"]),
|
||||
],
|
||||
|
|
|
|||
|
|
@ -13,8 +13,17 @@
|
|||
"src/parser.c",
|
||||
# NOTE: if your language has an external scanner, add it here.
|
||||
],
|
||||
"cflags_c": [
|
||||
"-std=c11",
|
||||
"conditions": [
|
||||
["OS!='win'", {
|
||||
"cflags_c": [
|
||||
"-std=c11",
|
||||
],
|
||||
}, { # OS == "win"
|
||||
"cflags_c": [
|
||||
"/std:c11",
|
||||
"/utf-8",
|
||||
],
|
||||
}],
|
||||
],
|
||||
}
|
||||
]
|
||||
|
|
|
|||
|
|
@ -4,6 +4,9 @@ fn main() {
|
|||
let mut c_config = cc::Build::new();
|
||||
c_config.std("c11").include(src_dir);
|
||||
|
||||
#[cfg(target_env = "msvc")]
|
||||
c_config.flag("-utf-8");
|
||||
|
||||
let parser_path = src_dir.join("parser.c");
|
||||
c_config.file(&parser_path);
|
||||
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());
|
||||
|
|
|
|||
|
|
@ -27,11 +27,13 @@ INCLUDEDIR ?= $(PREFIX)/include
|
|||
LIBDIR ?= $(PREFIX)/lib
|
||||
PCLIBDIR ?= $(LIBDIR)/pkgconfig
|
||||
|
||||
# object files
|
||||
OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))
|
||||
# source/object files
|
||||
PARSER := $(SRC_DIR)/parser.c
|
||||
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
|
||||
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
|
||||
|
||||
# flags
|
||||
ARFLAGS := rcs
|
||||
ARFLAGS ?= rcs
|
||||
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
|
||||
|
||||
# OS-specific bits
|
||||
|
|
@ -81,8 +83,8 @@ $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
|
|||
-e 's|=$(PREFIX)|=$${prefix}|' \
|
||||
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
|
||||
|
||||
$(SRC_DIR)/parser.c: grammar.js
|
||||
$(TS) generate --no-bindings
|
||||
$(PARSER): $(SRC_DIR)/grammar.json
|
||||
$(TS) generate --no-bindings $^
|
||||
|
||||
install: all
|
||||
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'
|
||||
|
|
|
|||
|
|
@ -38,9 +38,12 @@ setup(
|
|||
"src/parser.c",
|
||||
# NOTE: if your language uses an external scanner, add it here.
|
||||
],
|
||||
extra_compile_args=(
|
||||
["-std=c11"] if system() != 'Windows' else []
|
||||
),
|
||||
extra_compile_args=[
|
||||
"-std=c11",
|
||||
] if system() != "Windows" else [
|
||||
"/std:c11",
|
||||
"/utf-8",
|
||||
],
|
||||
define_macros=[
|
||||
("Py_LIMITED_API", "0x03080000"),
|
||||
("PY_SSIZE_T_CLEAN", None)
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ enum Commands {
|
|||
InitConfig(InitConfig),
|
||||
Generate(Generate),
|
||||
Build(Build),
|
||||
BuildWasm(BuildWasm),
|
||||
Parse(Parse),
|
||||
Test(Test),
|
||||
Query(Query),
|
||||
|
|
@ -115,6 +116,19 @@ struct Build {
|
|||
pub internal_build: bool,
|
||||
}
|
||||
|
||||
#[derive(Args)]
|
||||
#[command(about = "Compile a parser to WASM", alias = "bw")]
|
||||
struct BuildWasm {
|
||||
#[arg(
|
||||
short,
|
||||
long,
|
||||
help = "Run emscripten via docker even if it is installed locally"
|
||||
)]
|
||||
pub docker: bool,
|
||||
#[arg(index = 1, num_args = 1, help = "The path to output the wasm file")]
|
||||
pub path: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Args)]
|
||||
#[command(about = "Parse files", alias = "p")]
|
||||
struct Parse {
|
||||
|
|
@ -444,15 +458,11 @@ fn run() -> Result<()> {
|
|||
if build_options.wasm {
|
||||
let grammar_path =
|
||||
current_dir.join(build_options.path.as_deref().unwrap_or_default());
|
||||
let (output_dir, output_path) = if let Some(ref path) = build_options.output {
|
||||
(current_dir.clone(), Some(current_dir.join(path)))
|
||||
} else {
|
||||
(loader.parser_lib_path.clone(), None)
|
||||
};
|
||||
let output_path = build_options.output.map(|path| current_dir.join(path));
|
||||
wasm::compile_language_to_wasm(
|
||||
&loader,
|
||||
&grammar_path,
|
||||
&output_dir,
|
||||
&current_dir,
|
||||
output_path,
|
||||
build_options.docker,
|
||||
)?;
|
||||
|
|
@ -498,6 +508,18 @@ fn run() -> Result<()> {
|
|||
}
|
||||
}
|
||||
|
||||
Commands::BuildWasm(wasm_options) => {
|
||||
eprintln!("`build-wasm` is deprecated and will be removed in v0.24.0. You should use `build --wasm` instead");
|
||||
let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default());
|
||||
wasm::compile_language_to_wasm(
|
||||
&loader,
|
||||
&grammar_path,
|
||||
&current_dir,
|
||||
None,
|
||||
wasm_options.docker,
|
||||
)?;
|
||||
}
|
||||
|
||||
Commands::Parse(parse_options) => {
|
||||
let config = Config::load(parse_options.config_path)?;
|
||||
let output = if parse_options.output_dot {
|
||||
|
|
|
|||
|
|
@ -21,8 +21,7 @@ lazy_static! {
|
|||
(?P<equals>(?:=+){3,})
|
||||
(?P<suffix1>[^=\r\n][^\r\n]*)?
|
||||
\r?\n
|
||||
(?P<test_name>(?:[^=\r\n:][^\r\n]*\r?\n)+(?:(?:[ \t]*\r?\n)+)?)
|
||||
(?P<markers>((?::(?:skip|error|fail-fast|(language|platform)\([^\r\n)]+\))\r?\n)*))
|
||||
(?P<test_name_and_markers>(?:[^=\r\n][^\r\n]*\r?\n)+)
|
||||
===+
|
||||
(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n"
|
||||
)
|
||||
|
|
@ -511,29 +510,46 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
|
|||
let (mut skip, mut platform, mut fail_fast, mut error, mut languages) =
|
||||
(false, None, false, false, vec![]);
|
||||
|
||||
let markers = c.name("markers").map_or("".as_bytes(), |m| m.as_bytes());
|
||||
let test_name_and_markers = c
|
||||
.name("test_name_and_markers")
|
||||
.map_or("".as_bytes(), |m| m.as_bytes());
|
||||
|
||||
for marker in markers.split(|&c| c == b'\n').filter(|s| !s.is_empty()) {
|
||||
let marker = str::from_utf8(marker).unwrap();
|
||||
let (marker, right) = marker.split_at(marker.find('(').unwrap_or(marker.len()));
|
||||
match marker {
|
||||
":skip" => skip = true,
|
||||
let mut test_name = String::new();
|
||||
let mut seen_marker = false;
|
||||
|
||||
for line in test_name_and_markers
|
||||
.split(|&c| c == b'\n')
|
||||
.filter(|s| !s.is_empty())
|
||||
{
|
||||
let line = str::from_utf8(line).unwrap();
|
||||
match line.split('(').next().unwrap() {
|
||||
":skip" => (seen_marker, skip) = (true, true),
|
||||
":platform" => {
|
||||
if let Some(platforms) =
|
||||
right.strip_prefix('(').and_then(|s| s.strip_suffix(')'))
|
||||
{
|
||||
if let Some(platforms) = line.strip_prefix(':').and_then(|s| {
|
||||
s.strip_prefix("platform(")
|
||||
.and_then(|s| s.strip_suffix(')'))
|
||||
}) {
|
||||
seen_marker = true;
|
||||
platform = Some(
|
||||
platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS,
|
||||
);
|
||||
}
|
||||
}
|
||||
":fail-fast" => fail_fast = true,
|
||||
":error" => error = true,
|
||||
":fail-fast" => (seen_marker, fail_fast) = (true, true),
|
||||
":error" => (seen_marker, error) = (true, true),
|
||||
":language" => {
|
||||
if let Some(lang) = right.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
|
||||
if let Some(lang) = line.strip_prefix(':').and_then(|s| {
|
||||
s.strip_prefix("language(")
|
||||
.and_then(|s| s.strip_suffix(')'))
|
||||
}) {
|
||||
seen_marker = true;
|
||||
languages.push(lang.into());
|
||||
}
|
||||
}
|
||||
_ if !seen_marker => {
|
||||
test_name.push_str(line);
|
||||
test_name.push('\n');
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
|
@ -550,9 +566,11 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
|
|||
|
||||
if suffix1 == first_suffix && suffix2 == first_suffix {
|
||||
let header_range = c.get(0).unwrap().range();
|
||||
let test_name = c
|
||||
.name("test_name")
|
||||
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
|
||||
let test_name = if test_name.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(test_name.trim_end().to_string())
|
||||
};
|
||||
Some((
|
||||
header_delim_len,
|
||||
header_range,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue