Merge branch 'master' into simpler-large-char-set-code

This commit is contained in:
Max Brunsfeld 2024-04-11 16:03:20 -07:00
commit be6e6d3708
18 changed files with 248 additions and 160 deletions

View file

@ -1,6 +1,7 @@
use super::write_file;
use anyhow::{anyhow, Context, Result};
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::indoc;
use serde::Deserialize;
use serde_json::{json, Map, Value};
use std::fs::File;
@ -213,6 +214,19 @@ pub fn generate_grammar_files(
updated = true;
}
// insert `tree-sitter` at the end
if !package_json.contains_key("tree-sitter") {
eprintln!("Adding a `tree-sitter` section to package.json");
package_json.insert(
"tree-sitter".to_string(),
json!([{
"scope": format!("source.{language_name}"),
"injection-regex": format!("^{language_name}$"),
}]),
);
updated = true;
}
if updated {
let mut package_json_str = serde_json::to_string_pretty(&package_json)?;
package_json_str.push('\n');
@ -261,9 +275,33 @@ pub fn generate_grammar_files(
generate_file(path, LIB_RS_TEMPLATE, language_name)
})?;
missing_path(path.join("build.rs"), |path| {
generate_file(path, BUILD_RS_TEMPLATE, language_name)
})?;
// Create `build.rs` from the template when it does not exist yet; when it does,
// patch it in place so that MSVC builds pass the UTF-8 source-charset flag.
missing_path_else(
    path.join("build.rs"),
    |path| generate_file(path, BUILD_RS_TEMPLATE, language_name),
    |path| {
        let build_rs =
            fs::read_to_string(path).with_context(|| "Failed to read build.rs")?;
        // The snippet inserted below spells the flag `-utf-8`, so the guard has to
        // recognise that spelling; checking only for `/utf-8` never matches our own
        // output and makes every subsequent run insert the snippet again. The
        // `/utf-8` spelling is still accepted for files that were edited by hand.
        let has_utf8_flag = build_rs.contains("-utf-8") || build_rs.contains("/utf-8");
        if !has_utf8_flag {
            // Anchor the insertion immediately before the `parser_path` binding.
            let index = build_rs
                .find(" let parser_path = src_dir.join(\"parser.c\")")
                .ok_or_else(|| anyhow!(indoc!{
                    "Failed to auto-update build.rs with the `/utf-8` flag for windows.
                    To fix this, remove `bindings/rust/build.rs` and re-run `tree-sitter generate`"}))?;
            let build_rs = format!(
                "{}{}{}\n{}",
                &build_rs[..index],
                " #[cfg(target_env = \"msvc\")]\n",
                " c_config.flag(\"-utf-8\");\n",
                &build_rs[index..]
            );
            write_file(path, build_rs)?;
            eprintln!("Updated build.rs with the /utf-8 flag for Windows compilation");
        }
        Ok(())
    },
)?;
missing_path(repo_path.join("Cargo.toml"), |path| {
generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name.as_str())
@ -425,8 +463,7 @@ fn lookup_package_json_for_path(path: &Path) -> Result<(PathBuf, PackageJSON)> {
.then(|| -> Result<PackageJSON> {
let file =
File::open(pathbuf.as_path()).with_context(|| "Failed to open package.json")?;
let package_json: PackageJSON = serde_json::from_reader(BufReader::new(file))?;
Ok(package_json)
Ok(serde_json::from_reader(BufReader::new(file))?)
})
.transpose()?;
if let Some(package_json) = package_json {

View file

@ -472,7 +472,7 @@ impl NfaBuilder {
)
})?;
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}
@ -490,7 +490,7 @@ impl NfaBuilder {
for (category, code_points) in UNICODE_CATEGORIES.iter() {
if category.starts_with(&category_letter) {
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}

View file

@ -802,9 +802,6 @@ impl Generator {
is_included: bool,
line_break: &str,
) {
// parenthesis needed if we add the `!eof` condition to explicitly avoid confusion with
// precedence of `&&` and `||`
let (mut need_open_paren, mut need_close_paren) = (false, false);
for (i, range) in characters.ranges().enumerate() {
let start = *range.start();
let end = *range.end();
@ -812,21 +809,20 @@ impl Generator {
if i > 0 {
add!(self, " ||{line_break}");
}
if start == '\0' {
add!(self, "!eof && ");
(need_open_paren, need_close_paren) = (true, true);
}
if end == start {
if need_open_paren {
add!(self, "(");
need_open_paren = false;
add!(self, "(!eof && ");
if end == '\0' {
add!(self, "lookahead == 0");
} else {
add!(self, "lookahead <= ");
}
self.add_character(end);
add!(self, ")");
continue;
} else if end == start {
add!(self, "lookahead == ");
self.add_character(start);
if need_close_paren && i == characters.range_count() - 1 {
add!(self, ")");
need_close_paren = false;
}
} else if end as u32 == start as u32 + 1 {
add!(self, "lookahead == ");
self.add_character(start);
@ -908,7 +904,7 @@ impl Generator {
if action.in_main_token {
add!(self, "ADVANCE({});", action.state);
} else {
add!(self, "SKIP({})", action.state);
add!(self, "SKIP({});", action.state);
}
}
@ -1209,14 +1205,11 @@ impl Generator {
production_id,
..
} => {
add!(self, "REDUCE({}, {child_count}", self.symbol_ids[&symbol]);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {dynamic_precedence}");
}
if production_id != 0 {
add!(self, ", .production_id = {production_id}");
}
add!(self, ")");
add!(
self,
"REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
self.symbol_ids[&symbol]
);
}
}
add!(self, ",");
@ -1268,7 +1261,7 @@ impl Generator {
add_line!(
self,
"TS_PUBLIC const TSLanguage *{language_function_name}() {{",
"TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
);
indent!(self);
add_line!(self, "static const TSLanguage language = {{");

View file

@ -3,7 +3,6 @@ import PackageDescription
let package = Package(
name: "TreeSitterCAMEL_PARSER_NAME",
platforms: [.macOS(.v10_13), .iOS(.v11)],
products: [
.library(name: "TreeSitterCAMEL_PARSER_NAME", targets: ["TreeSitterCAMEL_PARSER_NAME"]),
],

View file

@ -13,8 +13,17 @@
"src/parser.c",
# NOTE: if your language has an external scanner, add it here.
],
"cflags_c": [
"-std=c11",
"conditions": [
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
}
]

View file

@ -4,6 +4,9 @@ fn main() {
let mut c_config = cc::Build::new();
c_config.std("c11").include(src_dir);
#[cfg(target_env = "msvc")]
c_config.flag("-utf-8");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

View file

@ -27,11 +27,13 @@ INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# object files
OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))
# source/object files
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
# flags
ARFLAGS := rcs
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# OS-specific bits
@ -81,8 +83,8 @@ $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
$(SRC_DIR)/parser.c: grammar.js
$(TS) generate --no-bindings
$(PARSER): $(SRC_DIR)/grammar.json
$(TS) generate --no-bindings $^
install: all
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'

View file

@ -38,9 +38,12 @@ setup(
"src/parser.c",
# NOTE: if your language uses an external scanner, add it here.
],
extra_compile_args=(
["-std=c11"] if system() != 'Windows' else []
),
extra_compile_args=[
"-std=c11",
] if system() != "Windows" else [
"/std:c11",
"/utf-8",
],
define_macros=[
("Py_LIMITED_API", "0x03080000"),
("PY_SSIZE_T_CLEAN", None)

View file

@ -28,6 +28,7 @@ enum Commands {
InitConfig(InitConfig),
Generate(Generate),
Build(Build),
BuildWasm(BuildWasm),
Parse(Parse),
Test(Test),
Query(Query),
@ -115,6 +116,19 @@ struct Build {
pub internal_build: bool,
}
// Arguments for the `BuildWasm` subcommand (alias `bw`).
// Plain `//` comments are used on purpose: clap's derive macros read `///` doc
// comments as help text, which would change the CLI output.
#[derive(Args)]
#[command(about = "Compile a parser to WASM", alias = "bw")]
struct BuildWasm {
// `-d` / `--docker` boolean switch (short and long names derived from the field).
#[arg(
short,
long,
help = "Run emscripten via docker even if it is installed locally"
)]
pub docker: bool,
// Optional first positional argument.
// NOTE(review): the help text calls this the wasm output path, but the
// `Commands::BuildWasm` handler joins it onto the current directory and passes
// it on as the grammar path — confirm which meaning is intended.
#[arg(index = 1, num_args = 1, help = "The path to output the wasm file")]
pub path: Option<String>,
}
#[derive(Args)]
#[command(about = "Parse files", alias = "p")]
struct Parse {
@ -444,15 +458,11 @@ fn run() -> Result<()> {
if build_options.wasm {
let grammar_path =
current_dir.join(build_options.path.as_deref().unwrap_or_default());
let (output_dir, output_path) = if let Some(ref path) = build_options.output {
(current_dir.clone(), Some(current_dir.join(path)))
} else {
(loader.parser_lib_path.clone(), None)
};
let output_path = build_options.output.map(|path| current_dir.join(path));
wasm::compile_language_to_wasm(
&loader,
&grammar_path,
&output_dir,
&current_dir,
output_path,
build_options.docker,
)?;
@ -498,6 +508,18 @@ fn run() -> Result<()> {
}
}
Commands::BuildWasm(wasm_options) => {
eprintln!("`build-wasm` is deprecated and will be removed in v0.24.0. You should use `build --wasm` instead");
let grammar_path = current_dir.join(wasm_options.path.unwrap_or_default());
wasm::compile_language_to_wasm(
&loader,
&grammar_path,
&current_dir,
None,
wasm_options.docker,
)?;
}
Commands::Parse(parse_options) => {
let config = Config::load(parse_options.config_path)?;
let output = if parse_options.output_dot {

View file

@ -21,8 +21,7 @@ lazy_static! {
(?P<equals>(?:=+){3,})
(?P<suffix1>[^=\r\n][^\r\n]*)?
\r?\n
(?P<test_name>(?:[^=\r\n:][^\r\n]*\r?\n)+(?:(?:[ \t]*\r?\n)+)?)
(?P<markers>((?::(?:skip|error|fail-fast|(language|platform)\([^\r\n)]+\))\r?\n)*))
(?P<test_name_and_markers>(?:[^=\r\n][^\r\n]*\r?\n)+)
===+
(?P<suffix2>[^=\r\n][^\r\n]*)?\r?\n"
)
@ -511,29 +510,46 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
let (mut skip, mut platform, mut fail_fast, mut error, mut languages) =
(false, None, false, false, vec![]);
let markers = c.name("markers").map_or("".as_bytes(), |m| m.as_bytes());
let test_name_and_markers = c
.name("test_name_and_markers")
.map_or("".as_bytes(), |m| m.as_bytes());
for marker in markers.split(|&c| c == b'\n').filter(|s| !s.is_empty()) {
let marker = str::from_utf8(marker).unwrap();
let (marker, right) = marker.split_at(marker.find('(').unwrap_or(marker.len()));
match marker {
":skip" => skip = true,
let mut test_name = String::new();
let mut seen_marker = false;
for line in test_name_and_markers
.split(|&c| c == b'\n')
.filter(|s| !s.is_empty())
{
let line = str::from_utf8(line).unwrap();
match line.split('(').next().unwrap() {
":skip" => (seen_marker, skip) = (true, true),
":platform" => {
if let Some(platforms) =
right.strip_prefix('(').and_then(|s| s.strip_suffix(')'))
{
if let Some(platforms) = line.strip_prefix(':').and_then(|s| {
s.strip_prefix("platform(")
.and_then(|s| s.strip_suffix(')'))
}) {
seen_marker = true;
platform = Some(
platform.unwrap_or(false) || platforms.trim() == std::env::consts::OS,
);
}
}
":fail-fast" => fail_fast = true,
":error" => error = true,
":fail-fast" => (seen_marker, fail_fast) = (true, true),
":error" => (seen_marker, error) = (true, true),
":language" => {
if let Some(lang) = right.strip_prefix('(').and_then(|s| s.strip_suffix(')')) {
if let Some(lang) = line.strip_prefix(':').and_then(|s| {
s.strip_prefix("language(")
.and_then(|s| s.strip_suffix(')'))
}) {
seen_marker = true;
languages.push(lang.into());
}
}
_ if !seen_marker => {
test_name.push_str(line);
test_name.push('\n');
}
_ => {}
}
}
@ -550,9 +566,11 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
if suffix1 == first_suffix && suffix2 == first_suffix {
let header_range = c.get(0).unwrap().range();
let test_name = c
.name("test_name")
.map(|c| String::from_utf8_lossy(c.as_bytes()).trim_end().to_string());
let test_name = if test_name.is_empty() {
None
} else {
Some(test_name.trim_end().to_string())
};
Some((
header_delim_len,
header_range,