Merge branch 'master' into simpler-large-char-set-code

This commit is contained in:
Max Brunsfeld 2024-04-11 16:03:20 -07:00
commit be6e6d3708
18 changed files with 248 additions and 160 deletions

View file

@ -1,6 +1,7 @@
use super::write_file;
use anyhow::{anyhow, Context, Result};
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::indoc;
use serde::Deserialize;
use serde_json::{json, Map, Value};
use std::fs::File;
@ -213,6 +214,19 @@ pub fn generate_grammar_files(
updated = true;
}
// insert `tree-sitter` at the end
if !package_json.contains_key("tree-sitter") {
eprintln!("Adding a `tree-sitter` section to package.json");
package_json.insert(
"tree-sitter".to_string(),
json!([{
"scope": format!("source.{language_name}"),
"injection-regex": format!("^{language_name}$"),
}]),
);
updated = true;
}
if updated {
let mut package_json_str = serde_json::to_string_pretty(&package_json)?;
package_json_str.push('\n');
@ -261,9 +275,33 @@ pub fn generate_grammar_files(
generate_file(path, LIB_RS_TEMPLATE, language_name)
})?;
missing_path(path.join("build.rs"), |path| {
generate_file(path, BUILD_RS_TEMPLATE, language_name)
})?;
// Create `build.rs` from the template when it is missing. When it already exists,
// patch it in place so that MSVC builds pass the UTF-8 source-charset flag.
missing_path_else(
    path.join("build.rs"),
    |path| generate_file(path, BUILD_RS_TEMPLATE, language_name),
    |path| {
        let build_rs =
            fs::read_to_string(path).with_context(|| "Failed to read build.rs")?;

        // The snippet inserted below spells the flag `-utf-8`, so the guard has to
        // recognise that spelling; checking only for `/utf-8` would never match the
        // patched file and the snippet would be inserted again on every run.
        // `/utf-8` is still accepted in case the flag was added by hand.
        let has_utf8_flag = build_rs.contains("-utf-8") || build_rs.contains("/utf-8");
        if !has_utf8_flag {
            // Locate the `parser_path` statement and the start of the line it sits on,
            // so the new lines can reuse whatever indentation that line already has.
            let (line_start, indent) = build_rs
                .find("let parser_path = src_dir.join(\"parser.c\")")
                .map(|stmt| {
                    let line_start = build_rs[..stmt].rfind('\n').map_or(0, |nl| nl + 1);
                    (line_start, &build_rs[line_start..stmt])
                })
                // Only patch when the statement starts its line; anything else means
                // the file was restructured and is not safe to edit automatically.
                .filter(|(_, indent)| indent.chars().all(char::is_whitespace))
                .ok_or_else(|| anyhow!(indoc!{
                    "Failed to auto-update build.rs with the `/utf-8` flag for windows.
                    To fix this, remove `bindings/rust/build.rs` and re-run `tree-sitter generate`"}))?;

            // Insert the cfg-gated flag (followed by a blank line) directly above
            // the `parser_path` statement.
            let patched = format!(
                "{}{indent}#[cfg(target_env = \"msvc\")]\n{indent}c_config.flag(\"-utf-8\");\n\n{}",
                &build_rs[..line_start],
                &build_rs[line_start..]
            );

            write_file(path, patched)?;
            eprintln!("Updated build.rs with the /utf-8 flag for Windows compilation");
        }
        Ok(())
    },
)?;
missing_path(repo_path.join("Cargo.toml"), |path| {
generate_file(path, CARGO_TOML_TEMPLATE, dashed_language_name.as_str())
@ -425,8 +463,7 @@ fn lookup_package_json_for_path(path: &Path) -> Result<(PathBuf, PackageJSON)> {
.then(|| -> Result<PackageJSON> {
let file =
File::open(pathbuf.as_path()).with_context(|| "Failed to open package.json")?;
let package_json: PackageJSON = serde_json::from_reader(BufReader::new(file))?;
Ok(package_json)
Ok(serde_json::from_reader(BufReader::new(file))?)
})
.transpose()?;
if let Some(package_json) = package_json {

View file

@ -472,7 +472,7 @@ impl NfaBuilder {
)
})?;
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}
@ -490,7 +490,7 @@ impl NfaBuilder {
for (category, code_points) in UNICODE_CATEGORIES.iter() {
if category.starts_with(&category_letter) {
for c in code_points {
if let Some(c) = std::char::from_u32(*c) {
if let Some(c) = char::from_u32(*c) {
chars = chars.add_char(c);
}
}

View file

@ -802,9 +802,6 @@ impl Generator {
is_included: bool,
line_break: &str,
) {
// Parentheses are needed if we add the `!eof` condition, to avoid confusion over the
// precedence of `&&` and `||`.
let (mut need_open_paren, mut need_close_paren) = (false, false);
for (i, range) in characters.ranges().enumerate() {
let start = *range.start();
let end = *range.end();
@ -812,21 +809,20 @@ impl Generator {
if i > 0 {
add!(self, " ||{line_break}");
}
if start == '\0' {
add!(self, "!eof && ");
(need_open_paren, need_close_paren) = (true, true);
}
if end == start {
if need_open_paren {
add!(self, "(");
need_open_paren = false;
add!(self, "(!eof && ");
if end == '\0' {
add!(self, "lookahead == 0");
} else {
add!(self, "lookahead <= ");
}
self.add_character(end);
add!(self, ")");
continue;
} else if end == start {
add!(self, "lookahead == ");
self.add_character(start);
if need_close_paren && i == characters.range_count() - 1 {
add!(self, ")");
need_close_paren = false;
}
} else if end as u32 == start as u32 + 1 {
add!(self, "lookahead == ");
self.add_character(start);
@ -908,7 +904,7 @@ impl Generator {
if action.in_main_token {
add!(self, "ADVANCE({});", action.state);
} else {
add!(self, "SKIP({})", action.state);
add!(self, "SKIP({});", action.state);
}
}
@ -1209,14 +1205,11 @@ impl Generator {
production_id,
..
} => {
add!(self, "REDUCE({}, {child_count}", self.symbol_ids[&symbol]);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {dynamic_precedence}");
}
if production_id != 0 {
add!(self, ", .production_id = {production_id}");
}
add!(self, ")");
add!(
self,
"REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
self.symbol_ids[&symbol]
);
}
}
add!(self, ",");
@ -1268,7 +1261,7 @@ impl Generator {
add_line!(
self,
"TS_PUBLIC const TSLanguage *{language_function_name}() {{",
"TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
);
indent!(self);
add_line!(self, "static const TSLanguage language = {{");

View file

@ -3,7 +3,6 @@ import PackageDescription
let package = Package(
name: "TreeSitterCAMEL_PARSER_NAME",
platforms: [.macOS(.v10_13), .iOS(.v11)],
products: [
.library(name: "TreeSitterCAMEL_PARSER_NAME", targets: ["TreeSitterCAMEL_PARSER_NAME"]),
],

View file

@ -13,8 +13,17 @@
"src/parser.c",
# NOTE: if your language has an external scanner, add it here.
],
"cflags_c": [
"-std=c11",
"conditions": [
["OS!='win'", {
"cflags_c": [
"-std=c11",
],
}, { # OS == "win"
"cflags_c": [
"/std:c11",
"/utf-8",
],
}],
],
}
]

View file

@ -4,6 +4,9 @@ fn main() {
let mut c_config = cc::Build::new();
c_config.std("c11").include(src_dir);
#[cfg(target_env = "msvc")]
c_config.flag("-utf-8");
let parser_path = src_dir.join("parser.c");
c_config.file(&parser_path);
println!("cargo:rerun-if-changed={}", parser_path.to_str().unwrap());

View file

@ -27,11 +27,13 @@ INCLUDEDIR ?= $(PREFIX)/include
LIBDIR ?= $(PREFIX)/lib
PCLIBDIR ?= $(LIBDIR)/pkgconfig
# object files
OBJS := $(patsubst %.c,%.o,$(wildcard $(SRC_DIR)/*.c))
# source/object files
PARSER := $(SRC_DIR)/parser.c
EXTRAS := $(filter-out $(PARSER),$(wildcard $(SRC_DIR)/*.c))
OBJS := $(patsubst %.c,%.o,$(PARSER) $(EXTRAS))
# flags
ARFLAGS := rcs
ARFLAGS ?= rcs
override CFLAGS += -I$(SRC_DIR) -std=c11 -fPIC
# OS-specific bits
@ -81,8 +83,8 @@ $(LANGUAGE_NAME).pc: bindings/c/$(LANGUAGE_NAME).pc.in
-e 's|=$(PREFIX)|=$${prefix}|' \
-e 's|@PREFIX@|$(PREFIX)|' $< > $@
$(SRC_DIR)/parser.c: grammar.js
$(TS) generate --no-bindings
$(PARSER): $(SRC_DIR)/grammar.json
$(TS) generate --no-bindings $^
install: all
install -d '$(DESTDIR)$(INCLUDEDIR)'/tree_sitter '$(DESTDIR)$(PCLIBDIR)' '$(DESTDIR)$(LIBDIR)'

View file

@ -38,9 +38,12 @@ setup(
"src/parser.c",
# NOTE: if your language uses an external scanner, add it here.
],
extra_compile_args=(
["-std=c11"] if system() != 'Windows' else []
),
extra_compile_args=[
"-std=c11",
] if system() != "Windows" else [
"/std:c11",
"/utf-8",
],
define_macros=[
("Py_LIMITED_API", "0x03080000"),
("PY_SSIZE_T_CLEAN", None)