diff --git a/cli/src/test.rs b/cli/src/test.rs index c4d74285..98404138 100644 --- a/cli/src/test.rs +++ b/cli/src/test.rs @@ -22,6 +22,7 @@ lazy_static! { .build() .unwrap(); static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap(); + static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap(); } #[derive(Debug, PartialEq, Eq)] @@ -34,6 +35,7 @@ pub enum TestEntry { name: String, input: Vec, output: String, + has_fields: bool, }, } @@ -135,6 +137,7 @@ fn run_tests( name, input, output, + has_fields, } => { if let Some(filter) = filter { if !name.contains(filter) { @@ -142,7 +145,10 @@ fn run_tests( } } let tree = parser.parse(&input, None).unwrap(); - let actual = tree.root_node().to_sexp(); + let mut actual = tree.root_node().to_sexp(); + if !has_fields { + actual = strip_sexp_fields(actual); + } for _ in 0..indent_level { print!(" "); } @@ -186,6 +192,10 @@ pub fn parse_tests(path: &Path) -> io::Result { } } +pub fn strip_sexp_fields(sexp: String) -> String { + SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string() +} + fn parse_test_content(name: String, content: String) -> TestEntry { let mut children = Vec::new(); let bytes = content.as_bytes(); @@ -209,10 +219,12 @@ fn parse_test_content(name: String, content: String) -> TestEntry { let input = bytes[previous_header_end..divider_start].to_vec(); let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string(); let output = output.replace(" )", ")"); + let has_fields = SEXP_FIELD_REGEX.is_match(&output); children.push(TestEntry::Example { name: previous_name, input, output, + has_fields, }); } } @@ -265,11 +277,13 @@ d name: "The first test".to_string(), input: "\na b c\n".as_bytes().to_vec(), output: "(a (b c))".to_string(), + has_fields: false, }, TestEntry::Example { name: "The second test".to_string(), input: "d".as_bytes().to_vec(), output: "(d)".to_string(), + has_fields: false, }, ] } diff --git a/cli/src/tests/corpus_test.rs b/cli/src/tests/corpus_test.rs index 70b27295..dba86d83 100644 --- a/cli/src/tests/corpus_test.rs +++ b/cli/src/tests/corpus_test.rs @@ -4,7 +4,7 @@ use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language}; use super::helpers::random::Rand; use super::helpers::scope_sequence::ScopeSequence; use crate::generate; -use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry}; +use crate::test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry}; use crate::util; use lazy_static::lazy_static; use std::{env, fs, time, usize}; @@ -67,7 +67,7 @@ fn test_real_language_corpus_files() { eprintln!("language: {:?}", language_name); } - for (example_name, input, expected_output) in tests { + for (example_name, input, expected_output, has_fields) in tests { eprintln!(" example: {:?}", example_name); if TRIAL_FILTER.map_or(true, |t| t == 0) { @@ -76,7 +76,10 @@ fn test_real_language_corpus_files() { let mut parser = get_parser(&mut log_session, "log.html"); parser.set_language(language).unwrap(); let tree = parser.parse(&input, None).unwrap(); - let actual_output = tree.root_node().to_sexp(); + let mut actual_output = tree.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } drop(tree); drop(parser); if actual_output != expected_output { @@ -144,7 +147,11 @@ fn test_real_language_corpus_files() { let tree3 = parser.parse(&input, Some(&tree2)).unwrap(); // Verify that the final tree matches the expectation from the corpus. - let actual_output = tree3.root_node().to_sexp(); + let mut actual_output = tree3.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } + if actual_output != expected_output { println!( "Incorrect parse for {} - {} - trial {}", @@ -241,7 +248,7 @@ fn test_feature_corpus_files() { eprintln!("test language: {:?}", language_name); } - for (name, input, expected_output) in tests { + for (name, input, expected_output, has_fields) in tests { eprintln!(" example: {:?}", name); allocations::start_recording(); @@ -249,7 +256,11 @@ fn test_feature_corpus_files() { let mut parser = get_parser(&mut log_session, "log.html"); parser.set_language(language).unwrap(); let tree = parser.parse(&input, None).unwrap(); - let actual_output = tree.root_node().to_sexp(); + let mut actual_output = tree.root_node().to_sexp(); + if !has_fields { + actual_output = strip_sexp_fields(actual_output); + } + drop(tree); drop(parser); if actual_output != expected_output { @@ -348,13 +359,14 @@ fn get_parser(session: &mut Option, log_filename: &str) -> Par parser } -fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String)> { - fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec, String)>) { +fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String, bool)> { + fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec, String, bool)>) { match test { TestEntry::Example { mut name, input, output, + has_fields, } => { if !prefix.is_empty() { name.insert_str(0, " - "); @@ -365,7 +377,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec, String)> { return; } } - result.push((name, input, output)); + result.push((name, input, output, has_fields)); } TestEntry::Group { mut name, children } => { if !prefix.is_empty() { diff --git a/cli/src/tests/parser_test.rs b/cli/src/tests/parser_test.rs index 7947463a..357cf37d 100644 --- a/cli/src/tests/parser_test.rs +++ b/cli/src/tests/parser_test.rs @@ -721,7 +721,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() { let root = tree.root_node(); assert_eq!( root.to_sexp(), - "(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))" + "(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))" ); assert_eq!(root.start_byte(), 2); assert_eq!(root.child(3).unwrap().start_byte(), 4); diff --git a/lib/src/subtree.c b/lib/src/subtree.c index 776a86fc..a7521fa3 100644 --- a/lib/src/subtree.c +++ b/lib/src/subtree.c @@ -805,56 +805,90 @@ static void ts_subtree__write_dot_string(FILE *f, const char *string) { } } -static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit, - const TSLanguage *language, bool is_root, - bool include_all, TSSymbol alias_symbol, - bool alias_is_named) { +static const char *ROOT_FIELD = "__ROOT__"; + +static size_t ts_subtree__write_to_string( + Subtree self, char *string, size_t limit, + const TSLanguage *language, bool include_all, + TSSymbol alias_symbol, bool alias_is_named, const char *field_name +) { if (!self.ptr) return snprintf(string, limit, "(NULL)"); char *cursor = string; char **writer = (limit > 0) ? &cursor : &string; bool visible = include_all || - is_root || + alias_is_named || ts_subtree_missing(self) || - (ts_subtree_visible(self) && ts_subtree_named(self)) || - alias_is_named; - - if (visible && !is_root) { - cursor += snprintf(*writer, limit, " "); - } + (ts_subtree_visible(self) && ts_subtree_named(self)); if (visible) { + if (field_name != ROOT_FIELD) { + cursor += snprintf(*writer, limit, " "); + + if (field_name) { + cursor += snprintf(*writer, limit, "%s: ", field_name); + } + } + if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) { cursor += snprintf(*writer, limit, "(UNEXPECTED "); cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char); - } else if (ts_subtree_missing(self)) { - cursor += snprintf(*writer, limit, "(MISSING"); } else { TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self); const char *symbol_name = ts_language_symbol_name(language, symbol); - cursor += snprintf(*writer, limit, "(%s", symbol_name); + if (ts_subtree_missing(self)) { + cursor += snprintf(*writer, limit, "(MISSING "); + if (alias_is_named || ts_subtree_named(self)) { + cursor += snprintf(*writer, limit, "%s", symbol_name); + } else { + cursor += snprintf(*writer, limit, "\"%s\"", symbol_name); + } + } else { + cursor += snprintf(*writer, limit, "(%s", symbol_name); + } } } if (ts_subtree_child_count(self)) { const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id); + const TSFieldMapEntry *field_map, *field_map_end; + ts_language_field_map( + language, + self.ptr->production_id, + &field_map, + &field_map_end + ); + uint32_t structural_child_index = 0; for (uint32_t i = 0; i < self.ptr->child_count; i++) { Subtree child = self.ptr->children[i]; if (ts_subtree_extra(child)) { cursor += ts_subtree__write_to_string( child, *writer, limit, - language, false, include_all, - 0, false + language, include_all, + 0, false, NULL ); } else { - TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0; + TSSymbol alias_symbol = alias_sequence + ? alias_sequence[structural_child_index] + : 0; + bool alias_is_named = alias_symbol + ? ts_language_symbol_metadata(language, alias_symbol).named + : false; + + const char *child_field_name = visible ? NULL : field_name; + for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) { + if (!i->inherited && i->child_index == structural_child_index) { + child_field_name = language->field_names[i->field_id]; + break; + } + } + cursor += ts_subtree__write_to_string( child, *writer, limit, - language, false, include_all, - alias_symbol, - alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false + language, include_all, + alias_symbol, alias_is_named, child_field_name ); structural_child_index++; } @@ -866,15 +900,23 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim return cursor - string; } -char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) { +char *ts_subtree_string( + Subtree self, + const TSLanguage *language, + bool include_all +) { char scratch_string[1]; size_t size = ts_subtree__write_to_string( self, scratch_string, 0, - language, true, - include_all, 0, false + language, include_all, + 0, false, ROOT_FIELD ) + 1; char *result = malloc(size * sizeof(char)); - ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false); + ts_subtree__write_to_string( + self, result, size, + language, include_all, + 0, false, ROOT_FIELD + ); return result; } diff --git a/test/fixtures/error_corpus/c_errors.txt b/test/fixtures/error_corpus/c_errors.txt index ee63debf..4d0c8e8b 100644 --- a/test/fixtures/error_corpus/c_errors.txt +++ b/test/fixtures/error_corpus/c_errors.txt @@ -14,8 +14,8 @@ int main() { (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)) - (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))))) + (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")) + (expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))))) ============================================== Top-level declarations with missing semicolons @@ -27,8 +27,8 @@ static int b --- (translation_unit - (declaration (primitive_type) (identifier) (MISSING)) - (declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING))) + (declaration (primitive_type) (identifier) (MISSING ";")) + (declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";"))) ========================================== Partial declaration lists inside ifdefs @@ -58,7 +58,7 @@ int c() { (comment) (declaration (primitive_type) (identifier)) (function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal)))) - (preproc_ifdef (identifier) (MISSING)))))) + (preproc_ifdef (identifier) (MISSING "#endif")))))) ========================================== If statements with incomplete expressions @@ -83,12 +83,12 @@ int main() { (if_statement (parenthesized_expression (field_expression (identifier) - (MISSING))) + (MISSING field_identifier))) (compound_statement (expression_statement (call_expression (identifier) (argument_list))) (expression_statement (call_expression (identifier) (argument_list))) (if_statement - (parenthesized_expression (pointer_expression (MISSING))) + (parenthesized_expression (pointer_expression (MISSING identifier))) (expression_statement (call_expression (identifier) (argument_list))))))))) ==================================== diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index ffa9d547..4aac3e37 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -36,7 +36,7 @@ Missing object-literal values (program (expression_statement (object (pair (property_identifier) (identifier)) - (pair (property_identifier) (MISSING))))) + (pair (property_identifier) (MISSING identifier))))) =================================================== Extra identifiers in expressions @@ -81,7 +81,7 @@ if ({a: 'b'} {c: 'd'}) { (assignment_expression (identifier) (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))) - (MISSING)) + (MISSING ";")) (function (formal_parameters (identifier)) (statement_block (expression_statement (identifier))))))) =================================================== @@ -153,7 +153,7 @@ const h = `i ${j(k} l` (identifier) (template_string (template_substitution (call_expression (identifier) - (arguments (identifier) (MISSING)))))))) + (arguments (identifier) (MISSING ")")))))))) ========================================================= Long sequences of invalid tokens