lib: Include fields in ts_node_string output

This allows you to make assertions about fields in tests. But if your test
s-expression does *not* include fields, the fields will be stripped from
the actual output (using a regexp) before comparison.
This commit is contained in:
Max Brunsfeld 2019-02-13 09:47:21 -08:00
parent 9f608435ee
commit 65d1ce8593
6 changed files with 113 additions and 45 deletions

View file

@ -22,6 +22,7 @@ lazy_static! {
.build()
.unwrap();
static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap();
}
#[derive(Debug, PartialEq, Eq)]
@ -34,6 +35,7 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
has_fields: bool,
},
}
@ -135,6 +137,7 @@ fn run_tests(
name,
input,
output,
has_fields,
} => {
if let Some(filter) = filter {
if !name.contains(filter) {
@ -142,7 +145,10 @@ fn run_tests(
}
}
let tree = parser.parse(&input, None).unwrap();
let actual = tree.root_node().to_sexp();
let mut actual = tree.root_node().to_sexp();
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
@ -186,6 +192,10 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
}
}
/// Returns `sexp` with every match of `SEXP_FIELD_REGEX` (a space, a word,
/// `": "` and an opening parenthesis) replaced by `" ("`, i.e. with the
/// field-name prefixes removed from the s-expression.
pub fn strip_sexp_fields(sexp: String) -> String {
    let without_fields = SEXP_FIELD_REGEX.replace_all(&sexp, " (");
    without_fields.into_owned()
}
fn parse_test_content(name: String, content: String) -> TestEntry {
let mut children = Vec::new();
let bytes = content.as_bytes();
@ -209,10 +219,12 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
let input = bytes[previous_header_end..divider_start].to_vec();
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
let output = output.replace(" )", ")");
let has_fields = SEXP_FIELD_REGEX.is_match(&output);
children.push(TestEntry::Example {
name: previous_name,
input,
output,
has_fields,
});
}
}
@ -265,11 +277,13 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
has_fields: false,
},
]
}

View file

@ -4,7 +4,7 @@ use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language};
use super::helpers::random::Rand;
use super::helpers::scope_sequence::ScopeSequence;
use crate::generate;
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry};
use crate::util;
use lazy_static::lazy_static;
use std::{env, fs, time, usize};
@ -67,7 +67,7 @@ fn test_real_language_corpus_files() {
eprintln!("language: {:?}", language_name);
}
for (example_name, input, expected_output) in tests {
for (example_name, input, expected_output, has_fields) in tests {
eprintln!(" example: {:?}", example_name);
if TRIAL_FILTER.map_or(true, |t| t == 0) {
@ -76,7 +76,10 @@ fn test_real_language_corpus_files() {
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
drop(tree);
drop(parser);
if actual_output != expected_output {
@ -144,7 +147,11 @@ fn test_real_language_corpus_files() {
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let actual_output = tree3.root_node().to_sexp();
let mut actual_output = tree3.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output != expected_output {
println!(
"Incorrect parse for {} - {} - trial {}",
@ -241,7 +248,7 @@ fn test_feature_corpus_files() {
eprintln!("test language: {:?}", language_name);
}
for (name, input, expected_output) in tests {
for (name, input, expected_output, has_fields) in tests {
eprintln!(" example: {:?}", name);
allocations::start_recording();
@ -249,7 +256,11 @@ fn test_feature_corpus_files() {
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
drop(tree);
drop(parser);
if actual_output != expected_output {
@ -348,13 +359,14 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
parser
}
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String)>) {
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String, bool)>) {
match test {
TestEntry::Example {
mut name,
input,
output,
has_fields,
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");
@ -365,7 +377,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
return;
}
}
result.push((name, input, output));
result.push((name, input, output, has_fields));
}
TestEntry::Group { mut name, children } => {
if !prefix.is_empty() {

View file

@ -721,7 +721,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
let root = tree.root_node();
assert_eq!(
root.to_sexp(),
"(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))"
"(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))"
);
assert_eq!(root.start_byte(), 2);
assert_eq!(root.child(3).unwrap().start_byte(), 4);

View file

@ -805,56 +805,90 @@ static void ts_subtree__write_dot_string(FILE *f, const char *string) {
}
}
static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit,
const TSLanguage *language, bool is_root,
bool include_all, TSSymbol alias_symbol,
bool alias_is_named) {
static const char *ROOT_FIELD = "__ROOT__";
static size_t ts_subtree__write_to_string(
Subtree self, char *string, size_t limit,
const TSLanguage *language, bool include_all,
TSSymbol alias_symbol, bool alias_is_named, const char *field_name
) {
if (!self.ptr) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
alias_is_named ||
ts_subtree_missing(self) ||
(ts_subtree_visible(self) && ts_subtree_named(self)) ||
alias_is_named;
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
(ts_subtree_visible(self) && ts_subtree_named(self));
if (visible) {
if (field_name != ROOT_FIELD) {
cursor += snprintf(*writer, limit, " ");
if (field_name) {
cursor += snprintf(*writer, limit, "%s: ", field_name);
}
}
if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
} else if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING ");
if (alias_is_named || ts_subtree_named(self)) {
cursor += snprintf(*writer, limit, "%s", symbol_name);
} else {
cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
}
} else {
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
}
if (ts_subtree_child_count(self)) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
language,
self.ptr->production_id,
&field_map,
&field_map_end
);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
if (ts_subtree_extra(child)) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
language, include_all,
0, false, NULL
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
TSSymbol alias_symbol = alias_sequence
? alias_sequence[structural_child_index]
: 0;
bool alias_is_named = alias_symbol
? ts_language_symbol_metadata(language, alias_symbol).named
: false;
const char *child_field_name = visible ? NULL : field_name;
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (!i->inherited && i->child_index == structural_child_index) {
child_field_name = language->field_names[i->field_id];
break;
}
}
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
language, include_all,
alias_symbol, alias_is_named, child_field_name
);
structural_child_index++;
}
@ -866,15 +900,23 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim
return cursor - string;
}
char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) {
char *ts_subtree_string(
Subtree self,
const TSLanguage *language,
bool include_all
) {
char scratch_string[1];
size_t size = ts_subtree__write_to_string(
self, scratch_string, 0,
language, true,
include_all, 0, false
language, include_all,
0, false, ROOT_FIELD
) + 1;
char *result = malloc(size * sizeof(char));
ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false);
ts_subtree__write_to_string(
self, result, size,
language, include_all,
0, false, ROOT_FIELD
);
return result;
}

View file

@ -14,8 +14,8 @@ int main() {
(primitive_type)
(function_declarator (identifier) (parameter_list))
(compound_statement
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)))))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")))))
==============================================
Top-level declarations with missing semicolons
@ -27,8 +27,8 @@ static int b
---
(translation_unit
(declaration (primitive_type) (identifier) (MISSING))
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING)))
(declaration (primitive_type) (identifier) (MISSING ";"))
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";")))
==========================================
Partial declaration lists inside ifdefs
@ -58,7 +58,7 @@ int c() {
(comment)
(declaration (primitive_type) (identifier))
(function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal))))
(preproc_ifdef (identifier) (MISSING))))))
(preproc_ifdef (identifier) (MISSING "#endif"))))))
==========================================
If statements with incomplete expressions
@ -83,12 +83,12 @@ int main() {
(if_statement
(parenthesized_expression (field_expression
(identifier)
(MISSING)))
(MISSING field_identifier)))
(compound_statement
(expression_statement (call_expression (identifier) (argument_list)))
(expression_statement (call_expression (identifier) (argument_list)))
(if_statement
(parenthesized_expression (pointer_expression (MISSING)))
(parenthesized_expression (pointer_expression (MISSING identifier)))
(expression_statement (call_expression (identifier) (argument_list)))))))))
====================================

View file

@ -36,7 +36,7 @@ Missing object-literal values
(program (expression_statement (object
(pair (property_identifier) (identifier))
(pair (property_identifier) (MISSING)))))
(pair (property_identifier) (MISSING identifier)))))
===================================================
Extra identifiers in expressions
@ -81,7 +81,7 @@ if ({a: 'b'} {c: 'd'}) {
(assignment_expression
(identifier)
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))
(MISSING))
(MISSING ";"))
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))
===================================================
@ -153,7 +153,7 @@ const h = `i ${j(k} l`
(identifier)
(template_string (template_substitution (call_expression
(identifier)
(arguments (identifier) (MISSING))))))))
(arguments (identifier) (MISSING ")"))))))))
=========================================================
Long sequences of invalid tokens