Add golang example grammar

Also support the '\a' character class shorthand in regexes,
which matches alphabetic characters.
This commit is contained in:
Max Brunsfeld 2014-03-29 16:29:34 -07:00
parent 13c4e6e648
commit 7adb0bf34f
10 changed files with 4044 additions and 1015 deletions

138
examples/grammars/golang.cc Normal file
View file

@ -0,0 +1,138 @@
#include "tree_sitter/compiler.h"
namespace tree_sitter_examples {
using tree_sitter::Grammar;
using namespace tree_sitter::rules;
// Builds a rule for a possibly-empty, comma-separated list of `element`:
// either `element` followed by any number of `"," element` pairs, or nothing.
static rule_ptr comma_sep(const rule_ptr &element) {
    // One additional list item: a comma followed by another element.
    auto following_item = seq({ str(","), element });
    // The non-empty form: a first element, then zero or more further items.
    auto non_empty_list = seq({ element, repeat(following_item) });
    // The empty alternative is listed second, exactly as in the direct form.
    return choice({ non_empty_list, blank() });
}
// Example grammar covering a small subset of Go. Each entry pairs a rule name
// with its definition; rules refer to each other by name through sym().
// Rules whose names begin with '_' do not show up as nodes in the expected
// trees of the golang test corpus (presumably they are hidden/auxiliary
// rules -- confirm against the compiler).
extern const Grammar golang({
// A source file: one package directive, then any number of import blocks,
// then any number of declarations.
{ "program", seq({
sym("package_directive"),
repeat(sym("imports_block")),
repeat(sym("declaration")) }) },
// `package <name>`
{ "package_directive", seq({
sym("_package"),
sym("package_name") }) },
// `import` followed by either a parenthesized run of imports or a single
// import. The parenthesized run is wrapped in err(); NOTE(review): this looks
// like an error-recovery marker -- confirm its exact semantics.
{ "imports_block", seq({
sym("_import"),
choice({
seq({
str("("),
err(repeat(sym("package_import"))),
str(")")
}),
sym("package_import")
})
}) },
// A single imported package, written as a string literal.
{ "package_import", sym("string") },
// Any top-level declaration. The sequence ends with blank(), which matches
// nothing; NOTE(review): the reason for the trailing blank() is not evident
// from this file.
{ "declaration", seq({
choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }),
blank() }) },
// Declarations
// `type <name> <type expression>`
{ "type_declaration", seq({
sym("_type"),
sym("type_name"),
sym("type_expression") }) },
// `var <name> = <expression>`; no explicit type annotation is accepted here.
{ "var_declaration", seq({
sym("_var"),
sym("var_name"),
str("="),
sym("expression"),
}) },
// `func <name> <signature> <block>`
{ "func_declaration", seq({
sym("_func"),
sym("var_name"),
sym("_func_signature"),
sym("statement_block"),
}) },
// Only the empty block `{}` is accepted; no statements are defined yet.
{ "statement_block", seq({
str("{"),
str("}"),
}) },
// The only expression form currently defined is a number literal.
{ "expression", choice({
sym("number"),
}) },
// Any type: one of the composite forms defined below, or a plain type name.
{ "type_expression", choice({
sym("pointer_type"),
sym("slice_type"),
sym("map_type"),
sym("interface_type"),
sym("struct_type"),
sym("type_name") }) },
// Type expressions
// `*T`
{ "pointer_type", seq({
str("*"),
sym("type_expression") }) },
// `map[K]V`
{ "map_type", seq({
sym("_map"),
str("["),
sym("type_expression"),
str("]"),
sym("type_expression") }) },
// `[]T`
{ "slice_type", seq({
str("["),
str("]"),
sym("type_expression") }) },
// `struct { ... }` holding any number of `<field name> <type>` pairs.
{ "struct_type", seq({
sym("_struct"),
str("{"),
repeat(seq({
sym("var_name"),
sym("type_expression") })),
str("}") }) },
// `interface { ... }` holding any number of `<method name> <signature>` pairs.
{ "interface_type", seq({
sym("_interface"),
str("{"),
repeat(seq({
sym("var_name"),
sym("_func_signature") })),
str("}") }) },
// Function signatures (used by func_declaration and interface_type)
// A parenthesized parameter list followed by an optional result.
// Parameters: a comma-separated list of groups, each group being a
// comma-separated list of names followed by one type expression.
// Result: a parenthesized list of either `<name> <type name>` pairs or bare
// type names, or a single bare type name, or nothing. Note that result types
// are restricted to type_name rather than a full type_expression.
{ "_func_signature", seq({
str("("),
comma_sep(seq({
comma_sep(sym("var_name")),
sym("type_expression"),
})),
str(")"),
choice({
seq({
str("("),
choice({
comma_sep(seq({ sym("var_name"), sym("type_name") })),
comma_sep(sym("type_name")),
}),
str(")") }),
sym("type_name"),
blank() })}) },
// Keywords
{ "_map", str("map") },
{ "_interface", str("interface") },
{ "_struct", str("struct") },
{ "_package", str("package") },
{ "_import", str("import") },
{ "_var", str("var") },
{ "_func", str("func") },
{ "_type", str("type") },
// Literals and identifiers
// Double-quoted string. NOTE(review): the body uses `+`, so an empty literal
// "" is presumably not matched -- confirm whether that is intended.
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
// The three name kinds share one lexical form and differ only in rule name.
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },
{ "type_name", sym("_identifier") },
// An alphabetic character (`\a`) followed by any number of `\w` characters
// or underscores. A leading underscore is therefore not accepted.
{ "_identifier", pattern("\\a[\\w_]*") },
// Digits with an optional fractional part. NOTE(review): the `.` is not
// escaped; whether it matches only a literal dot depends on the pattern
// parser -- confirm.
{ "number", pattern("\\d+(.\\d+)?") },
});
}

View file

@ -123,7 +123,7 @@ namespace tree_sitter_examples {
{ "false", str("false") },
{ "string", pattern("\"([^\"]|\\\\\")+\"") },
{ "identifier", pattern("[\\w_$]+") },
{ "identifier", pattern("\\a[\\w_$]*") },
{ "number", pattern("\\d+(.\\d+)?") },
});
}

2896
examples/parsers/golang.c Normal file

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -36,7 +36,7 @@ describe("building parse and lex tables", []() {
PreparedGrammar lex_grammar({
{ "plus", str("+") },
{ "variable", pattern("\\w+") },
{ "variable", pattern("\\a+") },
{ "number", pattern("\\d+") },
{ "left-paren", str("(") },
{ "right-paren", str(")") }
@ -71,7 +71,7 @@ describe("building parse and lex tables", []() {
AssertThat(lex_state(0).expected_inputs(), Equals(set<CharacterSet>({
CharacterSet({ '(' }),
CharacterSet({ CharacterRange('0', '9') }),
CharacterSet({ {'0', '9'} }),
CharacterSet({ {'a', 'z'}, {'A', 'Z'} }),
})));
});

View file

@ -11,6 +11,7 @@ namespace tree_sitter_examples {
extern const Grammar arithmetic;
extern const Grammar javascript;
extern const Grammar json;
extern const Grammar golang;
}
START_TEST
@ -29,6 +30,7 @@ describe("compiling the example grammars", []() {
compile_grammar(tree_sitter_examples::arithmetic, "arithmetic");
compile_grammar(tree_sitter_examples::json, "json");
compile_grammar(tree_sitter_examples::javascript, "javascript");
compile_grammar(tree_sitter_examples::golang, "golang");
});
END_TEST

View file

@ -23,7 +23,7 @@ describe("parsing pattern rules", []() {
AssertThat(
rule.to_rule_tree(),
EqualsPointer(seq({
character({ {'a', 'z'}, {'A', 'Z'} }),
character({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'} }),
character({ '-' }),
character({ {'0', '9'} })
})));

View file

@ -4,6 +4,7 @@
extern "C" ts_parser ts_parser_javascript();
extern "C" ts_parser ts_parser_json();
extern "C" ts_parser ts_parser_arithmetic();
extern "C" ts_parser ts_parser_golang();
START_TEST
@ -18,38 +19,25 @@ describe("Languages", [&]() {
ts_document_free(doc);
});
auto run_tests_for_language = [&](string language) {
for (auto &entry : test_entries_for_language(language)) {
it(entry.description.c_str(), [&]() {
ts_document_set_input_string(doc, entry.input.c_str());
AssertThat(ts_document_string(doc), Equals(entry.tree_string.c_str()));
auto run_tests_for_language = [&](string language, ts_parser parser) {
describe(language.c_str(), [&]() {
before_each([&]() {
ts_document_set_parser(doc, parser);
});
}
for (auto &entry : test_entries_for_language(language)) {
it(entry.description.c_str(), [&]() {
ts_document_set_input_string(doc, entry.input.c_str());
AssertThat(ts_document_string(doc), Equals(entry.tree_string.c_str()));
});
}
});
};
describe("json", [&]() {
before_each([&]() {
ts_document_set_parser(doc, ts_parser_json());
});
run_tests_for_language("json");
});
describe("arithmetic", [&]() {
before_each([&]() {
ts_document_set_parser(doc, ts_parser_arithmetic());
});
run_tests_for_language("arithmetic");
});
describe("javascript", [&]() {
before_each([&]() {
ts_document_set_parser(doc, ts_parser_javascript());
});
run_tests_for_language("javascript");
});
run_tests_for_language("json", ts_parser_json());
run_tests_for_language("arithmetic", ts_parser_arithmetic());
run_tests_for_language("javascript", ts_parser_javascript());
run_tests_for_language("golang", ts_parser_golang());
});
END_TEST

View file

@ -0,0 +1,77 @@
==========================================
parses trivial programs
==========================================
package trivial
type x int64
var y = 0
func z() {}
---
(program
(package_directive (package_name))
(type_declaration (type_name) (type_name))
(var_declaration (var_name) (number))
(func_declaration (var_name) (statement_block)))
==========================================
parses complex types
==========================================
package trivial
type x *struct {
field1 []int64
field2 map[string]interface{
DoStuff()
}
}
---
(program
(package_directive (package_name))
(type_declaration
(type_name)
(pointer_type (struct_type
(var_name) (slice_type (type_name))
(var_name) (map_type (type_name) (interface_type (var_name)))))))
============================================
parses function arguments
============================================
package main
func oneArg(arg1 interface{}) {}
func argsOfSameType(arg1, arg2 string) {}
func argsOfDifferentTypes() (arg1 string, arg2 int64) {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (var_name) (interface_type) (statement_block))
(func_declaration (var_name) (var_name) (var_name) (type_name) (statement_block))
(func_declaration (var_name) (var_name) (type_name) (var_name) (type_name) (statement_block)))
============================================
parses functions with unnamed return values
============================================
package main
func oneReturnValue() string {}
func multipleReturnValues() (string, int64, error) {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (type_name)
(statement_block))
(func_declaration (var_name) (type_name) (type_name) (type_name)
(statement_block)))
============================================
parses functions with named return values
============================================
package main
func oneReturnValue() (result string) {}
func multipleReturnValues() (result string, count int64, err error) {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (var_name) (type_name)
(statement_block))
(func_declaration (var_name) (var_name) (type_name) (var_name) (type_name) (var_name) (type_name)
(statement_block)))

View file

@ -133,10 +133,12 @@ namespace tree_sitter {
case '(':
case ')':
return CharacterSet({ value });
case 'a':
return CharacterSet({ {'a', 'z'}, {'A', 'Z'} });
case 'w':
return CharacterSet({{'a', 'z'}, {'A', 'Z'}});
return CharacterSet({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'}});
case 'd':
return CharacterSet({CharacterRange('0', '9')});
return CharacterSet({ {'0', '9'} });
default:
error = "unrecognized escape sequence";
return CharacterSet();