Make the compile function plain C and take a JSON grammar

This commit is contained in:
Max Brunsfeld 2016-01-10 20:04:41 -08:00
parent b69e19c525
commit d4632ab9a9
54 changed files with 325 additions and 234 deletions

View file

@ -1,66 +1,30 @@
#ifndef TREE_SITTER_COMPILER_H_
#define TREE_SITTER_COMPILER_H_
#include <memory>
#include <string>
#include <utility>
#include <vector>
#ifdef __cplusplus
extern "C" {
#endif
namespace tree_sitter {
typedef enum {
TSCompileErrorTypeNone,
TSCompileErrorTypeInvalidGrammar,
TSCompileErrorTypeInvalidRegex,
TSCompileErrorTypeUndefinedSymbol,
TSCompileErrorTypeInvalidUbiquitousToken,
TSCompileErrorTypeLexConflict,
TSCompileErrorTypeParseConflict,
} TSCompileErrorType;
class Rule;
typedef std::shared_ptr<Rule> rule_ptr;
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &);
rule_ptr repeat(const rule_ptr &);
rule_ptr repeat1(const rule_ptr &);
rule_ptr seq(const std::vector<rule_ptr> &);
rule_ptr sym(const std::string &);
rule_ptr pattern(const std::string &);
rule_ptr str(const std::string &);
rule_ptr err(const rule_ptr &);
rule_ptr prec(int precedence, const rule_ptr &);
rule_ptr prec_left(const rule_ptr &);
rule_ptr prec_left(int precedence, const rule_ptr &);
rule_ptr prec_right(const rule_ptr &);
rule_ptr prec_right(int precedence, const rule_ptr &);
rule_ptr token(const rule_ptr &rule);
struct Grammar {
std::vector<std::pair<std::string, rule_ptr>> rules;
std::vector<rule_ptr> extra_tokens;
std::vector<std::vector<std::string>> expected_conflicts;
};
enum GrammarErrorType {
GrammarErrorTypeRegex,
GrammarErrorTypeUndefinedSymbol,
GrammarErrorTypeInvalidUbiquitousToken,
GrammarErrorTypeLexConflict,
GrammarErrorTypeParseConflict,
};
class GrammarError {
public:
GrammarError(GrammarErrorType type, std::string message) : type(type), message(message) {}
bool operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
GrammarErrorType type;
std::string message;
};
std::pair<std::string, const GrammarError *> compile(const Grammar &,
std::string);
struct CompileResult {
typedef struct {
const char *code;
const char *error_message;
};
TSCompileErrorType error_type;
} TSCompileResult;
extern "C" CompileResult compile(const char *input);
TSCompileResult ts_compile_grammar(const char *input);
} // namespace tree_sitter
#ifdef __cplusplus
}
#endif
#endif // TREE_SITTER_COMPILER_H_

View file

@ -94,17 +94,17 @@ struct TSLanguage {
* Lexer Macros
*/
#define START_LEXER() \
lexer->start_fn(lexer, state); \
int32_t lookahead; \
next_state: \
#define START_LEXER() \
lexer->start_fn(lexer, state); \
int32_t lookahead; \
next_state: \
lookahead = lexer->lookahead;
#define START_TOKEN() lexer->start_token_fn(lexer);
#define GO_TO_STATE(state_value) \
{ \
state = state_value; \
state = state_value; \
goto next_state; \
}

View file

@ -1,4 +1,5 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/compile.h"
#include <fstream>
#include <iostream>
@ -29,10 +30,10 @@ describe("compiling the example grammars", []() {
it(("compiles the " + language + " grammar").c_str(), [&]() {
auto result = compile(grammar, language);
string code = result.first;
const GrammarError *error = result.second;
const CompileError error = result.second;
if (error)
AssertThat(error->message, Equals(""));
if (error.type)
AssertThat(error.message, Equals(""));
ofstream file(example_parser_dir + language + ".c");
file << get<0>(result);

View file

@ -1,32 +1,45 @@
#include "compiler/compiler_spec_helper.h"
#include "tree_sitter/compiler.h"
#include "compiler/compile.h"
using namespace rules;
START_TEST
describe("Compile", []() {
describe("compile_grammar", []() {
describe("when the grammar's start symbol is a token", [&]() {
it("does not fail", [&]() {
Grammar grammar{{
{ "rule1", str("the-value") }
}, {}, {}};
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "the_grammar",
"rules": {
"rule1": {
"type": "STRING",
"value": "hello"
}
}
}
)JSON");
auto result = compile(grammar, "test_grammar");
const GrammarError *error = result.second;
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(string(result.error_message), IsEmpty());
AssertThat(string(result.code), !IsEmpty());
});
});
describe("when the grammar's start symbol is blank", [&]() {
it("does not fail", [&]() {
Grammar grammar{{
{ "rule1", blank() }
}, {}, {}};
TSCompileResult result = ts_compile_grammar(R"JSON(
{
"name": "the_grammar",
"rules": {
"rule1": {
"type": "BLANK"
}
}
}
)JSON");
auto result = compile(grammar, "test_grammar");
const GrammarError *error = result.second;
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(string(result.error_message), IsEmpty());
AssertThat(string(result.code), !IsEmpty());
});
});
});

View file

@ -5,7 +5,7 @@
#include "compiler/helpers/stream_methods.h"
#include "compiler/helpers/equals_pointer.h"
#include "compiler/helpers/rule_helpers.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
using namespace tree_sitter;
using namespace std;

View file

@ -23,11 +23,11 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
return stream << string("}>");
}
ostream &operator<<(ostream &stream, const GrammarError *error) {
if (error)
return stream << (string("#<grammar-error '") + error->message + "'>");
ostream &operator<<(ostream &stream, const CompileError &error) {
if (error.type)
return stream << (string("#<compile-error '") + error.message + "'>");
else
return stream << string("#<null>");
return stream << string("#<no-compile-error>");
}
ostream &operator<<(ostream &stream, const Rule &rule) {

View file

@ -7,7 +7,8 @@
#include <map>
#include <unordered_set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/compile_error.h"
using std::cout;
@ -98,7 +99,7 @@ struct ProductionStep;
struct PrecedenceRange;
ostream &operator<<(ostream &, const Grammar &);
ostream &operator<<(ostream &, const GrammarError &);
ostream &operator<<(ostream &, const CompileError &);
ostream &operator<<(ostream &, const Rule &);
ostream &operator<<(ostream &, const rule_ptr &);
ostream &operator<<(ostream &, const Variable &);

View file

@ -20,7 +20,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
@ -69,7 +69,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, seq({
i_sym(10),
@ -102,7 +102,7 @@ describe("expand_tokens", []() {
auto result = expand_tokens(grammar);
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
});
});
});

View file

@ -30,9 +30,9 @@ describe("extract_tokens", []() {
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
const GrammarError *error = get<2>(result);
CompileError error = get<2>(result);
AssertThat(error, Equals<const GrammarError *>(nullptr));
AssertThat(error, Equals(CompileError::none()));
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
Variable("rule_A", VariableTypeNamed, repeat1(seq({
@ -150,7 +150,7 @@ describe("extract_tokens", []() {
pattern("\\s+"),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
@ -167,7 +167,7 @@ describe("extract_tokens", []() {
str("y"),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
});
@ -181,7 +181,7 @@ describe("extract_tokens", []() {
i_sym(2),
}, {}});
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
{ Symbol(3, true) },
@ -196,9 +196,9 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
}, { i_sym(1) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: rule_B")));
});
@ -208,9 +208,9 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, str("y")),
}, { choice({ i_sym(1), blank() }) }, {}});
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
AssertThat(get<2>(result), EqualsPointer(
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
});
});

View file

@ -18,7 +18,7 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.variables, Equals(vector<Variable>({
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
Variable("y", VariableTypeNamed, i_sym(2)),
@ -34,7 +34,7 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second->message, Equals("Undefined rule 'y'"));
AssertThat(result.second.message, Equals("Undefined rule 'y'"));
});
});
@ -49,7 +49,7 @@ describe("intern_symbols", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.second, Equals(CompileError::none()));
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
});

View file

@ -222,8 +222,8 @@ describe("parse_regex", []() {
for (auto &row : invalid_inputs) {
it(("handles invalid regexes with " + row.description).c_str(), [&]() {
auto result = parse_regex(row.pattern);
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
AssertThat(result.second->message, Contains(row.message));
AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
AssertThat(result.second.message, Contains(row.message));
});
}
});

View file

@ -1,4 +1,4 @@
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
namespace tree_sitter_examples {

View file

@ -1,7 +1,8 @@
#ifndef TREESITTER_EXAMPLES_HELPERS_
#define TREESITTER_EXAMPLES_HELPERS_
#include "tree_sitter/compiler.h"
#include "compiler/rules.h"
#include "compiler/grammar.h"
namespace tree_sitter_examples {

View file

@ -1,7 +1,6 @@
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
#include "tree_sitter/compiler.h"
#include "compiler/lex_table.h"
namespace tree_sitter {

View file

@ -45,7 +45,7 @@ class ParseTableBuilder {
const LexicalGrammar &lex_grammar)
: grammar(grammar), lexical_grammar(lex_grammar) {}
pair<ParseTable, const GrammarError *> build() {
pair<ParseTable, CompileError> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone),
@ -68,9 +68,9 @@ class ParseTableBuilder {
add_shift_actions(item_set, state_id);
if (!conflicts.empty())
return { parse_table, new GrammarError(GrammarErrorTypeParseConflict,
"Unresolved conflict.\n\n" +
*conflicts.begin()) };
return { parse_table,
CompileError(TSCompileErrorTypeParseConflict,
"Unresolved conflict.\n\n" + *conflicts.begin()) };
}
for (ParseStateId state = 0; state < parse_table.states.size(); state++) {
@ -83,7 +83,7 @@ class ParseTableBuilder {
parse_table.symbols.insert({ rules::ERROR(), {} });
return { parse_table, nullptr };
return { parse_table, CompileError::none() };
}
private:
@ -370,7 +370,7 @@ class ParseTableBuilder {
}
};
pair<ParseTable, const GrammarError *> build_parse_table(
pair<ParseTable, CompileError> build_parse_table(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
}

View file

@ -4,7 +4,7 @@
#include <utility>
#include <vector>
#include "compiler/parse_table.h"
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -13,8 +13,8 @@ struct LexicalGrammar;
namespace build_tables {
std::pair<ParseTable, const GrammarError *> build_parse_table(
const SyntaxGrammar &, const LexicalGrammar &);
std::pair<ParseTable, CompileError> build_parse_table(const SyntaxGrammar &,
const LexicalGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -4,6 +4,7 @@
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace build_tables {
@ -13,11 +14,11 @@ using std::tuple;
using std::vector;
using std::make_tuple;
tuple<ParseTable, LexTable, const GrammarError *> build_tables(
tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
auto parse_table_result = build_parse_table(grammar, lex_grammar);
ParseTable parse_table = parse_table_result.first;
const GrammarError *error = parse_table_result.second;
const CompileError error = parse_table_result.second;
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
return make_tuple(parse_table, lex_table, error);
}

View file

@ -4,9 +4,9 @@
#include <string>
#include <tuple>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -15,7 +15,7 @@ struct LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, const GrammarError *> build_tables(
std::tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &, const LexicalGrammar &);
} // namespace build_tables

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -2,7 +2,6 @@
#include <set>
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"
namespace tree_sitter {

View file

@ -2,7 +2,6 @@
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/symbol.h"

View file

@ -2,7 +2,6 @@
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"

View file

@ -1,7 +1,6 @@
#include "compiler/build_tables/parse_item.h"
#include <string>
#include "compiler/syntax_grammar.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -1,5 +1,4 @@
#include "compiler/build_tables/rule_can_be_blank.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
#define COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace build_tables {

View file

@ -15,28 +15,44 @@ using std::vector;
using std::get;
using std::make_tuple;
CompileResult compile(const char *input) {
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
ParseGrammarResult parse_result = parse_grammar(string(input));
if (!parse_result.error_message.empty()) {
return {nullptr, parse_result.error_message.c_str()};
return { "", strdup(parse_result.error_message.c_str()),
TSCompileErrorTypeInvalidGrammar };
}
auto compile_result = compile(parse_result.grammar, parse_result.name);
if (compile_result.second) {
return {nullptr, compile_result.second->message.c_str()};
auto prepare_grammar_result =
prepare_grammar::prepare_grammar(parse_result.grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
CompileError error = get<2>(prepare_grammar_result);
if (error.type) {
return { "", strdup(error.message.c_str()), error.type };
}
return {compile_result.first.c_str(), nullptr};
auto table_build_result =
build_tables::build_tables(syntax_grammar, lexical_grammar);
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);
if (error.type) {
return { "", strdup(error.message.c_str()), error.type };
}
string code = generate_code::c_code(parse_result.name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return { strdup(code.c_str()), "", TSCompileErrorTypeNone };
}
pair<string, const GrammarError *> compile(const Grammar &grammar,
std::string name) {
pair<string, const CompileError> compile(const Grammar &grammar,
std::string name) {
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
const GrammarError *error = get<2>(prepare_grammar_result);
if (error)
CompileError error = get<2>(prepare_grammar_result);
if (error.type)
return { "", error };
auto table_build_result =
@ -44,14 +60,13 @@ pair<string, const GrammarError *> compile(const Grammar &grammar,
const ParseTable &parse_table = get<0>(table_build_result);
const LexTable &lex_table = get<1>(table_build_result);
error = get<2>(table_build_result);
if (error)
if (error.type)
return { "", error };
string code = generate_code::c_code(name, parse_table, lex_table,
syntax_grammar, lexical_grammar);
return { code, nullptr };
return { code, CompileError::none() };
}
} // namespace tree_sitter

16
src/compiler/compile.h Normal file
View file

@ -0,0 +1,16 @@
#ifndef COMPILER_COMPILE_H_
#define COMPILER_COMPILE_H_
#include <string>
#include <utility>
#include "compiler/compile_error.h"
namespace tree_sitter {
struct Grammar;
// Compiles a grammar into generated parser source code.
//
// Parameters: the grammar to compile, and the name handed to the code
// generator for the generated parser.
//
// Returns a pair of (code, error). On success `code` holds the generated
// source and `error` equals CompileError::none(). On failure `code` is the
// empty string and `error` carries the failure type and message.
//
// NOTE(review): the definition in this commit spells the return type as
// `pair<string, const CompileError>`; confirm that it matches this
// declaration.
std::pair<std::string, CompileError> compile(const Grammar &, std::string);
} // namespace tree_sitter
#endif // COMPILER_COMPILE_H_

View file

@ -0,0 +1,28 @@
#ifndef COMPILER_COMPILE_ERROR_H_
#define COMPILER_COMPILE_ERROR_H_
#include <string>
#include <utility>
#include "tree_sitter/compiler.h"
namespace tree_sitter {
// Value type describing the outcome of a compilation step: an error category
// (TSCompileErrorType, declared in the public "tree_sitter/compiler.h" header)
// together with a human-readable message.
class CompileError {
 public:
  // Builds an error of the given category.
  //
  // `message` is taken by value and then moved into the member, so a caller
  // that passes a temporary (the common case, e.g. a concatenated string) does
  // not pay for a second string copy.
  CompileError(TSCompileErrorType type, std::string message)
      : type(type), message(std::move(message)) {}

  // Returns the value that stands for "no error": category
  // TSCompileErrorTypeNone and an empty message.
  static CompileError none() {
    return CompileError(TSCompileErrorTypeNone, "");
  }

  // Two errors compare equal only when both the category and the message
  // match.
  bool operator==(const CompileError &other) const {
    return type == other.type && message == other.message;
  }

  // Error category; TSCompileErrorTypeNone means that nothing went wrong.
  TSCompileErrorType type;

  // Human-readable description; empty for the value returned by none().
  std::string message;
};
} // namespace tree_sitter
#endif // COMPILER_COMPILE_ERROR_H_

View file

@ -188,7 +188,9 @@ class CCodeGenerator {
}
void add_lex_function() {
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
line(
"static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) "
"{");
indent([&]() {
line("START_LEXER();");
_switch("state", [&]() {

19
src/compiler/grammar.h Normal file
View file

@ -0,0 +1,19 @@
#ifndef COMPILER_GRAMMAR_H_
#define COMPILER_GRAMMAR_H_
#include <vector>
#include <string>
#include <utility>
#include "compiler/rule.h"
namespace tree_sitter {
// Plain aggregate describing a grammar as it is handed to the compiler.
// Member order matters: callers initialise this struct positionally with
// braces, so do not reorder the fields.
struct Grammar {
// Named rules as (name, rule) pairs, stored in a vector so that their order
// is kept.
std::vector<std::pair<std::string, rule_ptr>> rules;
// Rules for the grammar's extra tokens.
// NOTE(review): presumably tokens that may appear anywhere in the input (see
// the "InvalidUbiquitousToken" error type) -- confirm.
std::vector<rule_ptr> extra_tokens;
// Expected conflicts; each entry is a list of strings.
// NOTE(review): the strings are presumably rule names -- confirm.
std::vector<std::vector<std::string>> expected_conflicts;
};
} // namespace tree_sitter
#endif // COMPILER_GRAMMAR_H_

View file

@ -4,7 +4,7 @@
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/variable.h"
namespace tree_sitter {

View file

@ -2,8 +2,9 @@
#include <string>
#include <vector>
#include <utility>
#include "tree_sitter/compiler.h"
#include "json.h"
#include "compiler/rule.h"
#include "compiler/rules.h"
namespace tree_sitter {
@ -40,7 +41,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
type = rule_type_json.u.string.ptr;
if (type == "BLANK") {
return {blank(), ""};
return { blank(), "" };
}
if (type == "CHOICE") {
@ -61,7 +62,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
goto error;
}
}
return {choice(members), ""};
return { choice(members), "" };
}
if (type == "SEQ") {
@ -82,14 +83,14 @@ ParseRuleResult parse_rule(json_value *rule_json) {
goto error;
}
}
return {seq(members), ""};
return { seq(members), "" };
}
if (type == "ERROR") {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return {err(content.rule), ""};
return { err(content.rule), "" };
} else {
error_message = "Invalid error content: " + content.error_message;
goto error;
@ -100,7 +101,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return {repeat(content.rule), ""};
return { repeat(content.rule), "" };
} else {
error_message = "Invalid repeat content: " + content.error_message;
goto error;
@ -111,7 +112,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return {repeat1(content.rule), ""};
return { repeat1(content.rule), "" };
} else {
error_message = "Invalid repeat1 content: " + content.error_message;
goto error;
@ -122,7 +123,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
json_value content_json = rule_json->operator[]("content");
ParseRuleResult content = parse_rule(&content_json);
if (content.rule.get()) {
return {token(content.rule), ""};
return { token(content.rule), "" };
} else {
error_message = "Invalid token content: " + content.error_message;
goto error;
@ -132,7 +133,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
if (type == "PATTERN") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return {pattern(value_json.u.string.ptr), ""};
return { pattern(value_json.u.string.ptr), "" };
} else {
error_message = "Pattern value must be a string";
goto error;
@ -142,7 +143,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
if (type == "STRING") {
json_value value_json = rule_json->operator[]("value");
if (value_json.type == json_string) {
return {str(value_json.u.string.ptr), ""};
return { str(value_json.u.string.ptr), "" };
} else {
error_message = "String rule value must be a string";
goto error;
@ -152,7 +153,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
if (type == "SYMBOL") {
json_value value_json = rule_json->operator[]("name");
if (value_json.type == json_string) {
return {sym(value_json.u.string.ptr), ""};
return { sym(value_json.u.string.ptr), "" };
} else {
error_message = "Symbol value must be a string";
goto error;
@ -173,7 +174,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
goto error;
}
return {prec(precedence_json.u.integer, content.rule), ""};
return { prec(precedence_json.u.integer, content.rule), "" };
}
if (type == "PREC_LEFT") {
@ -190,7 +191,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
goto error;
}
return {prec_left(precedence_json.u.integer, content.rule), ""};
return { prec_left(precedence_json.u.integer, content.rule), "" };
}
if (type == "PREC_RIGHT") {
@ -207,13 +208,13 @@ ParseRuleResult parse_rule(json_value *rule_json) {
goto error;
}
return {prec_right(precedence_json.u.integer, content.rule), ""};
return { prec_right(precedence_json.u.integer, content.rule), "" };
}
error_message = "Unknown rule type " + type;
error:
return {rule_ptr(), error_message};
return { rule_ptr(), error_message };
}
ParseGrammarResult parse_grammar(const string &input) {
@ -222,11 +223,12 @@ ParseGrammarResult parse_grammar(const string &input) {
Grammar grammar;
json_value name_json, rules_json, extras_json, conflicts_json;
json_settings settings = {0, 0, 0, 0, 0, 0};
json_settings settings = { 0, 0, 0, 0, 0, 0 };
char parse_error[json_error_max];
json_value *grammar_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
json_value *grammar_json =
json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
if (!grammar_json) {
error_message = "Failed to parse JSON";
error_message = string("Invalid JSON at ") + parse_error;
goto error;
}
@ -254,7 +256,8 @@ ParseGrammarResult parse_grammar(const string &input) {
ParseRuleResult entry = parse_rule(entry_json.value);
if (!entry.rule.get()) {
error_message = string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
error_message =
string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
goto error;
}
@ -295,7 +298,8 @@ ParseGrammarResult parse_grammar(const string &input) {
}
vector<string> conflict;
for (size_t j = 0, conflict_length = conflict_json->u.array.length; j < conflict_length; j++) {
for (size_t j = 0, conflict_length = conflict_json->u.array.length;
j < conflict_length; j++) {
json_value *conflict_entry_json = conflict_json->u.array.values[j];
if (conflict_entry_json->type != json_string) {
error_message = "Each conflict entry must be an array of strings";
@ -309,14 +313,14 @@ ParseGrammarResult parse_grammar(const string &input) {
}
}
return {name, grammar, ""};
return { name, grammar, "" };
error:
if (grammar_json) {
json_value_free(grammar_json);
}
return {"", Grammar{}, error_message};
return { "", Grammar{}, error_message };
}
} // namespace tree_sitter

View file

@ -3,6 +3,7 @@
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
namespace tree_sitter {

View file

@ -3,6 +3,7 @@
#include <string>
#include <utility>
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/rule.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
#define COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -53,36 +53,35 @@ class ExpandTokens : public rules::IdentityRuleFn {
rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
if (!error.type)
error = pair.second;
return pair.first;
}
public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
CompileError error;
ExpandTokens() : error(CompileError::none()) {}
};
pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &grammar) {
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
LexicalGrammar result;
ExpandTokens expander;
for (const Variable &variable : grammar.variables) {
auto rule = expander.apply(variable.rule);
if (expander.error)
if (expander.error.type)
return { result, expander.error };
result.variables.push_back(Variable(variable.name, variable.type, rule));
}
for (auto &sep : grammar.separators) {
auto rule = expander.apply(sep);
if (expander.error)
if (expander.error.type)
return { result, expander.error };
result.separators.push_back(rule);
}
return { result, nullptr };
return { result, CompileError::none() };
}
} // namespace prepare_grammar

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
@ -10,8 +10,7 @@ struct LexicalGrammar;
namespace prepare_grammar {
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
const LexicalGrammar &);
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -90,12 +90,12 @@ class TokenExtractor : public rules::IdentityRuleFn {
vector<Variable> tokens;
};
static const GrammarError *ubiq_token_err(const string &message) {
return new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
"Not a token: " + message);
static CompileError ubiq_token_err(const string &message) {
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: " + message);
}
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &grammar) {
InitialSyntaxGrammar syntax_grammar;
LexicalGrammar lexical_grammar;
@ -186,7 +186,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
syntax_grammar.extra_tokens.insert(new_symbol);
}
return make_tuple(syntax_grammar, lexical_grammar, nullptr);
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
}
} // namespace prepare_grammar

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
#include <tuple>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/lexical_grammar.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/prepare_grammar/interned_grammar.h"
@ -10,8 +10,8 @@
namespace tree_sitter {
namespace prepare_grammar {
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
extract_tokens(const InternedGrammar &);
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
const InternedGrammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -3,6 +3,7 @@
#include <vector>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
@ -42,12 +43,12 @@ class InternSymbols : public rules::IdentityRuleFn {
string missing_rule_name;
};
const GrammarError *missing_rule_error(string rule_name) {
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'");
CompileError missing_rule_error(string rule_name) {
return CompileError(TSCompileErrorTypeUndefinedSymbol,
"Undefined rule '" + rule_name + "'");
}
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
InternedGrammar result;
InternSymbols interner(grammar);
@ -78,7 +79,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
result.expected_conflicts.insert(entry);
}
return { result, nullptr };
return { result, CompileError::none() };
}
} // namespace prepare_grammar

View file

@ -3,13 +3,16 @@
#include <utility>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/compile_error.h"
#include "compiler/prepare_grammar/interned_grammar.h"
namespace tree_sitter {
struct Grammar;
namespace prepare_grammar {
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -1,7 +1,7 @@
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

View file

@ -32,7 +32,7 @@ class PatternParser {
next();
}
pair<rule_ptr, const GrammarError *> rule(bool nested) {
pair<rule_ptr, CompileError> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
if (!choices.empty()) {
@ -42,17 +42,17 @@ class PatternParser {
break;
}
auto pair = term(nested);
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
choices.push_back(pair.first);
} while (has_more_input());
auto rule =
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
return { rule, CompileError::none() };
}
private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
pair<rule_ptr, CompileError> term(bool nested) {
rule_ptr result = Blank::build();
do {
if (peek() == '|')
@ -60,16 +60,16 @@ class PatternParser {
if (nested && peek() == ')')
break;
auto pair = factor();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
result = Seq::build({ result, pair.first });
} while (has_more_input());
return { result, nullptr };
return { result, CompileError::none() };
}
pair<rule_ptr, const GrammarError *> factor() {
pair<rule_ptr, CompileError> factor() {
auto pair = atom();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
rule_ptr result = pair.first;
if (has_more_input()) {
@ -88,30 +88,30 @@ class PatternParser {
break;
}
}
return { result, nullptr };
return { result, CompileError::none() };
}
pair<rule_ptr, const GrammarError *> atom() {
pair<rule_ptr, CompileError> atom() {
switch (peek()) {
case '(': {
next();
auto pair = rule(true);
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ')')
return error("unmatched open paren");
next();
return { pair.first, nullptr };
return { pair.first, CompileError::none() };
}
case '[': {
next();
auto pair = char_set();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
next();
return { pair.first.copy(), nullptr };
return { pair.first.copy(), CompileError::none() };
}
case ')': {
return error("unmatched close paren");
@ -121,18 +121,19 @@ class PatternParser {
}
case '.': {
next();
return { CharacterSet().include_all().exclude('\n').copy(), nullptr };
return { CharacterSet().include_all().exclude('\n').copy(),
CompileError::none() };
}
default: {
auto pair = single_char();
if (pair.second)
if (pair.second.type)
return { Blank::build(), pair.second };
return { pair.first.copy(), nullptr };
return { pair.first.copy(), CompileError::none() };
}
}
}
pair<CharacterSet, const GrammarError *> char_set() {
pair<CharacterSet, CompileError> char_set() {
CharacterSet result;
bool is_affirmative = true;
if (peek() == '^') {
@ -143,7 +144,7 @@ class PatternParser {
while (has_more_input() && (peek() != ']')) {
auto pair = single_char();
if (pair.second)
if (pair.second.type)
return { CharacterSet(), pair.second };
if (is_affirmative)
result.add_set(pair.first);
@ -151,10 +152,10 @@ class PatternParser {
result.remove_set(pair.first);
}
return { result, nullptr };
return { result, CompileError::none() };
}
pair<CharacterSet, const GrammarError *> single_char() {
pair<CharacterSet, CompileError> single_char() {
CharacterSet value;
switch (peek()) {
case '\\':
@ -173,7 +174,7 @@ class PatternParser {
value = CharacterSet().include(first_char);
}
}
return { value, nullptr };
return { value, CompileError::none() };
}
CharacterSet escaped_char(uint32_t value) {
@ -217,8 +218,8 @@ class PatternParser {
return lookahead && iter <= end;
}
pair<rule_ptr, const GrammarError *> error(string msg) {
return { Blank::build(), new GrammarError(GrammarErrorTypeRegex, msg) };
pair<rule_ptr, CompileError> error(string msg) {
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
}
string input;
@ -227,7 +228,7 @@ class PatternParser {
int32_t lookahead;
};
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
return PatternParser(input.c_str()).rule(false);
}

View file

@ -3,12 +3,13 @@
#include <string>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
namespace prepare_grammar {
std::pair<rule_ptr, const GrammarError *> parse_regex(const std::string &);
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
} // namespace prepare_grammar
} // namespace tree_sitter

View file

@ -16,14 +16,14 @@ using std::tuple;
using std::get;
using std::make_tuple;
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &input_grammar) {
/*
* Convert all string-based `NamedSymbols` into numerical `Symbols`
*/
auto intern_result = intern_symbols(input_grammar);
const GrammarError *error = intern_result.second;
if (error)
CompileError error = intern_result.second;
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -31,7 +31,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
*/
auto extract_result = extract_tokens(intern_result.first);
error = get<2>(extract_result);
if (error)
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -45,7 +45,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
LexicalGrammar lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;
if (error)
if (error.type)
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
/*
@ -58,7 +58,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
*/
lex_grammar = normalize_rules(lex_grammar);
return make_tuple(syntax_grammar, lex_grammar, nullptr);
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
}
} // namespace prepare_grammar

View file

@ -4,15 +4,15 @@
#include <tuple>
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
struct Grammar;
class GrammarError;
namespace prepare_grammar {
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
const Grammar &);
} // namespace prepare_grammar

View file

@ -1,5 +1,4 @@
#include "compiler/prepare_grammar/token_description.h"
#include "tree_sitter/compiler.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/pattern.h"
#include "compiler/rules/seq.h"

View file

@ -2,7 +2,7 @@
#define COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {
namespace prepare_grammar {

29
src/compiler/rules.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef COMPILER_RULES_H_
#define COMPILER_RULES_H_
#include <string>
#include <vector>
#include <memory>
#include "compiler/rule.h"
namespace tree_sitter {
// Declarations of the free functions that return a `rule_ptr`.
// Only the prototypes live in this header; the definitions are elsewhere.
rule_ptr blank();
rule_ptr choice(const std::vector<rule_ptr> &);
rule_ptr repeat(const rule_ptr &);
rule_ptr repeat1(const rule_ptr &);
rule_ptr seq(const std::vector<rule_ptr> &);
rule_ptr sym(const std::string &);
rule_ptr pattern(const std::string &);
rule_ptr str(const std::string &);
rule_ptr err(const rule_ptr &);
rule_ptr prec(int precedence, const rule_ptr &);
// `prec_left` / `prec_right` are each overloaded: one form takes only the
// rule, the other takes an explicit integer precedence as well.
rule_ptr prec_left(const rule_ptr &);
rule_ptr prec_left(int precedence, const rule_ptr &);
rule_ptr prec_right(const rule_ptr &);
rule_ptr prec_right(int precedence, const rule_ptr &);
rule_ptr token(const rule_ptr &rule);
} // namespace tree_sitter
#endif // COMPILER_RULES_H_

View file

@ -3,7 +3,6 @@
#include <string>
#include <map>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
namespace tree_sitter {

View file

@ -2,8 +2,8 @@
#include <map>
#include <set>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/rules.h"
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/choice.h"

View file

@ -4,7 +4,6 @@
#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/variable.h"

View file

@ -2,7 +2,7 @@
#define COMPILER_VARIABLE_H_
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rule.h"
#include "compiler/rules/symbol.h"
namespace tree_sitter {