Make the compile function plain C and take a JSON grammar
This commit is contained in:
parent
b69e19c525
commit
d4632ab9a9
54 changed files with 325 additions and 234 deletions
|
|
@ -1,66 +1,30 @@
|
|||
#ifndef TREE_SITTER_COMPILER_H_
|
||||
#define TREE_SITTER_COMPILER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace tree_sitter {
|
||||
// Error categories reported through TSCompileResult.error_type.
//
// TSCompileErrorTypeNone is the first enumerator, so it has the value 0; code
// elsewhere in this change relies on that by testing `error.type` for
// truthiness to detect a failure.
//
// - InvalidGrammar: returned by ts_compile_grammar when parse_grammar reports
//   an error message for the JSON input.
// - InvalidRegex: attached to regex parse failures (e.g. "unmatched open
//   paren").
// - InvalidUbiquitousToken: attached to the "Not a token: ..." errors checked
//   in the extract_tokens specs.
// - ParseConflict: produced by the parse table builder together with an
//   "Unresolved conflict." message.
// - UndefinedSymbol, LexConflict: no producer is visible in this view;
//   presumably undefined rule references and lexer conflicts — TODO confirm.
typedef enum {
|
||||
TSCompileErrorTypeNone,
|
||||
TSCompileErrorTypeInvalidGrammar,
|
||||
TSCompileErrorTypeInvalidRegex,
|
||||
TSCompileErrorTypeUndefinedSymbol,
|
||||
TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
TSCompileErrorTypeLexConflict,
|
||||
TSCompileErrorTypeParseConflict,
|
||||
} TSCompileErrorType;
|
||||
|
||||
class Rule;
|
||||
typedef std::shared_ptr<Rule> rule_ptr;
|
||||
|
||||
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &);
|
||||
rule_ptr repeat(const rule_ptr &);
|
||||
rule_ptr repeat1(const rule_ptr &);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &);
|
||||
rule_ptr sym(const std::string &);
|
||||
rule_ptr pattern(const std::string &);
|
||||
rule_ptr str(const std::string &);
|
||||
rule_ptr err(const rule_ptr &);
|
||||
rule_ptr prec(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_left(const rule_ptr &);
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
|
||||
struct Grammar {
|
||||
std::vector<std::pair<std::string, rule_ptr>> rules;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::vector<std::vector<std::string>> expected_conflicts;
|
||||
};
|
||||
|
||||
enum GrammarErrorType {
|
||||
GrammarErrorTypeRegex,
|
||||
GrammarErrorTypeUndefinedSymbol,
|
||||
GrammarErrorTypeInvalidUbiquitousToken,
|
||||
GrammarErrorTypeLexConflict,
|
||||
GrammarErrorTypeParseConflict,
|
||||
};
|
||||
|
||||
class GrammarError {
|
||||
public:
|
||||
GrammarError(GrammarErrorType type, std::string message) : type(type), message(message) {}
|
||||
bool operator==(const GrammarError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
||||
GrammarErrorType type;
|
||||
std::string message;
|
||||
};
|
||||
|
||||
std::pair<std::string, const GrammarError *> compile(const Grammar &,
|
||||
std::string);
|
||||
struct CompileResult {
|
||||
typedef struct {
|
||||
const char *code;
|
||||
const char *error_message;
|
||||
};
|
||||
TSCompileErrorType error_type;
|
||||
} TSCompileResult;
|
||||
|
||||
extern "C" CompileResult compile(const char *input);
|
||||
TSCompileResult ts_compile_grammar(const char *input);
|
||||
|
||||
} // namespace tree_sitter
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // TREE_SITTER_COMPILER_H_
|
||||
|
|
|
|||
|
|
@ -94,17 +94,17 @@ struct TSLanguage {
|
|||
* Lexer Macros
|
||||
*/
|
||||
|
||||
#define START_LEXER() \
|
||||
lexer->start_fn(lexer, state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
#define START_LEXER() \
|
||||
lexer->start_fn(lexer, state); \
|
||||
int32_t lookahead; \
|
||||
next_state: \
|
||||
lookahead = lexer->lookahead;
|
||||
|
||||
#define START_TOKEN() lexer->start_token_fn(lexer);
|
||||
|
||||
#define GO_TO_STATE(state_value) \
|
||||
{ \
|
||||
state = state_value; \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/compile.h"
|
||||
#include <fstream>
|
||||
#include <iostream>
|
||||
|
||||
|
|
@ -29,10 +30,10 @@ describe("compiling the example grammars", []() {
|
|||
it(("compiles the " + language + " grammar").c_str(), [&]() {
|
||||
auto result = compile(grammar, language);
|
||||
string code = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
const CompileError error = result.second;
|
||||
|
||||
if (error)
|
||||
AssertThat(error->message, Equals(""));
|
||||
if (error.type)
|
||||
AssertThat(error.message, Equals(""));
|
||||
|
||||
ofstream file(example_parser_dir + language + ".c");
|
||||
file << get<0>(result);
|
||||
|
|
|
|||
|
|
@ -1,32 +1,45 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("Compile", []() {
|
||||
describe("compile_grammar", []() {
|
||||
describe("when the grammar's start symbol is a token", [&]() {
|
||||
it("does not fail", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "rule1", str("the-value") }
|
||||
}, {}, {}};
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "the_grammar",
|
||||
"rules": {
|
||||
"rule1": {
|
||||
"type": "STRING",
|
||||
"value": "hello"
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
auto result = compile(grammar, "test_grammar");
|
||||
const GrammarError *error = result.second;
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(string(result.error_message), IsEmpty());
|
||||
AssertThat(string(result.code), !IsEmpty());
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar's start symbol is blank", [&]() {
|
||||
it("does not fail", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "rule1", blank() }
|
||||
}, {}, {}};
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "the_grammar",
|
||||
"rules": {
|
||||
"rule1": {
|
||||
"type": "BLANK"
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
auto result = compile(grammar, "test_grammar");
|
||||
const GrammarError *error = result.second;
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(string(result.error_message), IsEmpty());
|
||||
AssertThat(string(result.code), !IsEmpty());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -5,7 +5,7 @@
|
|||
#include "compiler/helpers/stream_methods.h"
|
||||
#include "compiler/helpers/equals_pointer.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace tree_sitter;
|
||||
using namespace std;
|
||||
|
|
|
|||
|
|
@ -23,11 +23,11 @@ ostream &operator<<(ostream &stream, const Grammar &grammar) {
|
|||
return stream << string("}>");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const GrammarError *error) {
|
||||
if (error)
|
||||
return stream << (string("#<grammar-error '") + error->message + "'>");
|
||||
ostream &operator<<(ostream &stream, const CompileError &error) {
|
||||
if (error.type)
|
||||
return stream << (string("#<compile-error '") + error.message + "'>");
|
||||
else
|
||||
return stream << string("#<null>");
|
||||
return stream << string("#<no-compile-error>");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Rule &rule) {
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
using std::cout;
|
||||
|
||||
|
|
@ -98,7 +99,7 @@ struct ProductionStep;
|
|||
struct PrecedenceRange;
|
||||
|
||||
ostream &operator<<(ostream &, const Grammar &);
|
||||
ostream &operator<<(ostream &, const GrammarError &);
|
||||
ostream &operator<<(ostream &, const CompileError &);
|
||||
ostream &operator<<(ostream &, const Rule &);
|
||||
ostream &operator<<(ostream &, const rule_ptr &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
|
|
@ -69,7 +69,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
|
|
@ -102,7 +102,7 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
|
||||
AssertThat(result.second, Equals(CompileError(TSCompileErrorTypeInvalidRegex, "unmatched open paren")));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -30,9 +30,9 @@ describe("extract_tokens", []() {
|
|||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
const GrammarError *error = get<2>(result);
|
||||
CompileError error = get<2>(result);
|
||||
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
|
|
@ -150,7 +150,7 @@ describe("extract_tokens", []() {
|
|||
pattern("\\s+"),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(2));
|
||||
AssertThat(get<1>(result).separators[0], EqualsPointer(str("y")));
|
||||
|
|
@ -167,7 +167,7 @@ describe("extract_tokens", []() {
|
|||
str("y"),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
});
|
||||
|
|
@ -181,7 +181,7 @@ describe("extract_tokens", []() {
|
|||
i_sym(2),
|
||||
}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
|
|
@ -196,9 +196,9 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
}, { i_sym(1) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
|
|
@ -208,9 +208,9 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
AssertThat(get<2>(result), EqualsPointer(
|
||||
new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: (choice (sym 1) (blank))")));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ describe("intern_symbols", []() {
|
|||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
|
||||
Variable("y", VariableTypeNamed, i_sym(2)),
|
||||
|
|
@ -34,7 +34,7 @@ describe("intern_symbols", []() {
|
|||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second->message, Equals("Undefined rule 'y'"));
|
||||
AssertThat(result.second.message, Equals("Undefined rule 'y'"));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
@ -49,7 +49,7 @@ describe("intern_symbols", []() {
|
|||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -222,8 +222,8 @@ describe("parse_regex", []() {
|
|||
for (auto &row : invalid_inputs) {
|
||||
it(("handles invalid regexes with " + row.description).c_str(), [&]() {
|
||||
auto result = parse_regex(row.pattern);
|
||||
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.second->message, Contains(row.message));
|
||||
AssertThat(result.second.type, Equals(TSCompileErrorTypeInvalidRegex));
|
||||
AssertThat(result.second.message, Contains(row.message));
|
||||
});
|
||||
}
|
||||
});
|
||||
|
|
|
|||
2
spec/fixtures/grammars/helpers.cc
vendored
2
spec/fixtures/grammars/helpers.cc
vendored
|
|
@ -1,4 +1,4 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
|
|
|
|||
3
spec/fixtures/grammars/helpers.h
vendored
3
spec/fixtures/grammars/helpers.h
vendored
|
|
@ -1,7 +1,8 @@
|
|||
#ifndef TREESITTER_EXAMPLES_HELPERS_
|
||||
#define TREESITTER_EXAMPLES_HELPERS_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter_examples {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar), lexical_grammar(lex_grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
pair<ParseTable, CompileError> build() {
|
||||
Symbol start_symbol = Symbol(0, grammar.variables.empty());
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone),
|
||||
|
|
@ -68,9 +68,9 @@ class ParseTableBuilder {
|
|||
add_shift_actions(item_set, state_id);
|
||||
|
||||
if (!conflicts.empty())
|
||||
return { parse_table, new GrammarError(GrammarErrorTypeParseConflict,
|
||||
"Unresolved conflict.\n\n" +
|
||||
*conflicts.begin()) };
|
||||
return { parse_table,
|
||||
CompileError(TSCompileErrorTypeParseConflict,
|
||||
"Unresolved conflict.\n\n" + *conflicts.begin()) };
|
||||
}
|
||||
|
||||
for (ParseStateId state = 0; state < parse_table.states.size(); state++) {
|
||||
|
|
@ -83,7 +83,7 @@ class ParseTableBuilder {
|
|||
|
||||
parse_table.symbols.insert({ rules::ERROR(), {} });
|
||||
|
||||
return { parse_table, nullptr };
|
||||
return { parse_table, CompileError::none() };
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
@ -370,7 +370,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
};
|
||||
|
||||
pair<ParseTable, const GrammarError *> build_parse_table(
|
||||
pair<ParseTable, CompileError> build_parse_table(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
return ParseTableBuilder(grammar, lex_grammar).build();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -13,8 +13,8 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::pair<ParseTable, const GrammarError *> build_parse_table(
|
||||
const SyntaxGrammar &, const LexicalGrammar &);
|
||||
std::pair<ParseTable, CompileError> build_parse_table(const SyntaxGrammar &,
|
||||
const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -13,11 +14,11 @@ using std::tuple;
|
|||
using std::vector;
|
||||
using std::make_tuple;
|
||||
|
||||
tuple<ParseTable, LexTable, const GrammarError *> build_tables(
|
||||
tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
const GrammarError *error = parse_table_result.second;
|
||||
const CompileError error = parse_table_result.second;
|
||||
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
return make_tuple(parse_table, lex_table, error);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -15,7 +15,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::tuple<ParseTable, LexTable, const GrammarError *> build_tables(
|
||||
std::tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &, const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#define COMPILER_BUILD_TABLES_PARSE_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include <string>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
|
||||
#define COMPILER_BUILD_TABLES_RULE_CAN_BE_BLANK_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -15,28 +15,44 @@ using std::vector;
|
|||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
CompileResult compile(const char *input) {
|
||||
// C-linkage entry point: compiles a grammar supplied as JSON text into
// generated parser source.
//
// Pipeline: parse_grammar -> prepare_grammar::prepare_grammar ->
// build_tables::build_tables -> generate_code::c_code. The function returns at
// the first stage that reports an error.
//
// The result is brace-initialized as {code, error_message, error_type}:
//   - on failure, code is the literal "" and error_message is a strdup'd copy
//     of the stage's message; a JSON/grammar parse failure is reported as
//     TSCompileErrorTypeInvalidGrammar, later stages forward error.type;
//   - on success, code is a strdup'd copy of the generated source,
//     error_message is the literal "" and error_type is TSCompileErrorTypeNone.
//
// `input` is passed directly to std::string's constructor, so it must be a
// non-null, NUL-terminated string.
//
// NOTE(review): every return path mixes one string literal with one strdup'd
// buffer, so a caller cannot free both fields uniformly, and no release
// function is visible in this view — confirm the intended ownership contract.
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
|
||||
ParseGrammarResult parse_result = parse_grammar(string(input));
|
||||
if (!parse_result.error_message.empty()) {
|
||||
return {nullptr, parse_result.error_message.c_str()};
|
||||
return { "", strdup(parse_result.error_message.c_str()),
|
||||
TSCompileErrorTypeInvalidGrammar };
|
||||
}
|
||||
|
||||
auto compile_result = compile(parse_result.grammar, parse_result.name);
|
||||
if (compile_result.second) {
|
||||
return {nullptr, compile_result.second->message.c_str()};
|
||||
auto prepare_grammar_result =
|
||||
prepare_grammar::prepare_grammar(parse_result.grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
if (error.type) {
|
||||
return { "", strdup(error.message.c_str()), error.type };
|
||||
}
|
||||
|
||||
return {compile_result.first.c_str(), nullptr};
|
||||
auto table_build_result =
|
||||
build_tables::build_tables(syntax_grammar, lexical_grammar);
|
||||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
error = get<2>(table_build_result);
|
||||
if (error.type) {
|
||||
return { "", strdup(error.message.c_str()), error.type };
|
||||
}
|
||||
|
||||
string code = generate_code::c_code(parse_result.name, parse_table, lex_table,
|
||||
syntax_grammar, lexical_grammar);
|
||||
|
||||
return { strdup(code.c_str()), "", TSCompileErrorTypeNone };
|
||||
}
|
||||
|
||||
pair<string, const GrammarError *> compile(const Grammar &grammar,
|
||||
std::string name) {
|
||||
pair<string, const CompileError> compile(const Grammar &grammar,
|
||||
std::string name) {
|
||||
auto prepare_grammar_result = prepare_grammar::prepare_grammar(grammar);
|
||||
const SyntaxGrammar &syntax_grammar = get<0>(prepare_grammar_result);
|
||||
const LexicalGrammar &lexical_grammar = get<1>(prepare_grammar_result);
|
||||
const GrammarError *error = get<2>(prepare_grammar_result);
|
||||
|
||||
if (error)
|
||||
CompileError error = get<2>(prepare_grammar_result);
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
|
||||
auto table_build_result =
|
||||
|
|
@ -44,14 +60,13 @@ pair<string, const GrammarError *> compile(const Grammar &grammar,
|
|||
const ParseTable &parse_table = get<0>(table_build_result);
|
||||
const LexTable &lex_table = get<1>(table_build_result);
|
||||
error = get<2>(table_build_result);
|
||||
|
||||
if (error)
|
||||
if (error.type)
|
||||
return { "", error };
|
||||
|
||||
string code = generate_code::c_code(name, parse_table, lex_table,
|
||||
syntax_grammar, lexical_grammar);
|
||||
|
||||
return { code, nullptr };
|
||||
return { code, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
16
src/compiler/compile.h
Normal file
16
src/compiler/compile.h
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef COMPILER_COMPILE_H_
|
||||
#define COMPILER_COMPILE_H_
|
||||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
|
||||
// Compiles an in-memory Grammar into generated parser source. The string
// argument is the grammar name that the definition forwards to the code
// generator.
// Returns {code, error}: when a stage fails, code is empty and error carries
// the failure's type and message; on success error equals
// CompileError::none().
// NOTE(review): the definition shown in this change spells the return type as
// pair<string, const CompileError>, which is a different type from the one
// declared here — confirm the declaration and definition agree.
std::pair<std::string, CompileError> compile(const Grammar &, std::string);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_COMPILE_H_
|
||||
28
src/compiler/compile_error.h
Normal file
28
src/compiler/compile_error.h
Normal file
|
|
@ -0,0 +1,28 @@
|
|||
#ifndef COMPILER_COMPILE_ERROR_H_
|
||||
#define COMPILER_COMPILE_ERROR_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
// Value type describing the outcome of a compilation stage: an error category
// plus a human-readable message. In this change it replaces the previous
// `const GrammarError *` results, with CompileError::none() taking the place
// of a null pointer.
class CompileError {
|
||||
public:
|
||||
// Builds an error of the given category carrying `message`.
CompileError(TSCompileErrorType type, std::string message)
|
||||
: type(type), message(message) {}
|
||||
|
||||
// The "no error" value: type TSCompileErrorTypeNone with an empty message.
static CompileError none() {
|
||||
return CompileError(TSCompileErrorTypeNone, "");
|
||||
}
|
||||
|
||||
// Two errors are equal only when both the category and the message match.
bool operator==(const CompileError &other) const {
|
||||
return type == other.type && message == other.message;
|
||||
}
|
||||
|
||||
// Error category; callers test this for truthiness to detect a failure.
TSCompileErrorType type;
|
||||
// Description of the failure; empty for CompileError::none().
std::string message;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_COMPILE_ERROR_H_
|
||||
|
|
@ -188,7 +188,9 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_lex_function() {
|
||||
line("static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) {");
|
||||
line(
|
||||
"static TSTree *ts_lex(TSLexer *lexer, TSStateId state, bool error_mode) "
|
||||
"{");
|
||||
indent([&]() {
|
||||
line("START_LEXER();");
|
||||
_switch("state", [&]() {
|
||||
|
|
|
|||
19
src/compiler/grammar.h
Normal file
19
src/compiler/grammar.h
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef COMPILER_GRAMMAR_H_
|
||||
#define COMPILER_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
// In-memory description of a grammar, as consumed by compile() and
// prepare_grammar and as filled in by parse_grammar from JSON input.
struct Grammar {
|
||||
// Ordered (name, rule) pairs, one per grammar rule.
// NOTE(review): presumably the first entry is the start rule — confirm.
std::vector<std::pair<std::string, rule_ptr>> rules;
|
||||
// Rules listed separately from the named rules; the extract_tokens specs
// reject entries that are not tokens ("Not a token: ...").
std::vector<rule_ptr> extra_tokens;
|
||||
// Groups of strings, one group per expected conflict.
// NOTE(review): presumably each string is a rule name — confirm.
std::vector<std::vector<std::string>> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_GRAMMAR_H_
|
||||
|
|
@ -4,7 +4,7 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,9 @@
|
|||
#include <string>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "json.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -40,7 +41,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
type = rule_type_json.u.string.ptr;
|
||||
|
||||
if (type == "BLANK") {
|
||||
return {blank(), ""};
|
||||
return { blank(), "" };
|
||||
}
|
||||
|
||||
if (type == "CHOICE") {
|
||||
|
|
@ -61,7 +62,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
goto error;
|
||||
}
|
||||
}
|
||||
return {choice(members), ""};
|
||||
return { choice(members), "" };
|
||||
}
|
||||
|
||||
if (type == "SEQ") {
|
||||
|
|
@ -82,14 +83,14 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
goto error;
|
||||
}
|
||||
}
|
||||
return {seq(members), ""};
|
||||
return { seq(members), "" };
|
||||
}
|
||||
|
||||
if (type == "ERROR") {
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return {err(content.rule), ""};
|
||||
return { err(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid error content: " + content.error_message;
|
||||
goto error;
|
||||
|
|
@ -100,7 +101,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return {repeat(content.rule), ""};
|
||||
return { repeat(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat content: " + content.error_message;
|
||||
goto error;
|
||||
|
|
@ -111,7 +112,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return {repeat1(content.rule), ""};
|
||||
return { repeat1(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid repeat1 content: " + content.error_message;
|
||||
goto error;
|
||||
|
|
@ -122,7 +123,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
json_value content_json = rule_json->operator[]("content");
|
||||
ParseRuleResult content = parse_rule(&content_json);
|
||||
if (content.rule.get()) {
|
||||
return {token(content.rule), ""};
|
||||
return { token(content.rule), "" };
|
||||
} else {
|
||||
error_message = "Invalid token content: " + content.error_message;
|
||||
goto error;
|
||||
|
|
@ -132,7 +133,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (type == "PATTERN") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return {pattern(value_json.u.string.ptr), ""};
|
||||
return { pattern(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "Pattern value must be a string";
|
||||
goto error;
|
||||
|
|
@ -142,7 +143,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (type == "STRING") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
return {str(value_json.u.string.ptr), ""};
|
||||
return { str(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "String rule value must be a string";
|
||||
goto error;
|
||||
|
|
@ -152,7 +153,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (type == "SYMBOL") {
|
||||
json_value value_json = rule_json->operator[]("name");
|
||||
if (value_json.type == json_string) {
|
||||
return {sym(value_json.u.string.ptr), ""};
|
||||
return { sym(value_json.u.string.ptr), "" };
|
||||
} else {
|
||||
error_message = "Symbol value must be a string";
|
||||
goto error;
|
||||
|
|
@ -173,7 +174,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
goto error;
|
||||
}
|
||||
|
||||
return {prec(precedence_json.u.integer, content.rule), ""};
|
||||
return { prec(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
if (type == "PREC_LEFT") {
|
||||
|
|
@ -190,7 +191,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
goto error;
|
||||
}
|
||||
|
||||
return {prec_left(precedence_json.u.integer, content.rule), ""};
|
||||
return { prec_left(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
if (type == "PREC_RIGHT") {
|
||||
|
|
@ -207,13 +208,13 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
goto error;
|
||||
}
|
||||
|
||||
return {prec_right(precedence_json.u.integer, content.rule), ""};
|
||||
return { prec_right(precedence_json.u.integer, content.rule), "" };
|
||||
}
|
||||
|
||||
error_message = "Unknown rule type " + type;
|
||||
|
||||
error:
|
||||
return {rule_ptr(), error_message};
|
||||
return { rule_ptr(), error_message };
|
||||
}
|
||||
|
||||
ParseGrammarResult parse_grammar(const string &input) {
|
||||
|
|
@ -222,11 +223,12 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
Grammar grammar;
|
||||
json_value name_json, rules_json, extras_json, conflicts_json;
|
||||
|
||||
json_settings settings = {0, 0, 0, 0, 0, 0};
|
||||
json_settings settings = { 0, 0, 0, 0, 0, 0 };
|
||||
char parse_error[json_error_max];
|
||||
json_value *grammar_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
|
||||
json_value *grammar_json =
|
||||
json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
|
||||
if (!grammar_json) {
|
||||
error_message = "Failed to parse JSON";
|
||||
error_message = string("Invalid JSON at ") + parse_error;
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -254,7 +256,8 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
ParseRuleResult entry = parse_rule(entry_json.value);
|
||||
|
||||
if (!entry.rule.get()) {
|
||||
error_message = string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
|
||||
error_message =
|
||||
string("Invalid rule '") + entry_json.name + "' " + entry.error_message;
|
||||
goto error;
|
||||
}
|
||||
|
||||
|
|
@ -295,7 +298,8 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
}
|
||||
|
||||
vector<string> conflict;
|
||||
for (size_t j = 0, conflict_length = conflict_json->u.array.length; j < conflict_length; j++) {
|
||||
for (size_t j = 0, conflict_length = conflict_json->u.array.length;
|
||||
j < conflict_length; j++) {
|
||||
json_value *conflict_entry_json = conflict_json->u.array.values[j];
|
||||
if (conflict_entry_json->type != json_string) {
|
||||
error_message = "Each conflict entry must be an array of strings";
|
||||
|
|
@ -309,14 +313,14 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
}
|
||||
}
|
||||
|
||||
return {name, grammar, ""};
|
||||
return { name, grammar, "" };
|
||||
|
||||
error:
|
||||
if (grammar_json) {
|
||||
json_value_free(grammar_json);
|
||||
}
|
||||
|
||||
return {"", Grammar{}, error_message};
|
||||
return { "", Grammar{}, error_message };
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_EXPAND_REPEATS_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -53,36 +53,35 @@ class ExpandTokens : public rules::IdentityRuleFn {
|
|||
|
||||
rule_ptr apply_to(const Pattern *rule) {
|
||||
auto pair = parse_regex(rule->value);
|
||||
if (!error)
|
||||
if (!error.type)
|
||||
error = pair.second;
|
||||
return pair.first;
|
||||
}
|
||||
|
||||
public:
|
||||
const GrammarError *error;
|
||||
ExpandTokens() : error(nullptr) {}
|
||||
CompileError error;
|
||||
ExpandTokens() : error(CompileError::none()) {}
|
||||
};
|
||||
|
||||
pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &grammar) {
|
||||
pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar) {
|
||||
LexicalGrammar result;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
auto rule = expander.apply(variable.rule);
|
||||
if (expander.error)
|
||||
if (expander.error.type)
|
||||
return { result, expander.error };
|
||||
result.variables.push_back(Variable(variable.name, variable.type, rule));
|
||||
}
|
||||
|
||||
for (auto &sep : grammar.separators) {
|
||||
auto rule = expander.apply(sep);
|
||||
if (expander.error)
|
||||
if (expander.error.type)
|
||||
return { result, expander.error };
|
||||
result.separators.push_back(rule);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXPAND_TOKENS_H_
|
||||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -10,8 +10,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
||||
const LexicalGrammar &);
|
||||
std::pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
|
||||
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -90,12 +90,12 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
vector<Variable> tokens;
|
||||
};
|
||||
|
||||
static const GrammarError *ubiq_token_err(const string &message) {
|
||||
return new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: " + message);
|
||||
static CompileError ubiq_token_err(const string &message) {
|
||||
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: " + message);
|
||||
}
|
||||
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &grammar) {
|
||||
InitialSyntaxGrammar syntax_grammar;
|
||||
LexicalGrammar lexical_grammar;
|
||||
|
|
@ -186,7 +186,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens
|
|||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
}
|
||||
|
||||
return make_tuple(syntax_grammar, lexical_grammar, nullptr);
|
||||
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_TOKENS_H_
|
||||
|
||||
#include <tuple>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
|
@ -10,8 +10,8 @@
|
|||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
extract_tokens(const InternedGrammar &);
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
||||
const InternedGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include <vector>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
|
|
@ -42,12 +43,12 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
string missing_rule_name;
|
||||
};
|
||||
|
||||
const GrammarError *missing_rule_error(string rule_name) {
|
||||
return new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'");
|
||||
CompileError missing_rule_error(string rule_name) {
|
||||
return CompileError(TSCompileErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'");
|
||||
}
|
||||
|
||||
pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
|
||||
InternedGrammar result;
|
||||
InternSymbols interner(grammar);
|
||||
|
||||
|
|
@ -78,7 +79,7 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
|
|||
result.expected_conflicts.insert(entry);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -3,13 +3,16 @@
|
|||
|
||||
#include <utility>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/compile_error.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
std::pair<InternedGrammar, CompileError> intern_symbols(const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_IS_TOKEN_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class PatternParser {
|
|||
next();
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> rule(bool nested) {
|
||||
pair<rule_ptr, CompileError> rule(bool nested) {
|
||||
vector<rule_ptr> choices = {};
|
||||
do {
|
||||
if (!choices.empty()) {
|
||||
|
|
@ -42,17 +42,17 @@ class PatternParser {
|
|||
break;
|
||||
}
|
||||
auto pair = term(nested);
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
choices.push_back(pair.first);
|
||||
} while (has_more_input());
|
||||
auto rule =
|
||||
(choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
|
||||
return { rule, nullptr };
|
||||
return { rule, CompileError::none() };
|
||||
}
|
||||
|
||||
private:
|
||||
pair<rule_ptr, const GrammarError *> term(bool nested) {
|
||||
pair<rule_ptr, CompileError> term(bool nested) {
|
||||
rule_ptr result = Blank::build();
|
||||
do {
|
||||
if (peek() == '|')
|
||||
|
|
@ -60,16 +60,16 @@ class PatternParser {
|
|||
if (nested && peek() == ')')
|
||||
break;
|
||||
auto pair = factor();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
result = Seq::build({ result, pair.first });
|
||||
} while (has_more_input());
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> factor() {
|
||||
pair<rule_ptr, CompileError> factor() {
|
||||
auto pair = atom();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
rule_ptr result = pair.first;
|
||||
if (has_more_input()) {
|
||||
|
|
@ -88,30 +88,30 @@ class PatternParser {
|
|||
break;
|
||||
}
|
||||
}
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> atom() {
|
||||
pair<rule_ptr, CompileError> atom() {
|
||||
switch (peek()) {
|
||||
case '(': {
|
||||
next();
|
||||
auto pair = rule(true);
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ')')
|
||||
return error("unmatched open paren");
|
||||
next();
|
||||
return { pair.first, nullptr };
|
||||
return { pair.first, CompileError::none() };
|
||||
}
|
||||
case '[': {
|
||||
next();
|
||||
auto pair = char_set();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
if (peek() != ']')
|
||||
return error("unmatched open square bracket");
|
||||
next();
|
||||
return { pair.first.copy(), nullptr };
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
}
|
||||
case ')': {
|
||||
return error("unmatched close paren");
|
||||
|
|
@ -121,18 +121,19 @@ class PatternParser {
|
|||
}
|
||||
case '.': {
|
||||
next();
|
||||
return { CharacterSet().include_all().exclude('\n').copy(), nullptr };
|
||||
return { CharacterSet().include_all().exclude('\n').copy(),
|
||||
CompileError::none() };
|
||||
}
|
||||
default: {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { Blank::build(), pair.second };
|
||||
return { pair.first.copy(), nullptr };
|
||||
return { pair.first.copy(), CompileError::none() };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> char_set() {
|
||||
pair<CharacterSet, CompileError> char_set() {
|
||||
CharacterSet result;
|
||||
bool is_affirmative = true;
|
||||
if (peek() == '^') {
|
||||
|
|
@ -143,7 +144,7 @@ class PatternParser {
|
|||
|
||||
while (has_more_input() && (peek() != ']')) {
|
||||
auto pair = single_char();
|
||||
if (pair.second)
|
||||
if (pair.second.type)
|
||||
return { CharacterSet(), pair.second };
|
||||
if (is_affirmative)
|
||||
result.add_set(pair.first);
|
||||
|
|
@ -151,10 +152,10 @@ class PatternParser {
|
|||
result.remove_set(pair.first);
|
||||
}
|
||||
|
||||
return { result, nullptr };
|
||||
return { result, CompileError::none() };
|
||||
}
|
||||
|
||||
pair<CharacterSet, const GrammarError *> single_char() {
|
||||
pair<CharacterSet, CompileError> single_char() {
|
||||
CharacterSet value;
|
||||
switch (peek()) {
|
||||
case '\\':
|
||||
|
|
@ -173,7 +174,7 @@ class PatternParser {
|
|||
value = CharacterSet().include(first_char);
|
||||
}
|
||||
}
|
||||
return { value, nullptr };
|
||||
return { value, CompileError::none() };
|
||||
}
|
||||
|
||||
CharacterSet escaped_char(uint32_t value) {
|
||||
|
|
@ -217,8 +218,8 @@ class PatternParser {
|
|||
return lookahead && iter <= end;
|
||||
}
|
||||
|
||||
pair<rule_ptr, const GrammarError *> error(string msg) {
|
||||
return { Blank::build(), new GrammarError(GrammarErrorTypeRegex, msg) };
|
||||
pair<rule_ptr, CompileError> error(string msg) {
|
||||
return { Blank::build(), CompileError(TSCompileErrorTypeInvalidRegex, msg) };
|
||||
}
|
||||
|
||||
string input;
|
||||
|
|
@ -227,7 +228,7 @@ class PatternParser {
|
|||
int32_t lookahead;
|
||||
};
|
||||
|
||||
pair<rule_ptr, const GrammarError *> parse_regex(const std::string &input) {
|
||||
pair<rule_ptr, CompileError> parse_regex(const std::string &input) {
|
||||
return PatternParser(input.c_str()).rule(false);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -3,12 +3,13 @@
|
|||
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<rule_ptr, const GrammarError *> parse_regex(const std::string &);
|
||||
std::pair<rule_ptr, CompileError> parse_regex(const std::string &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -16,14 +16,14 @@ using std::tuple;
|
|||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &input_grammar) {
|
||||
/*
|
||||
* Convert all string-based `NamedSymbols` into numerical `Symbols`
|
||||
*/
|
||||
auto intern_result = intern_symbols(input_grammar);
|
||||
const GrammarError *error = intern_result.second;
|
||||
if (error)
|
||||
CompileError error = intern_result.second;
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -31,7 +31,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
*/
|
||||
auto extract_result = extract_tokens(intern_result.first);
|
||||
error = get<2>(extract_result);
|
||||
if (error)
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -45,7 +45,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
LexicalGrammar lex_grammar = expand_tokens_result.first;
|
||||
error = expand_tokens_result.second;
|
||||
if (error)
|
||||
if (error.type)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
/*
|
||||
|
|
@ -58,7 +58,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
*/
|
||||
lex_grammar = normalize_rules(lex_grammar);
|
||||
|
||||
return make_tuple(syntax_grammar, lex_grammar, nullptr);
|
||||
return make_tuple(syntax_grammar, lex_grammar, CompileError::none());
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -4,15 +4,15 @@
|
|||
#include <tuple>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/compile_error.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct Grammar;
|
||||
class GrammarError;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, CompileError> prepare_grammar(
|
||||
const Grammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -1,5 +1,4 @@
|
|||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_TOKEN_DESCRIPTION_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
|
|||
29
src/compiler/rules.h
Normal file
29
src/compiler/rules.h
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
#ifndef COMPILER_RULES_H_
|
||||
#define COMPILER_RULES_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
// Declarations of the rule-building helper functions. Every function here
// returns a rule_ptr; only the declarations live in this header.
// NOTE(review): rule_ptr is presumably provided by "compiler/rule.h" - confirm.
rule_ptr blank();
|
||||
rule_ptr choice(const std::vector<rule_ptr> &);
|
||||
rule_ptr repeat(const rule_ptr &);
|
||||
rule_ptr repeat1(const rule_ptr &);
|
||||
rule_ptr seq(const std::vector<rule_ptr> &);
|
||||
rule_ptr sym(const std::string &);
|
||||
rule_ptr pattern(const std::string &);
|
||||
rule_ptr str(const std::string &);
|
||||
rule_ptr err(const rule_ptr &);
|
||||
rule_ptr prec(int precedence, const rule_ptr &);
|
||||
// prec_left and prec_right are each declared twice: once taking only a rule,
// and once taking an explicit integer precedence followed by the rule.
rule_ptr prec_left(const rule_ptr &);
|
||||
rule_ptr prec_left(int precedence, const rule_ptr &);
|
||||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_H_
|
||||
|
|
@ -3,7 +3,6 @@
|
|||
|
||||
#include <string>
|
||||
#include <map>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -2,8 +2,8 @@
|
|||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/variable.h"
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_VARIABLE_H_
|
||||
|
||||
#include <string>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue