Merge branch 'flatten-rules-into-productions'
This branch had diverged considerably, so merging it required changing a lot of code. Conflicts: project.gyp spec/compiler/build_tables/action_takes_precedence_spec.cc spec/compiler/build_tables/build_conflict_spec.cc spec/compiler/build_tables/build_parse_table_spec.cc spec/compiler/build_tables/first_symbols_spec.cc spec/compiler/build_tables/item_set_closure_spec.cc spec/compiler/build_tables/item_set_transitions_spec.cc spec/compiler/build_tables/rule_can_be_blank_spec.cc spec/compiler/helpers/containers.h spec/compiler/prepare_grammar/expand_repeats_spec.cc spec/compiler/prepare_grammar/extract_tokens_spec.cc src/compiler/build_tables/action_takes_precedence.h src/compiler/build_tables/build_parse_table.cc src/compiler/build_tables/first_symbols.cc src/compiler/build_tables/first_symbols.h src/compiler/build_tables/item_set_closure.cc src/compiler/build_tables/item_set_transitions.cc src/compiler/build_tables/parse_item.cc src/compiler/build_tables/parse_item.h src/compiler/build_tables/rule_can_be_blank.cc src/compiler/build_tables/rule_can_be_blank.h src/compiler/prepare_grammar/expand_repeats.cc src/compiler/prepare_grammar/extract_tokens.cc src/compiler/prepare_grammar/extract_tokens.h src/compiler/prepare_grammar/prepare_grammar.cc src/compiler/rules/built_in_symbols.cc src/compiler/rules/built_in_symbols.h src/compiler/syntax_grammar.cc src/compiler/syntax_grammar.h
This commit is contained in:
commit
ebc52f109d
71 changed files with 30354 additions and 33188 deletions
|
|
@ -13,7 +13,6 @@
|
|||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/first_symbols.cc',
|
||||
'src/compiler/build_tables/get_completion_status.cc',
|
||||
'src/compiler/build_tables/get_metadata.cc',
|
||||
'src/compiler/build_tables/item.cc',
|
||||
|
|
@ -32,13 +31,17 @@
|
|||
'src/compiler/parse_table.cc',
|
||||
'src/compiler/prepare_grammar/expand_repeats.cc',
|
||||
'src/compiler/prepare_grammar/expand_tokens.cc',
|
||||
'src/compiler/prepare_grammar/extract_choices.cc',
|
||||
'src/compiler/prepare_grammar/extract_tokens.cc',
|
||||
'src/compiler/prepare_grammar/flatten_grammar.cc',
|
||||
'src/compiler/prepare_grammar/intern_symbols.cc',
|
||||
'src/compiler/prepare_grammar/is_token.cc',
|
||||
'src/compiler/prepare_grammar/parse_regex.cc',
|
||||
'src/compiler/prepare_grammar/prepare_grammar.cc',
|
||||
'src/compiler/prepare_grammar/token_description.cc',
|
||||
'src/compiler/rule.cc',
|
||||
'src/compiler/syntax_grammar.cc',
|
||||
'src/compiler/variable.cc',
|
||||
'src/compiler/rules/blank.cc',
|
||||
'src/compiler/rules/built_in_symbols.cc',
|
||||
'src/compiler/rules/character_range.cc',
|
||||
|
|
|
|||
|
|
@ -1,117 +0,0 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("first_symbols", []() {
|
||||
SyntaxGrammar null_grammar;
|
||||
|
||||
describe("for a sequence AB", [&]() {
|
||||
it("ignores B when A cannot be blank", [&]() {
|
||||
auto rule = seq({ i_token(0), i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, true),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(B) when A can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
choice({
|
||||
i_token(0),
|
||||
blank() }),
|
||||
i_token(1) });
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(0, true),
|
||||
Symbol(1, true)
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
|
||||
auto rule = choice({
|
||||
seq({
|
||||
i_token(0),
|
||||
i_token(1) }),
|
||||
i_sym(0) });
|
||||
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(2),
|
||||
i_token(3),
|
||||
i_token(4),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
}, {}, {}};
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, true),
|
||||
Symbol(2, true),
|
||||
})));
|
||||
});
|
||||
|
||||
it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
|
||||
auto rule = seq({
|
||||
i_sym(0),
|
||||
i_token(1) });
|
||||
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
i_token(0),
|
||||
blank(),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(0, true),
|
||||
Symbol(1, true),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
describe("when there are left-recursive rules", [&]() {
|
||||
it("terminates", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_sym(0), i_token(10) }),
|
||||
i_token(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
auto rule = i_sym(0);
|
||||
|
||||
AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
|
||||
Symbol(0),
|
||||
Symbol(11, true)
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
it("ignores metadata rules", [&]() {
|
||||
auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
|
||||
|
||||
AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
|
||||
Symbol(3, true),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,7 +1,8 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/item_set_closure.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
using namespace build_tables;
|
||||
using namespace rules;
|
||||
|
|
@ -10,39 +11,55 @@ START_TEST
|
|||
|
||||
describe("item_set_closure", []() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"E",
|
||||
seq({
|
||||
i_sym(1),
|
||||
i_token(11),
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone, 100},
|
||||
{Symbol(11, true), 0, AssociativityNone, 101},
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"T",
|
||||
seq({
|
||||
i_token(12),
|
||||
i_token(13),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, true), 0, AssociativityNone, 102},
|
||||
{Symbol(13, true), 0, AssociativityNone, 103},
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Production({
|
||||
{Symbol(2), 0, AssociativityNone, 104},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, true), 0, AssociativityNone, 105},
|
||||
{Symbol(15, true), 0, AssociativityNone, 106},
|
||||
})
|
||||
}),
|
||||
}, {}, {}};
|
||||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
ParseItemSet item_set = item_set_closure(
|
||||
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
|
||||
set<Symbol>({ Symbol(10, true) }),
|
||||
grammar
|
||||
);
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), 0, 0, 100),
|
||||
set<Symbol>({ Symbol(10, true) }),
|
||||
}
|
||||
});
|
||||
|
||||
item_set_closure(&item_set, grammar);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(1), grammar.rules[1].rule, {}),
|
||||
set<Symbol>({ Symbol(11, true) }),
|
||||
ParseItem(Symbol(0), 0, 0, 100),
|
||||
set<Symbol>({ Symbol(10, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(0), grammar.rules[0].rule, {}),
|
||||
set<Symbol>({ Symbol(10, true) }),
|
||||
ParseItem(Symbol(1), 0, 0, 102),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), 1, 0, 104),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), 0, 0, 105),
|
||||
set<Symbol>({ Symbol(11, true) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/item_set_transitions.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/helpers/rule_helpers.h"
|
||||
|
||||
using namespace rules;
|
||||
|
|
@ -17,63 +17,67 @@ describe("char_transitions(LexItemSet)", []() {
|
|||
});
|
||||
|
||||
AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
{
|
||||
CharacterSet().include('a', 'd'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('e', 'f'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
{
|
||||
CharacterSet().include('g', 'x'),
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
})
|
||||
},
|
||||
})));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"A",
|
||||
blank(),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"B",
|
||||
i_token(21),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
|
||||
it("computes the closure of the new item sets", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("A", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(11, true), 0, AssociativityNone, 101},
|
||||
{Symbol(12, true), 0, AssociativityNone, 102},
|
||||
{Symbol(13, true), 0, AssociativityNone, 103},
|
||||
{Symbol(1), 0, AssociativityNone, 104},
|
||||
{Symbol(14, true), 0, AssociativityNone, 105},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("B", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(15, true), 0, AssociativityNone, 106},
|
||||
})
|
||||
})
|
||||
}, {}, {}};
|
||||
|
||||
ParseItemSet set1({
|
||||
{
|
||||
ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), { Symbol(101) }),
|
||||
set<Symbol>({ Symbol(23, true) })
|
||||
},
|
||||
ParseItem(Symbol(0), 0, 2, 103),
|
||||
set<Symbol>({ Symbol(16, true) })
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
|
||||
{
|
||||
Symbol(22, true),
|
||||
Symbol(13, true),
|
||||
ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), i_sym(1), { Symbol(101), Symbol(22) }),
|
||||
set<Symbol>({ Symbol(23, true) }),
|
||||
ParseItem(Symbol(0), 0, 3, 104),
|
||||
set<Symbol>({ Symbol(16, true) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), i_token(21), {}),
|
||||
set<Symbol>({ Symbol(23, true) })
|
||||
ParseItem(Symbol(1), 0, 0, 106),
|
||||
set<Symbol>({ Symbol(14, true) })
|
||||
},
|
||||
})
|
||||
},
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -11,16 +10,8 @@ START_TEST
|
|||
|
||||
describe("LexConflictManager", []() {
|
||||
LexicalGrammar lexical_grammar{{
|
||||
{
|
||||
"other_token",
|
||||
pattern("[a-b]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"lookahead_token",
|
||||
pattern("[c-d]"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("other_token", VariableTypeNamed, pattern("[a-b]")),
|
||||
Variable("lookahead_token", VariableTypeNamed, pattern("[c-d]"))
|
||||
}, {}};
|
||||
|
||||
LexConflictManager conflict_manager(lexical_grammar);
|
||||
|
|
|
|||
|
|
@ -2,7 +2,6 @@
|
|||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/parse_conflict_manager.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
|
@ -11,31 +10,11 @@ START_TEST
|
|||
|
||||
describe("ParseConflictManager", []() {
|
||||
SyntaxGrammar syntax_grammar{{
|
||||
{
|
||||
"in_progress_rule1",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"in_progress_rule2",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"reduced_rule",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"other_rule1",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"other_rule2",
|
||||
i_token(0),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
SyntaxVariable("in_progress_rule1", VariableTypeNamed, { Production() }),
|
||||
SyntaxVariable("in_progress_rule2", VariableTypeNamed, { Production() }),
|
||||
SyntaxVariable("reduced_rule", VariableTypeNamed, { Production() }),
|
||||
SyntaxVariable("other_rule1", VariableTypeNamed, { Production() }),
|
||||
SyntaxVariable("other_rule2", VariableTypeNamed, { Production() }),
|
||||
}, { Symbol(2, true) }, {}};
|
||||
|
||||
pair<bool, ConflictType> result;
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
|
||||
using namespace rules;
|
||||
using build_tables::rule_can_be_blank;
|
||||
|
|
@ -54,37 +53,6 @@ describe("rule_can_be_blank", [&]() {
|
|||
rule = make_shared<rules::Metadata>(sym("one"), map<rules::MetadataKey, int>());
|
||||
AssertThat(rule_can_be_blank(rule), IsFalse());
|
||||
});
|
||||
|
||||
describe("checking recursively (by expanding non-terminals)", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"A",
|
||||
choice({
|
||||
seq({ i_sym(0), i_token(11) }),
|
||||
blank()
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"B",
|
||||
choice({
|
||||
seq({ i_sym(1), i_token(12) }),
|
||||
i_token(13)
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
}, {}, {}};
|
||||
|
||||
it("terminates for left-recursive rules that can be blank", [&]() {
|
||||
rule = i_sym(0);
|
||||
AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
|
||||
});
|
||||
|
||||
it("terminates for left-recursive rules that can't be blank", [&]() {
|
||||
rule = i_sym(1);
|
||||
AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -29,8 +29,9 @@ describe("compiling the example grammars", []() {
|
|||
string code = result.first;
|
||||
const GrammarError *error = result.second;
|
||||
|
||||
AssertThat(error, Equals((GrammarError *)nullptr));
|
||||
|
||||
if (error)
|
||||
AssertThat(error->message, Equals(""));
|
||||
|
||||
ofstream file(example_parser_dir + language + ".c");
|
||||
file << get<0>(result);
|
||||
file.close();
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ namespace tree_sitter {
|
|||
return make_shared<rules::Metadata>(rule, values);
|
||||
}
|
||||
|
||||
bool operator==(const RuleEntry &left, const RuleEntry &right) {
|
||||
bool operator==(const Variable &left, const Variable &right) {
|
||||
return left.name == right.name && left.rule->operator==(*right.rule) &&
|
||||
left.type == right.type;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
rule_ptr metadata(rule_ptr, std::map<rules::MetadataKey, int>);
|
||||
|
|
@ -13,7 +13,7 @@ namespace tree_sitter {
|
|||
rule_ptr i_sym(size_t index);
|
||||
rule_ptr i_token(size_t index);
|
||||
|
||||
bool operator==(const RuleEntry &left, const RuleEntry &right);
|
||||
bool operator==(const Variable &left, const Variable &right);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
|
|
@ -42,8 +43,12 @@ ostream &operator<<(ostream &stream, const rule_ptr &rule) {
|
|||
return stream;
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const RuleEntry &entry) {
|
||||
return stream << string("{") << entry.name << string(", ") << entry.rule << string(", ") << to_string(entry.type) << string("}");
|
||||
ostream &operator<<(ostream &stream, const Variable &variable) {
|
||||
return stream << string("{") << variable.name << string(", ") << variable.rule << string(", ") << to_string(variable.type) << string("}");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
|
||||
return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}");
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const LexAction &action) {
|
||||
|
|
@ -100,6 +105,10 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
|
|||
return stream;
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const ProductionStep &step) {
|
||||
return stream << string("(production_step symbol:") << step.symbol << string(" precedence:") << to_string(step.precedence) << ")";
|
||||
}
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
|
||||
|
|
@ -107,8 +116,11 @@ ostream &operator<<(ostream &stream, const build_tables::LexItem &item) {
|
|||
<< string(")");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const build_tables::ParseItem &item) {
|
||||
return stream << string("(item ") << item.lhs << string(" ") << *item.rule
|
||||
ostream &operator<<(ostream &stream, const ParseItem &item) {
|
||||
return stream << string("(item variable:") << to_string(item.variable_index)
|
||||
<< string(" production:") << to_string(item.production_index)
|
||||
<< string(" step:") << to_string(item.step_index)
|
||||
<< string(" remaining_rule:") << to_string(item.rule_id)
|
||||
<< string(")");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -37,8 +37,8 @@ inline std::ostream& operator<<(std::ostream &stream, const std::set<T> &set) {
|
|||
return stream << ")";
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T> &set) {
|
||||
template<typename T, typename H, typename E>
|
||||
inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T, H, E> &set) {
|
||||
stream << std::string("(set: ");
|
||||
bool started = false;
|
||||
for (auto item : set) {
|
||||
|
|
@ -89,19 +89,23 @@ namespace tree_sitter {
|
|||
using std::ostream;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
struct RuleEntry;
|
||||
struct Variable;
|
||||
struct SyntaxVariable;
|
||||
class LexAction;
|
||||
class ParseAction;
|
||||
class ParseState;
|
||||
struct ProductionStep;
|
||||
|
||||
ostream &operator<<(ostream &, const Grammar &);
|
||||
ostream &operator<<(ostream &, const GrammarError &);
|
||||
ostream &operator<<(ostream &, const Rule &);
|
||||
ostream &operator<<(ostream &, const rule_ptr &);
|
||||
ostream &operator<<(ostream &, const RuleEntry &);
|
||||
std::ostream &operator<<(ostream &stream, const LexAction &);
|
||||
std::ostream &operator<<(ostream &stream, const ParseAction &);
|
||||
std::ostream &operator<<(ostream &stream, const ParseState &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
ostream &operator<<(ostream &, const SyntaxVariable &);
|
||||
ostream &operator<<(ostream &, const LexAction &);
|
||||
ostream &operator<<(ostream &, const ParseAction &);
|
||||
ostream &operator<<(ostream &, const ParseState &);
|
||||
ostream &operator<<(ostream &, const ProductionStep &);
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
|
|
@ -109,9 +113,9 @@ struct MetadataRange;
|
|||
class LexItem;
|
||||
class ParseItem;
|
||||
|
||||
ostream &operator<<(ostream &stream, const MetadataRange &);
|
||||
ostream &operator<<(ostream &stream, const LexItem &);
|
||||
ostream &operator<<(ostream &stream, const ParseItem &);
|
||||
ostream &operator<<(ostream &, const MetadataRange &);
|
||||
ostream &operator<<(ostream &, const LexItem &);
|
||||
ostream &operator<<(ostream &, const ParseItem &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,232 +1,152 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
using prepare_grammar::expand_repeats;
|
||||
|
||||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
repeat(i_token(0)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat(i_token(0))),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_sym(1), blank() }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(0),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({ i_sym(1), blank() })),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(0),
|
||||
choice({ i_sym(1), blank() })
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)),
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat(i_token(11)),
|
||||
})),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() })
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() })
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() })
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_token(10), repeat(i_token(11)) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat(i_token(11))
|
||||
})),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({ i_token(10), i_sym(1), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() }),
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({ i_token(10), i_sym(1), blank() })),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(1), blank() }),
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create redundant auxiliary rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
seq({ i_token(3), repeat(i_token(4)) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat(i_token(4)) }),
|
||||
seq({ i_token(2), repeat(i_token(4)) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat(i_token(4))
|
||||
})),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
choice({
|
||||
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
|
||||
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
seq({ i_token(3), choice({ i_sym(2), blank() }) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(4),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), choice({ i_sym(2), blank() }) }),
|
||||
seq({ i_token(2), choice({ i_sym(2), blank() }) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
choice({ i_sym(2), blank() })
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(4),
|
||||
choice({ i_sym(2), blank() }),
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
repeat(i_token(10)),
|
||||
repeat(i_token(11)),
|
||||
})),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
seq({
|
||||
choice({ i_sym(1), blank() }),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() }),
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
{
|
||||
"rule0_repeat2",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
choice({ i_sym(1), blank() }),
|
||||
choice({ i_sym(2), blank() }),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(1), blank() }),
|
||||
})),
|
||||
Variable("rule0_repeat2", VariableTypeAuxiliary, seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(2), blank() }),
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
{
|
||||
"rule0",
|
||||
repeat(i_token(10)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
repeat(i_token(11)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat(i_token(10))),
|
||||
Variable("rule1", VariableTypeNamed, repeat(i_token(11))),
|
||||
}, {}, {}};
|
||||
|
||||
auto match = expand_repeats(grammar);
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(match.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule0",
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
i_sym(2),
|
||||
blank(),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, choice({
|
||||
i_sym(3),
|
||||
blank(),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(2), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule1",
|
||||
choice({ i_sym(3), blank() }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule0_repeat1",
|
||||
seq({
|
||||
i_token(10),
|
||||
choice({ i_sym(2), blank() }),
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
{
|
||||
"rule1_repeat1",
|
||||
seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(3), blank() })
|
||||
}),
|
||||
RuleEntryTypeAuxiliary
|
||||
},
|
||||
})),
|
||||
Variable("rule1_repeat1", VariableTypeAuxiliary, seq({
|
||||
i_token(11),
|
||||
choice({ i_sym(3), blank() })
|
||||
})),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -11,64 +11,48 @@ describe("expand_tokens", []() {
|
|||
describe("string rules", [&]() {
|
||||
it("replaces strings with sequences of character sets", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
})),
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
}),
|
||||
i_sym(11),
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
i_sym(11),
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles strings containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("\u03B1 \u03B2"), // α β
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, str("\u03B1 \u03B2")),
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), {
|
||||
{PRECEDENCE, 1},
|
||||
{IS_TOKEN, 1},
|
||||
})),
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
@ -76,64 +60,44 @@ describe("expand_tokens", []() {
|
|||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
})),
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((const GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
})),
|
||||
})));
|
||||
});
|
||||
|
||||
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{
|
||||
"rule_A",
|
||||
pattern("[^\u03B1-\u03B4]*"), // [^α-δ]
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, pattern("[^\u03B1-\u03B4]*")),
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
repeat(character({ 945, 946, 947, 948 }, false)),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat(character({ 945, 946, 947, 948 }, false))),
|
||||
})));
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}))
|
||||
}, {}};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
|
|
|||
74
spec/compiler/prepare_grammar/extract_choices_spec.cc
Normal file
74
spec/compiler/prepare_grammar/extract_choices_spec.cc
Normal file
|
|
@ -0,0 +1,74 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::extract_choices;
|
||||
|
||||
// Test helper: a vector of rule pointers whose equality compares the rules
// the pointers refer to, rather than the pointers themselves. It is used as
// the expected value in `Equals(...)` assertions below.
class rule_vector : public vector<rule_ptr> {
 public:
  // Returns true when both vectors have the same length and every rule in
  // this vector equals the rule at the same position in `other`.
  bool operator==(const vector<rule_ptr> &other) const {
    if (this->size() != other.size()) return false;
    for (size_t i = 0; i < this->size(); i++) {
      const rule_ptr &rule = this->operator[](i);
      const rule_ptr &other_rule = other[i];
      // Compare against the element of `other`; comparing `rule` with itself
      // would make every equal-length pair of vectors look equal.
      if (!rule->operator==(*other_rule))
        return false;
    }
    return true;
  }

  // Builds the vector from a braced list of rules.
  rule_vector(const initializer_list<rule_ptr> &list) :
    vector<rule_ptr>(list) {}
};
|
||||
|
||||
// Specs for extract_choices: each case builds an input rule and checks the
// list of rules that extract_choices returns for it.
describe("extract_choices", []() {
  it("expands rules containing choices into multiple rules", [&]() {
    // A three-way choice in the middle of a sequence.
    auto input = seq({
      sym("a"),
      choice({ sym("b"), sym("c"), sym("d") }),
      sym("e")
    });

    const rule_vector expected({
      seq({ sym("a"), sym("b"), sym("e") }),
      seq({ sym("a"), sym("c"), sym("e") }),
      seq({ sym("a"), sym("d"), sym("e") }),
    });

    AssertThat(extract_choices(input), Equals(expected));
  });

  it("handles metadata rules", [&]() {
    // A precedence wrapper around a choice.
    auto input = prec(5, choice({ sym("b"), sym("c"), sym("d") }));

    const rule_vector expected({
      prec(5, sym("b")),
      prec(5, sym("c")),
      prec(5, sym("d")),
    });

    AssertThat(extract_choices(input), Equals(expected));
  });

  it("handles nested choices", [&]() {
    // A choice whose first alternative itself contains a choice.
    auto input = choice({
      seq({ choice({ sym("a"), sym("b") }), sym("c") }),
      sym("d")
    });

    const rule_vector expected({
      seq({ sym("a"), sym("c") }),
      seq({ sym("b"), sym("c") }),
      sym("d"),
    });

    AssertThat(extract_choices(input), Equals(expected));
  });

  it("handles repeats", [&]() {
    // A choice nested inside a repeat.
    auto input = repeat(choice({ sym("a"), sym("b") }));

    const rule_vector expected({
      repeat(sym("a")),
      repeat(sym("b")),
    });

    AssertThat(extract_choices(input), Equals(expected));
  });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
|
||||
START_TEST
|
||||
|
|
@ -8,238 +9,133 @@ START_TEST
|
|||
using namespace rules;
|
||||
using prepare_grammar::extract_tokens;
|
||||
using prepare_grammar::InternedGrammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
repeat(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
})),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
pattern("ij+"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
choice({ str("kl"), blank() }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_D",
|
||||
repeat(i_sym(3)),
|
||||
RuleEntryTypeNamed,
|
||||
}
|
||||
Variable("rule_A", VariableTypeNamed, repeat(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))),
|
||||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat(i_sym(3)))
|
||||
}, {}, {}});
|
||||
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
const GrammarError *error = get<2>(result);
|
||||
|
||||
AssertThat(error, Equals<const GrammarError *>(nullptr));
|
||||
|
||||
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
repeat(seq({
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat(seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
i_token(1),
|
||||
// This pattern is now the second rule in the lexical grammar.
|
||||
i_token(1),
|
||||
|
||||
choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
i_token(3),
|
||||
choice({
|
||||
// Rule 1, which this symbol pointed to, has been moved to the
|
||||
// lexical grammar.
|
||||
i_token(3),
|
||||
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
i_sym(1),
|
||||
// This symbol's index has been decremented, because a previous rule
|
||||
// was moved to the lexical grammar.
|
||||
i_sym(1),
|
||||
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
})),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
choice({ i_token(4), blank() }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_D",
|
||||
repeat(i_sym(2)),
|
||||
RuleEntryTypeNamed,
|
||||
}
|
||||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
}))),
|
||||
|
||||
Variable("rule_C", VariableTypeNamed, choice({ i_token(4), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat(i_sym(2))),
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
// Strings become anonymous rules.
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous,
|
||||
},
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
|
||||
// Patterns become hidden rules.
|
||||
{
|
||||
"/cd*/",
|
||||
pattern("cd*"),
|
||||
RuleEntryTypeAuxiliary,
|
||||
},
|
||||
Variable("/cd*/", VariableTypeAuxiliary, pattern("cd*")),
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
{
|
||||
"/(ef|gh)*/",
|
||||
repeat(choice({ str("ef"), str("gh") })),
|
||||
RuleEntryTypeAuxiliary,
|
||||
},
|
||||
Variable("/(ef|gh)*/", VariableTypeAuxiliary, repeat(choice({
|
||||
str("ef"),
|
||||
str("gh")
|
||||
}))),
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
{
|
||||
"rule_B",
|
||||
pattern("ij+"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
|
||||
// Strings become anonymous rules.
|
||||
{
|
||||
"kl",
|
||||
str("kl"),
|
||||
RuleEntryTypeAnonymous,
|
||||
},
|
||||
|
||||
Variable("kl", VariableTypeAnonymous, str("kl")),
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
}),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})),
|
||||
}, {}, {}});
|
||||
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({ i_token(0), i_sym(0), i_token(0) }),
|
||||
RuleEntryTypeNamed
|
||||
}
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })),
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
})))
|
||||
});
|
||||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({ i_sym(1), str("ab") }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
str("cd"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
seq({ str("ef"), str("cd") }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
|
||||
Variable("rule_B", VariableTypeNamed, str("cd")),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
|
||||
}, {}, {}});
|
||||
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"rule_A",
|
||||
seq({ i_sym(1), i_token(0) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
i_token(1),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
seq({ i_token(2), i_token(1) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })),
|
||||
Variable("rule_B", VariableTypeNamed, i_token(1)),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })),
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"ab",
|
||||
str("ab"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
{
|
||||
"cd",
|
||||
str("cd"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
{
|
||||
"ef",
|
||||
str("ef"),
|
||||
RuleEntryTypeAnonymous
|
||||
},
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
Variable("cd", VariableTypeAnonymous, str("cd")),
|
||||
Variable("ef", VariableTypeAnonymous, str("ef")),
|
||||
})));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("ok"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
repeat(i_sym(0)),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
repeat(seq({ i_sym(0), i_sym(0) })),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
|
||||
|
||||
SyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.rules.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0), Symbol(1) },
|
||||
})));
|
||||
|
|
@ -248,11 +144,7 @@ describe("extract_tokens", []() {
|
|||
describe("handling ubiquitous tokens", [&]() {
|
||||
it("adds inline ubiquitous tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("x"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
}, {
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
|
|
@ -268,22 +160,10 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("updates ubiquitous symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{ {
|
||||
{
|
||||
"rule_A",
|
||||
seq({ str("w"), str("x"), i_sym(1) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
str("y"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"rule_C",
|
||||
str("z"),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
Variable("rule_C", VariableTypeNamed, str("z")),
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}});
|
||||
|
|
@ -299,16 +179,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any ubiquitous tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
seq({ str("x"), i_sym(1) }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
seq({ str("y"), str("z") }),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
}, { i_sym(1) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
|
|
@ -319,16 +191,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any ubiquitous tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
{
|
||||
"rule_A",
|
||||
str("x"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"rule_B",
|
||||
str("y"),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals<const GrammarError *>(nullptr));
|
||||
|
|
|
|||
179
spec/compiler/prepare_grammar/flatten_grammar_spec.cc
Normal file
179
spec/compiler/prepare_grammar/flatten_grammar_spec.cc
Normal file
|
|
@ -0,0 +1,179 @@
|
|||
#include "compiler/compiler_spec_helper.h"
|
||||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
// Applies `f` to each element of `v`, in order, and returns the results in a
// new vector of the same length. The element type of the result is whatever
// `f` returns when called with a `T`.
template<typename T, typename Func>
std::vector<typename std::result_of<Func(T)>::type>
collect(const std::vector<T> &v, Func f) {
  // Qualified with std:: so this helper, which is defined before START_TEST,
  // does not depend on a using-directive leaking out of an included header.
  std::vector<typename std::result_of<Func(T)>::type> result;
  // The output size is known up front; reserve once instead of regrowing.
  result.reserve(v.size());
  for (const T &item : v)
    result.push_back(f(item));
  return result;
}
|
||||
|
||||
START_TEST
|
||||
|
||||
using namespace rules;
|
||||
using prepare_grammar::flatten_grammar;
|
||||
using prepare_grammar::InitialSyntaxGrammar;
|
||||
|
||||
// Specs for flatten_grammar: a fixed three-variable input grammar is
// flattened once, and each case inspects one aspect of the productions
// that result.
describe("flatten_grammar", []() {
  // variable0: a choice in the middle of a sequence, which should yield one
  // production per alternative.
  auto variable0_rule = seq({
    i_sym(1),
    choice({ i_sym(2), i_sym(3) }),
    i_sym(4),
  });

  // variable1: nested precedence annotations; the inner annotation is
  // expected to take priority over the outer one.
  auto variable1_rule = seq({
    i_sym(1),
    prec(101, seq({
      i_sym(2),
      choice({
        prec(102, seq({
          i_sym(3),
          i_sym(4)
        }), AssociativityRight),
        i_sym(5),
      }),
      i_sym(6),
    })),
    i_sym(7),
  });

  // variable2: a precedence annotation that reaches the end of the rule, so
  // its value should land on the final step of the production.
  auto variable2_rule = seq({
    prec(102, seq({
      i_sym(1),
      i_sym(2),
    })),
    prec(103, seq({
      i_sym(3),
      i_sym(4),
    })),
  });

  InitialSyntaxGrammar input_grammar{{
    Variable("variable0", VariableTypeNamed, variable0_rule),
    Variable("variable1", VariableTypeNamed, variable1_rule),
    Variable("variable2", VariableTypeHidden, variable2_rule)
  }, {}, {}};

  SyntaxGrammar grammar = flatten_grammar(input_grammar);

  // Each helper below projects one field out of every step of every
  // production, giving a vector-of-vectors that is easy to compare.
  auto symbol_sequences = [&](const vector<Production> &productions) {
    return collect(productions, [](const Production &production) {
      return collect(production, [](const ProductionStep &step) {
        return step.symbol;
      });
    });
  };

  auto precedence_sequences = [&](const vector<Production> &productions) {
    return collect(productions, [](const Production &production) {
      return collect(production, [](const ProductionStep &step) {
        return step.precedence;
      });
    });
  };

  auto associativity_sequences = [&](const vector<Production> &productions) {
    return collect(productions, [](const Production &production) {
      return collect(production, [](const ProductionStep &step) {
        return step.associativity;
      });
    });
  };

  auto rule_id_sequences = [&](const vector<Production> &productions) {
    return collect(productions, [](const Production &production) {
      return collect(production, [](const ProductionStep &step) {
        return step.rule_id;
      });
    });
  };

  it("preserves the names and types of the grammar's variables", [&]() {
    AssertThat(grammar.variables[0].name, Equals("variable0"));
    AssertThat(grammar.variables[1].name, Equals("variable1"));
    AssertThat(grammar.variables[2].name, Equals("variable2"));

    AssertThat(grammar.variables[0].type, Equals(VariableTypeNamed));
    AssertThat(grammar.variables[1].type, Equals(VariableTypeNamed));
    AssertThat(grammar.variables[2].type, Equals(VariableTypeHidden));
  });

  it("turns each variable's rule with a vector of possible symbol sequences", [&]() {
    const auto variable0_symbols = symbol_sequences(grammar.variables[0].productions);
    AssertThat(variable0_symbols, Equals(vector<vector<Symbol>>({
      { Symbol(1), Symbol(2), Symbol(4) },
      { Symbol(1), Symbol(3), Symbol(4) }
    })));

    const auto variable1_symbols = symbol_sequences(grammar.variables[1].productions);
    AssertThat(variable1_symbols, Equals(vector<vector<Symbol>>({
      { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
      { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
    })));

    const auto variable2_symbols = symbol_sequences(grammar.variables[2].productions);
    AssertThat(variable2_symbols, Equals(vector<vector<Symbol>>({
      { Symbol(1), Symbol(2), Symbol(3), Symbol(4) },
    })));
  });

  it("associates each symbol with the precedence binding it to its previous neighbor", [&]() {
    const auto variable0_precedences = precedence_sequences(grammar.variables[0].productions);
    AssertThat(variable0_precedences, Equals(vector<vector<int>>({
      { 0, 0, 0 },
      { 0, 0, 0 }
    })));

    const auto variable1_precedences = precedence_sequences(grammar.variables[1].productions);
    AssertThat(variable1_precedences, Equals(vector<vector<int>>({
      { 0, 0, 101, 102, 101, 0 },
      { 0, 0, 101, 101, 0 }
    })));

    const auto variable2_precedences = precedence_sequences(grammar.variables[2].productions);
    AssertThat(variable2_precedences, Equals(vector<vector<int>>({
      { 0, 102, 0, 103 },
    })));
  });

  it("associates each symbol with the correct associativity annotation", [&]() {
    // Short alias to keep the expected rows readable.
    Associativity unset = AssociativityNone;

    const auto variable1_associativities =
      associativity_sequences(grammar.variables[1].productions);
    AssertThat(variable1_associativities, Equals(vector<vector<Associativity>>({
      { unset, unset, AssociativityLeft, AssociativityRight, AssociativityLeft, unset },
      { unset, unset, AssociativityLeft, AssociativityLeft, unset }
    })));
  });

  it("associates each unique remaining subsequence of symbols and precedences with a rule_id", [&]() {
    // variable0: the two productions share only their final step.
    auto ids0 = rule_id_sequences(grammar.variables[0].productions);
    AssertThat(ids0[0][0], !Equals(ids0[1][0]));
    AssertThat(ids0[0][1], !Equals(ids0[1][1]));
    AssertThat(ids0[0][2], Equals(ids0[1][2]));

    // variable1: the two productions share their final *two* steps, which
    // sit at different indices because the productions differ in length.
    auto ids1 = rule_id_sequences(grammar.variables[1].productions);
    AssertThat(ids1[0][0], !Equals(ids1[1][0]));
    AssertThat(ids1[0][1], !Equals(ids1[1][1]));
    AssertThat(ids1[0][4], Equals(ids1[1][3]));
    AssertThat(ids1[0][5], Equals(ids1[1][4]));
  });
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -19,22 +19,10 @@ describe("intern_symbols", []() {
|
|||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.rules, Equals(vector<RuleEntry>({
|
||||
{
|
||||
"x",
|
||||
choice({ i_sym(1), i_sym(2) }),
|
||||
RuleEntryTypeNamed
|
||||
},
|
||||
{
|
||||
"y",
|
||||
i_sym(2),
|
||||
RuleEntryTypeNamed,
|
||||
},
|
||||
{
|
||||
"_z",
|
||||
str("stuff"),
|
||||
RuleEntryTypeHidden
|
||||
},
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
|
||||
Variable("y", VariableTypeNamed, i_sym(2)),
|
||||
Variable("_z", VariableTypeHidden, str("stuff")),
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
|
|||
5
spec/fixtures/grammars/javascript.cc
vendored
5
spec/fixtures/grammars/javascript.cc
vendored
|
|
@ -300,10 +300,7 @@ extern const Grammar javascript = Grammar({
|
|||
infix_op(">", "_expression", PREC_REL) }) },
|
||||
|
||||
{ "type_op", choice({
|
||||
prec(PREC_REL, seq({
|
||||
choice({ sym("_expression"), sym("identifier") }),
|
||||
str("in"),
|
||||
sym("_expression") })),
|
||||
infix_op("in", "_expression", PREC_REL),
|
||||
infix_op("instanceof", "_expression", PREC_REL),
|
||||
prefix_op("typeof", "_expression", PREC_TYPE) }) },
|
||||
|
||||
|
|
|
|||
1645
spec/fixtures/parsers/c.c
vendored
1645
spec/fixtures/parsers/c.c
vendored
File diff suppressed because it is too large
Load diff
2461
spec/fixtures/parsers/golang.c
vendored
2461
spec/fixtures/parsers/golang.c
vendored
File diff suppressed because it is too large
Load diff
56779
spec/fixtures/parsers/javascript.c
vendored
56779
spec/fixtures/parsers/javascript.c
vendored
File diff suppressed because it is too large
Load diff
|
|
@ -35,7 +35,7 @@ describe("Languages", [&]() {
|
|||
describe(("The " + pair.first + " parser").c_str(), [&]() {
|
||||
before_each([&]() {
|
||||
ts_document_set_language(doc, pair.second);
|
||||
// ts_document_set_debugger(doc, log_debugger_make(true));
|
||||
// ts_document_set_debugger(doc, log_debugger_make(false));
|
||||
});
|
||||
|
||||
for (auto &entry : test_entries_for_language(pair.first)) {
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@
|
|||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
|
@ -66,7 +66,7 @@ class LexTableBuilder {
|
|||
|
||||
else if (symbol.is_token)
|
||||
result.insert(LexItem(
|
||||
symbol, after_separators(lex_grammar.rules[symbol.index].rule)));
|
||||
symbol, after_separators(lex_grammar.variables[symbol.index].rule)));
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
class LexicalGrammar;
|
||||
struct LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -12,7 +12,8 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/get_completion_status.h"
|
||||
#include "compiler/build_tables/get_metadata.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
|
|
@ -35,7 +36,6 @@ class ParseTableBuilder {
|
|||
const LexicalGrammar lexical_grammar;
|
||||
ParseConflictManager conflict_manager;
|
||||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
vector<vector<Symbol>> productions;
|
||||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
ParseTable parse_table;
|
||||
std::set<string> conflicts;
|
||||
|
|
@ -48,11 +48,10 @@ class ParseTableBuilder {
|
|||
conflict_manager(grammar) {}
|
||||
|
||||
pair<ParseTable, const GrammarError *> build() {
|
||||
auto start_symbol = grammar.rules.empty() ? make_shared<Symbol>(0, true)
|
||||
: make_shared<Symbol>(0);
|
||||
ParseItem start_item(rules::START(), start_symbol, {});
|
||||
add_parse_state(
|
||||
item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
|
||||
ParseItem start_item(rules::START(), 0, 0, -2);
|
||||
ParseItemSet start_item_set({ { start_item, { rules::END_OF_INPUT() } } });
|
||||
item_set_closure(&start_item_set, grammar);
|
||||
add_parse_state(start_item_set);
|
||||
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
|
|
@ -105,20 +104,41 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
struct CompletionStatus {
|
||||
bool is_done;
|
||||
int precedence;
|
||||
Associativity associativity;
|
||||
};
|
||||
|
||||
CompletionStatus get_completion_status(const ParseItem &item) {
|
||||
CompletionStatus result{ false, 0, AssociativityNone };
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size()) {
|
||||
result.is_done = true;
|
||||
if (item.step_index > 0) {
|
||||
const ProductionStep &step = production[item.step_index - 1];
|
||||
result.precedence = step.precedence;
|
||||
result.associativity = step.associativity;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
|
||||
CompletionStatus completion_status = get_completion_status(item.rule);
|
||||
CompletionStatus completion_status = get_completion_status(item);
|
||||
if (completion_status.is_done) {
|
||||
ParseAction action =
|
||||
(item.lhs == rules::START())
|
||||
(item.lhs() == rules::START())
|
||||
? ParseAction::Accept()
|
||||
: ParseAction::Reduce(item.lhs, item.consumed_symbols.size(),
|
||||
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
|
||||
completion_status.precedence,
|
||||
completion_status.associativity,
|
||||
get_production_id(item.consumed_symbols));
|
||||
item.production_index);
|
||||
|
||||
for (const auto &lookahead_sym : lookahead_symbols)
|
||||
add_action(state_id, lookahead_sym, action, item_set);
|
||||
|
|
@ -157,40 +177,42 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead_sym,
|
||||
const ParseAction &action,
|
||||
ParseAction *add_action(ParseStateId state_id, Symbol lookahead,
|
||||
const ParseAction &new_action,
|
||||
const ParseItemSet &item_set) {
|
||||
auto ¤t_actions = parse_table.states[state_id].actions;
|
||||
auto current_entry = current_actions.find(lookahead_sym);
|
||||
const auto ¤t_actions = parse_table.states[state_id].actions;
|
||||
const auto ¤t_entry = current_actions.find(lookahead);
|
||||
if (current_entry == current_actions.end())
|
||||
return &parse_table.set_action(state_id, lookahead_sym, action);
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
|
||||
const ParseAction current_action = current_entry->second[0];
|
||||
const ParseAction old_action = current_entry->second[0];
|
||||
auto resolution =
|
||||
conflict_manager.resolve(action, current_action, lookahead_sym);
|
||||
conflict_manager.resolve(new_action, old_action, lookahead);
|
||||
|
||||
switch (resolution.second) {
|
||||
case ConflictTypeNone:
|
||||
if (resolution.first)
|
||||
return &parse_table.set_action(state_id, lookahead_sym, action);
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
break;
|
||||
|
||||
case ConflictTypeResolved:
|
||||
if (action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(action.production_id);
|
||||
if (current_action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(current_action.production_id);
|
||||
case ConflictTypeResolved: {
|
||||
if (resolution.first)
|
||||
return &parse_table.set_action(state_id, lookahead_sym, action);
|
||||
return &parse_table.set_action(state_id, lookahead, new_action);
|
||||
if (old_action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(production_id(old_action));
|
||||
if (new_action.type == ParseActionTypeReduce)
|
||||
parse_table.fragile_production_ids.insert(production_id(new_action));
|
||||
break;
|
||||
}
|
||||
|
||||
case ConflictTypeUnresolved: {
|
||||
set<Symbol> goal_symbols = item_set_goal_symbols(item_set);
|
||||
if (has_expected_conflict(goal_symbols))
|
||||
return &parse_table.add_action(state_id, lookahead_sym, action);
|
||||
auto old_goal_syms = goal_symbols(item_set, old_action, lookahead);
|
||||
auto new_goal_syms = goal_symbols(item_set, new_action, lookahead);
|
||||
if (has_expected_conflict(old_goal_syms, new_goal_syms))
|
||||
return &parse_table.add_action(state_id, lookahead, new_action);
|
||||
else
|
||||
conflicts.insert(conflict_description(action, current_action,
|
||||
lookahead_sym, goal_symbols));
|
||||
conflicts.insert(conflict_description(
|
||||
lookahead, old_action, old_goal_syms, new_action, new_goal_syms));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -198,9 +220,14 @@ class ParseTableBuilder {
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
bool has_expected_conflict(const set<Symbol> &symbols) {
|
||||
pair<Symbol, int> production_id(const ParseAction &action) {
|
||||
return { action.symbol, action.production_id };
|
||||
}
|
||||
|
||||
bool has_expected_conflict(set<Symbol> symbols1, const set<Symbol> &symbols2) {
|
||||
symbols1.insert(symbols2.begin(), symbols2.end());
|
||||
for (const auto &conflicting_symbols : grammar.expected_conflicts)
|
||||
if (symbols == conflicting_symbols)
|
||||
if (symbols1 == conflicting_symbols)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -209,46 +236,55 @@ class ParseTableBuilder {
|
|||
set<int> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
if (!item.consumed_symbols.empty()) {
|
||||
auto precedence_range = get_metadata(item.rule, rules::PRECEDENCE);
|
||||
result.insert(precedence_range.min);
|
||||
result.insert(precedence_range.max);
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index > 0) {
|
||||
if (item.step_index < production.size())
|
||||
result.insert(production[item.step_index].precedence);
|
||||
else
|
||||
result.insert(production[item.step_index - 1].precedence);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
set<Symbol> item_set_goal_symbols(const ParseItemSet &item_set) {
|
||||
set<Symbol> goal_symbols(const ParseItemSet &item_set,
|
||||
const ParseAction &action,
|
||||
const Symbol &lookahead_sym) {
|
||||
set<Symbol> result;
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
if (!item.consumed_symbols.empty())
|
||||
result.insert(item.lhs);
|
||||
switch (action.type) {
|
||||
case ParseActionTypeShift: {
|
||||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index < production.size() &&
|
||||
production[item.step_index].symbol == lookahead_sym)
|
||||
result.insert(item.lhs());
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
||||
case ParseActionTypeReduce:
|
||||
result.insert(action.symbol);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
string conflict_description(const ParseAction &new_action,
|
||||
string conflict_description(const Symbol &lookahead,
|
||||
const ParseAction &old_action,
|
||||
const rules::Symbol &symbol,
|
||||
const set<Symbol> &goal_symbols) const {
|
||||
string symbols_string;
|
||||
bool started = false;
|
||||
for (const auto &symbol : goal_symbols) {
|
||||
if (started)
|
||||
symbols_string += ", ";
|
||||
symbols_string += symbol_name(symbol);
|
||||
started = true;
|
||||
}
|
||||
|
||||
return "Within: " + symbols_string +
|
||||
"\n"
|
||||
"Lookahead: " +
|
||||
symbol_name(symbol) + "\n" +
|
||||
const set<Symbol> &old_goal_symbols,
|
||||
const ParseAction &new_action,
|
||||
const set<Symbol> &new_goal_symbols) const {
|
||||
return "Lookahead: " + symbol_name(lookahead) + "\n" +
|
||||
"Possible Actions:\n"
|
||||
"* " +
|
||||
action_description(old_action) + "\n" + "* " +
|
||||
action_description(new_action);
|
||||
action_description(old_action, old_goal_symbols) + "\n" + "* " +
|
||||
action_description(new_action, new_goal_symbols);
|
||||
}
|
||||
|
||||
string symbol_name(const rules::Symbol &symbol) const {
|
||||
|
|
@ -260,20 +296,31 @@ class ParseTableBuilder {
|
|||
else
|
||||
return "";
|
||||
} else if (symbol.is_token) {
|
||||
return lexical_grammar.rules[symbol.index].name;
|
||||
return lexical_grammar.variables[symbol.index].name;
|
||||
} else {
|
||||
return grammar.rules[symbol.index].name;
|
||||
return grammar.variables[symbol.index].name;
|
||||
}
|
||||
}
|
||||
|
||||
string action_description(const ParseAction &action) const {
|
||||
string action_description(const ParseAction &action,
|
||||
const set<Symbol> &goal_symbols) const {
|
||||
string symbols_string;
|
||||
bool started = false;
|
||||
for (const auto &symbol : goal_symbols) {
|
||||
if (started)
|
||||
symbols_string += ", ";
|
||||
symbols_string += symbol_name(symbol);
|
||||
started = true;
|
||||
}
|
||||
|
||||
string result;
|
||||
|
||||
switch (action.type) {
|
||||
case ParseActionTypeReduce: {
|
||||
result = "Reduce";
|
||||
for (const rules::Symbol &symbol : productions[action.production_id])
|
||||
result += " " + symbol_name(symbol);
|
||||
for (const ProductionStep &step :
|
||||
grammar.productions(action.symbol)[action.production_id])
|
||||
result += " " + symbol_name(step.symbol);
|
||||
result += " -> " + symbol_name(action.symbol);
|
||||
break;
|
||||
}
|
||||
|
|
@ -297,17 +344,6 @@ class ParseTableBuilder {
|
|||
|
||||
return result;
|
||||
}
|
||||
|
||||
size_t get_production_id(const vector<rules::Symbol> &symbols) {
|
||||
auto begin = productions.begin();
|
||||
auto end = productions.end();
|
||||
auto iter = find(begin, end, symbols);
|
||||
if (iter == end) {
|
||||
productions.push_back(symbols);
|
||||
return productions.size() - 1;
|
||||
}
|
||||
return iter - begin;
|
||||
}
|
||||
};
|
||||
|
||||
pair<ParseTable, const GrammarError *> build_parse_table(
|
||||
|
|
|
|||
|
|
@ -8,8 +8,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
struct SyntaxGrammar;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
#include <tuple>
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
|
|||
|
|
@ -10,8 +10,8 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
struct SyntaxGrammar;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,67 +0,0 @@
|
|||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::set;
|
||||
using rules::Symbol;
|
||||
|
||||
class FirstSymbols : public rules::RuleFn<set<Symbol>> {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<Symbol> visited_symbols;
|
||||
|
||||
public:
|
||||
explicit FirstSymbols(const SyntaxGrammar *grammar) : grammar(grammar) {}
|
||||
|
||||
private:
|
||||
set<Symbol> apply_to(const Symbol *rule) {
|
||||
auto insertion_result = visited_symbols.insert(*rule);
|
||||
if (!insertion_result.second)
|
||||
return set<Symbol>();
|
||||
|
||||
set<Symbol> result({ *rule });
|
||||
if (!rule->is_token) {
|
||||
set<Symbol> &&symbols = apply(grammar->rules[rule->index].rule);
|
||||
result.insert(symbols.begin(), symbols.end());
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Metadata *rule) {
|
||||
return apply(rule->rule);
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Choice *rule) {
|
||||
set<Symbol> result;
|
||||
for (const auto &element : rule->elements) {
|
||||
auto &&element_symbols = apply(element);
|
||||
result.insert(element_symbols.begin(), element_symbols.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
set<Symbol> apply_to(const rules::Seq *rule) {
|
||||
auto &&result = apply(rule->left);
|
||||
if (rule_can_be_blank(rule->left, *grammar)) {
|
||||
auto &&right_symbols = apply(rule->right);
|
||||
result.insert(right_symbols.begin(), right_symbols.end());
|
||||
}
|
||||
return result;
|
||||
}
|
||||
};
|
||||
|
||||
set<Symbol> first_symbols(const rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return FirstSymbols(&grammar).apply(rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,24 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
#define COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
|
||||
#include <set>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
/*
|
||||
* Returns the set of symbols that can appear at the beginning of a sentential
|
||||
* form derivable from a given rule in a given grammar.
|
||||
*/
|
||||
std::set<rules::Symbol> first_symbols(const rule_ptr &rule,
|
||||
const SyntaxGrammar &grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
|
||||
|
|
@ -3,11 +3,10 @@
|
|||
#include <vector>
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/build_tables/first_symbols.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include "compiler/build_tables/item.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -17,45 +16,63 @@ using std::vector;
|
|||
using std::pair;
|
||||
using rules::Symbol;
|
||||
|
||||
const ParseItemSet item_set_closure(const ParseItem &starting_item,
|
||||
const set<Symbol> &starting_lookahead_symbols,
|
||||
const SyntaxGrammar &grammar) {
|
||||
ParseItemSet result;
|
||||
void item_set_closure(ParseItemSet *item_set, const SyntaxGrammar &grammar) {
|
||||
vector<pair<ParseItem, set<Symbol>>> items_to_process;
|
||||
items_to_process.push_back({ starting_item, starting_lookahead_symbols });
|
||||
items_to_process.insert(items_to_process.end(), item_set->begin(),
|
||||
item_set->end());
|
||||
item_set->clear();
|
||||
|
||||
while (!items_to_process.empty()) {
|
||||
ParseItem item = items_to_process.back().first;
|
||||
set<Symbol> new_lookahead_symbols = items_to_process.back().second;
|
||||
items_to_process.pop_back();
|
||||
|
||||
set<Symbol> &lookahead_symbols = result[item];
|
||||
set<Symbol> &lookahead_symbols = item_set->operator[](item);
|
||||
size_t previous_size = lookahead_symbols.size();
|
||||
lookahead_symbols.insert(new_lookahead_symbols.begin(),
|
||||
new_lookahead_symbols.end());
|
||||
|
||||
if (lookahead_symbols.size() == previous_size)
|
||||
continue;
|
||||
|
||||
for (const auto &pair : sym_transitions(item.rule)) {
|
||||
const Symbol &symbol = pair.first;
|
||||
const rule_ptr &next_rule = pair.second;
|
||||
const Production &item_production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
|
||||
if (symbol.is_token || symbol.is_built_in())
|
||||
continue;
|
||||
if (item.step_index == item_production.size())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols = first_symbols(next_rule, grammar);
|
||||
if (rule_can_be_blank(next_rule, grammar))
|
||||
next_lookahead_symbols.insert(lookahead_symbols.begin(),
|
||||
lookahead_symbols.end());
|
||||
Symbol symbol = item_production[item.step_index].symbol;
|
||||
|
||||
items_to_process.push_back(
|
||||
{ ParseItem(symbol, grammar.rules[symbol.index].rule, {}),
|
||||
next_lookahead_symbols });
|
||||
if (symbol.is_token || symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
set<Symbol> next_lookahead_symbols;
|
||||
unsigned int next_step = item.step_index + 1;
|
||||
if (next_step == item_production.size()) {
|
||||
next_lookahead_symbols = lookahead_symbols;
|
||||
} else {
|
||||
vector<Symbol> symbols_to_process({ item_production[next_step].symbol });
|
||||
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol following_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
if (!next_lookahead_symbols.insert(following_symbol).second)
|
||||
continue;
|
||||
|
||||
for (const auto &production : grammar.productions(following_symbol))
|
||||
if (!production.empty())
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
}
|
||||
|
||||
size_t i = 0;
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
if (!production.empty())
|
||||
items_to_process.push_back(
|
||||
{ ParseItem(symbol, i, 0, production[0].rule_id),
|
||||
next_lookahead_symbols });
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -1,19 +1,16 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
#define COMPILER_BUILD_TABLES_ITEM_SET_CLOSURE_H_
|
||||
|
||||
#include <set>
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
const ParseItemSet item_set_closure(const ParseItem &,
|
||||
const std::set<rules::Symbol> &,
|
||||
const SyntaxGrammar &);
|
||||
void item_set_closure(ParseItemSet *, const SyntaxGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@
|
|||
#include "compiler/build_tables/merge_transitions.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/rule_transitions.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -22,20 +22,23 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
|
|||
for (const auto &pair : item_set) {
|
||||
const ParseItem &item = pair.first;
|
||||
const set<Symbol> &lookahead_symbols = pair.second;
|
||||
for (auto &transition : sym_transitions(item.rule)) {
|
||||
vector<Symbol> consumed_symbols(item.consumed_symbols);
|
||||
consumed_symbols.push_back(transition.first);
|
||||
ParseItem new_item(item.lhs, transition.second, consumed_symbols);
|
||||
merge_sym_transition<ParseItemSet>(
|
||||
&result, { transition.first,
|
||||
item_set_closure(new_item, lookahead_symbols, grammar) },
|
||||
[](ParseItemSet *left, const ParseItemSet *right) {
|
||||
for (auto &pair : *right)
|
||||
left->operator[](pair.first)
|
||||
.insert(pair.second.begin(), pair.second.end());
|
||||
});
|
||||
}
|
||||
const Production &production =
|
||||
grammar.productions(item.lhs())[item.production_index];
|
||||
if (item.step_index == production.size())
|
||||
continue;
|
||||
|
||||
const Symbol &symbol = production[item.step_index].symbol;
|
||||
unsigned int step = item.step_index + 1;
|
||||
int rule_id = step < production.size() ? production[step].rule_id : 0;
|
||||
ParseItem new_item(item.lhs(), item.production_index, step, rule_id);
|
||||
|
||||
result[symbol][new_item].insert(lookahead_symbols.begin(),
|
||||
lookahead_symbols.end());
|
||||
}
|
||||
|
||||
for (auto &pair : result)
|
||||
item_set_closure(&pair.second, grammar);
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
struct SyntaxGrammar;
|
||||
|
||||
namespace rules {
|
||||
class CharacterSet;
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
|
||||
#include <utility>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
|
|||
|
|
@ -1,34 +1,42 @@
|
|||
#include "compiler/build_tables/parse_item.h"
|
||||
#include <string>
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::to_string;
|
||||
using std::ostream;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseItem::ParseItem(const rules::Symbol &lhs, const rule_ptr rule,
|
||||
const vector<rules::Symbol> &consumed_symbols)
|
||||
: Item(lhs, rule), consumed_symbols(consumed_symbols) {}
|
||||
ParseItem::ParseItem(const Symbol &lhs, unsigned int production_index,
|
||||
unsigned int step_index, int rule_id)
|
||||
: variable_index(lhs.index),
|
||||
production_index(production_index),
|
||||
step_index(step_index),
|
||||
rule_id(rule_id) {}
|
||||
|
||||
bool ParseItem::operator==(const ParseItem &other) const {
|
||||
return (lhs == other.lhs) &&
|
||||
(consumed_symbols.size() == other.consumed_symbols.size()) &&
|
||||
(rule == other.rule || rule->operator==(*other.rule));
|
||||
return (variable_index == other.variable_index) &&
|
||||
(rule_id == other.rule_id) && (step_index == other.step_index);
|
||||
}
|
||||
|
||||
bool ParseItem::operator<(const ParseItem &other) const {
|
||||
if (lhs < other.lhs)
|
||||
if (variable_index < other.variable_index)
|
||||
return true;
|
||||
if (other.lhs < lhs)
|
||||
if (variable_index > other.variable_index)
|
||||
return false;
|
||||
if (consumed_symbols.size() < other.consumed_symbols.size())
|
||||
if (step_index < other.step_index)
|
||||
return true;
|
||||
if (other.consumed_symbols.size() < consumed_symbols.size())
|
||||
if (step_index > other.step_index)
|
||||
return false;
|
||||
return rule < other.rule;
|
||||
return rule_id < other.rule_id;
|
||||
}
|
||||
|
||||
Symbol ParseItem::lhs() const {
|
||||
return Symbol(variable_index);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -10,13 +10,17 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class ParseItem : public Item {
|
||||
class ParseItem {
|
||||
public:
|
||||
ParseItem(const rules::Symbol &lhs, rule_ptr rule,
|
||||
const std::vector<rules::Symbol> &consumed_symbols);
|
||||
ParseItem(const rules::Symbol &, unsigned int, unsigned int, int);
|
||||
bool operator==(const ParseItem &other) const;
|
||||
bool operator<(const ParseItem &other) const;
|
||||
std::vector<rules::Symbol> consumed_symbols;
|
||||
rules::Symbol lhs() const;
|
||||
|
||||
int variable_index;
|
||||
unsigned int production_index;
|
||||
unsigned int step_index;
|
||||
int rule_id;
|
||||
};
|
||||
|
||||
typedef std::map<ParseItem, std::set<rules::Symbol>> ParseItemSet;
|
||||
|
|
@ -29,9 +33,8 @@ namespace std {
|
|||
template <>
|
||||
struct hash<tree_sitter::build_tables::ParseItem> {
|
||||
size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
|
||||
return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
|
||||
hash<tree_sitter::rule_ptr>()(item.rule) ^
|
||||
hash<size_t>()(item.consumed_symbols.size());
|
||||
return hash<unsigned int>()(item.variable_index) ^
|
||||
hash<int>()(item.rule_id) ^ hash<unsigned int>()(item.step_index);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,5 @@
|
|||
#include "compiler/build_tables/rule_can_be_blank.h"
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
|
|
@ -12,8 +10,6 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::set;
|
||||
|
||||
class CanBeBlank : public rules::RuleFn<bool> {
|
||||
protected:
|
||||
bool apply_to(const rules::Blank *) {
|
||||
|
|
@ -40,35 +36,9 @@ class CanBeBlank : public rules::RuleFn<bool> {
|
|||
}
|
||||
};
|
||||
|
||||
class CanBeBlankRecursive : public CanBeBlank {
|
||||
const SyntaxGrammar *grammar;
|
||||
set<rules::Symbol> visited_symbols;
|
||||
using CanBeBlank::visit;
|
||||
|
||||
public:
|
||||
explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
|
||||
: grammar(grammar) {}
|
||||
|
||||
private:
|
||||
using CanBeBlank::apply_to;
|
||||
|
||||
bool apply_to(const rules::Symbol *rule) {
|
||||
if (visited_symbols.find(*rule) == visited_symbols.end()) {
|
||||
visited_symbols.insert(*rule);
|
||||
return !rule->is_token && apply(grammar->rules[rule->index].rule);
|
||||
} else {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule) {
|
||||
return CanBeBlank().apply(rule);
|
||||
}
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule, const SyntaxGrammar &grammar) {
|
||||
return CanBeBlankRecursive(&grammar).apply(rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -4,13 +4,9 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
bool rule_can_be_blank(const rule_ptr &rule);
|
||||
bool rule_can_be_blank(const rule_ptr &rule, const SyntaxGrammar &grammar);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
#include "compiler/prepare_grammar/prepare_grammar.h"
|
||||
#include "compiler/build_tables/build_tables.h"
|
||||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -7,7 +7,8 @@
|
|||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/lex_table.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
|
||||
|
|
@ -15,19 +16,15 @@ namespace tree_sitter {
|
|||
namespace generate_code {
|
||||
using std::function;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::vector;
|
||||
using util::escape_char;
|
||||
|
||||
static RuleEntry ERROR_ENTRY{
|
||||
"error", rule_ptr(), RuleEntryTypeNamed,
|
||||
};
|
||||
|
||||
static RuleEntry EOF_ENTRY{
|
||||
"end", rule_ptr(), RuleEntryTypeAuxiliary,
|
||||
};
|
||||
static Variable ERROR_ENTRY("error", VariableTypeNamed, rule_ptr());
|
||||
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
|
||||
|
||||
static const map<char, string> REPLACEMENTS({
|
||||
{ '~', "TILDE" },
|
||||
|
|
@ -149,15 +146,15 @@ class CCodeGenerator {
|
|||
for (const auto &symbol : parse_table.symbols) {
|
||||
line("[" + symbol_id(symbol) + "] = ");
|
||||
|
||||
switch (entry_for_symbol(symbol).type) {
|
||||
case RuleEntryTypeNamed:
|
||||
switch (symbol_type(symbol)) {
|
||||
case VariableTypeNamed:
|
||||
add("TSNodeTypeNamed,");
|
||||
break;
|
||||
case RuleEntryTypeAnonymous:
|
||||
case VariableTypeAnonymous:
|
||||
add("TSNodeTypeAnonymous,");
|
||||
break;
|
||||
case RuleEntryTypeHidden:
|
||||
case RuleEntryTypeAuxiliary:
|
||||
case VariableTypeHidden:
|
||||
case VariableTypeAuxiliary:
|
||||
add("TSNodeTypeHidden,");
|
||||
break;
|
||||
}
|
||||
|
|
@ -338,15 +335,18 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
RuleEntry entry = entry_for_symbol(symbol);
|
||||
string name = sanitize_name(entry.name);
|
||||
if (symbol.is_built_in())
|
||||
return "ts_builtin_sym_" + name;
|
||||
if (symbol == rules::ERROR())
|
||||
return "ts_builtin_sym_error";
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return "ts_builtin_sym_end";
|
||||
|
||||
switch (entry.type) {
|
||||
case RuleEntryTypeAuxiliary:
|
||||
auto entry = entry_for_symbol(symbol);
|
||||
string name = sanitize_name(entry.first);
|
||||
|
||||
switch (entry.second) {
|
||||
case VariableTypeAuxiliary:
|
||||
return "aux_sym_" + name;
|
||||
case RuleEntryTypeAnonymous:
|
||||
case VariableTypeAnonymous:
|
||||
return "anon_sym_" + name;
|
||||
default:
|
||||
return "sym_" + name;
|
||||
|
|
@ -358,26 +358,30 @@ class CCodeGenerator {
|
|||
return "ERROR";
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return "END";
|
||||
return entry_for_symbol(symbol).name;
|
||||
return entry_for_symbol(symbol).first;
|
||||
}
|
||||
|
||||
const RuleEntry &entry_for_symbol(const rules::Symbol &symbol) {
|
||||
VariableType symbol_type(const rules::Symbol &symbol) {
|
||||
if (symbol == rules::ERROR())
|
||||
return ERROR_ENTRY;
|
||||
return VariableTypeNamed;
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return EOF_ENTRY;
|
||||
if (symbol.is_token)
|
||||
return lexical_grammar.rules[symbol.index];
|
||||
else
|
||||
return syntax_grammar.rules[symbol.index];
|
||||
return VariableTypeHidden;
|
||||
return entry_for_symbol(symbol).second;
|
||||
}
|
||||
|
||||
string rule_name(const rules::Symbol &symbol) {
|
||||
return entry_for_symbol(symbol).name;
|
||||
pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
|
||||
if (symbol.is_token) {
|
||||
const Variable &variable = lexical_grammar.variables[symbol.index];
|
||||
return { variable.name, variable.type };
|
||||
} else {
|
||||
const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
|
||||
return { variable.name, variable.type };
|
||||
}
|
||||
}
|
||||
|
||||
bool reduce_action_is_fragile(const ParseAction &action) const {
|
||||
return parse_table.fragile_production_ids.find(action.production_id) !=
|
||||
return parse_table.fragile_production_ids.find(
|
||||
{ action.symbol, action.production_id }) !=
|
||||
parse_table.fragile_production_ids.end();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,10 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
struct SyntaxGrammar;
|
||||
class LexTable;
|
||||
class LexicalGrammar;
|
||||
class ParseTable;
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace generate_code {
|
||||
|
||||
|
|
|
|||
19
src/compiler/lexical_grammar.h
Normal file
19
src/compiler/lexical_grammar.h
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
#ifndef COMPILER_LEXICAL_GRAMMAR_H_
|
||||
#define COMPILER_LEXICAL_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar {
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rule_ptr> separators;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_LEXICAL_GRAMMAR_H_
|
||||
|
|
@ -60,7 +60,7 @@ ParseAction ParseAction::ReduceExtra(Symbol symbol) {
|
|||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
||||
int precedence, Associativity associativity,
|
||||
int production_id) {
|
||||
unsigned int production_id) {
|
||||
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
|
||||
{ precedence }, associativity, production_id);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,7 +35,8 @@ class ParseAction {
|
|||
static ParseAction Shift(ParseStateId state_index,
|
||||
std::set<int> precedence_values);
|
||||
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
int precedence, Associativity, int production_id);
|
||||
int precedence, Associativity,
|
||||
unsigned int production_id);
|
||||
static ParseAction ShiftExtra();
|
||||
static ParseAction ReduceExtra(rules::Symbol symbol);
|
||||
bool operator==(const ParseAction &) const;
|
||||
|
|
@ -87,7 +88,7 @@ class ParseTable {
|
|||
|
||||
std::vector<ParseState> states;
|
||||
std::set<rules::Symbol> symbols;
|
||||
std::set<int> fragile_production_ids;
|
||||
std::set<std::pair<rules::Symbol, unsigned int>> fragile_production_ids;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
|
@ -42,12 +42,10 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
rule_name + string("_repeat") + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back({
|
||||
helper_rule_name,
|
||||
aux_rules.push_back(Variable(
|
||||
helper_rule_name, VariableTypeAuxiliary,
|
||||
Seq::build({ inner_rule, Choice::build({ repeat_symbol.copy(),
|
||||
make_shared<Blank>() }) }),
|
||||
RuleEntryTypeAuxiliary,
|
||||
});
|
||||
make_shared<Blank>() }) })));
|
||||
return repeat_symbol.copy();
|
||||
}
|
||||
|
||||
|
|
@ -64,21 +62,21 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
return apply(rule);
|
||||
}
|
||||
|
||||
vector<RuleEntry> aux_rules;
|
||||
vector<Variable> aux_rules;
|
||||
};
|
||||
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
|
||||
SyntaxGrammar result;
|
||||
result.rules = grammar.rules;
|
||||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
||||
InitialSyntaxGrammar result;
|
||||
result.variables = grammar.variables;
|
||||
result.ubiquitous_tokens = grammar.ubiquitous_tokens;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
|
||||
ExpandRepeats expander(result.rules.size());
|
||||
for (auto &rule_entry : result.rules)
|
||||
rule_entry.rule = expander.expand(rule_entry.rule, rule_entry.name);
|
||||
ExpandRepeats expander(result.variables.size());
|
||||
for (auto &variable : result.variables)
|
||||
variable.rule = expander.expand(variable.rule, variable.name);
|
||||
|
||||
result.rules.insert(result.rules.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
result.variables.insert(result.variables.end(), expander.aux_rules.begin(),
|
||||
expander.aux_rules.end());
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -4,12 +4,11 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class SyntaxGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
SyntaxGrammar expand_repeats(const SyntaxGrammar &);
|
||||
struct InitialSyntaxGrammar;
|
||||
|
||||
InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -3,7 +3,7 @@
|
|||
#include <string>
|
||||
#include <utility>
|
||||
#include <map>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/string.h"
|
||||
|
|
@ -68,11 +68,11 @@ pair<LexicalGrammar, const GrammarError *> expand_tokens(
|
|||
LexicalGrammar result;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (auto &entry : grammar.rules) {
|
||||
auto rule = expander.apply(entry.rule);
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
auto rule = expander.apply(variable.rule);
|
||||
if (expander.error)
|
||||
return { result, expander.error };
|
||||
result.rules.push_back({ entry.name, rule, entry.type });
|
||||
result.variables.push_back(Variable(variable.name, variable.type, rule));
|
||||
}
|
||||
|
||||
for (auto &sep : grammar.separators) {
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
class LexicalGrammar;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
|
|
|
|||
57
src/compiler/prepare_grammar/extract_choices.cc
Normal file
57
src/compiler/prepare_grammar/extract_choices.cc
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::make_shared;
|
||||
using std::vector;
|
||||
|
||||
// Visitor that expands a rule tree into the list of its alternatives: every
// `Choice` encountered contributes one entry per element, and the wrappers
// handled below (`Seq`, `Metadata`, `Repeat`) are rebuilt around each
// alternative of their operands.
class ExtractChoices : public rules::RuleFn<vector<rule_ptr>> {
  // Rules without a dedicated overload are treated as leaves: their only
  // alternative is a copy of themselves.
  vector<rule_ptr> default_apply(const Rule *rule) {
    return vector<rule_ptr>({ rule->copy() });
  }

  // A sequence yields the cartesian product of its operands' alternatives,
  // ordered left-major.
  vector<rule_ptr> apply_to(const rules::Seq *rule) {
    // Expand each operand exactly once. The right-hand expansion does not
    // depend on the left-hand entry, so recomputing it inside the loop only
    // repeated the same traversal for every left alternative.
    const vector<rule_ptr> left_entries = apply(rule->left);
    const vector<rule_ptr> right_entries = apply(rule->right);

    vector<rule_ptr> result;
    result.reserve(left_entries.size() * right_entries.size());
    for (const rule_ptr &left_entry : left_entries)
      for (const rule_ptr &right_entry : right_entries)
        result.push_back(rules::Seq::build({ left_entry, right_entry }));
    return result;
  }

  // Metadata is re-applied, with the same value, around every alternative of
  // the wrapped rule.
  vector<rule_ptr> apply_to(const rules::Metadata *rule) {
    const vector<rule_ptr> entries = apply(rule->rule);

    vector<rule_ptr> result;
    result.reserve(entries.size());
    for (const rule_ptr &entry : entries)
      result.push_back(make_shared<rules::Metadata>(entry, rule->value));
    return result;
  }

  // A choice contributes the alternatives of each of its elements, in element
  // order.
  vector<rule_ptr> apply_to(const rules::Choice *rule) {
    vector<rule_ptr> result;
    for (const rule_ptr &element : rule->elements) {
      const vector<rule_ptr> entries = apply(element);
      result.insert(result.end(), entries.begin(), entries.end());
    }
    return result;
  }

  // A repeat is rebuilt around every alternative of its content.
  vector<rule_ptr> apply_to(const rules::Repeat *rule) {
    const vector<rule_ptr> entries = apply(rule->content);

    vector<rule_ptr> result;
    result.reserve(entries.size());
    for (const rule_ptr &entry : entries)
      result.push_back(make_shared<rules::Repeat>(entry));
    return result;
  }
};
|
||||
|
||||
std::vector<rule_ptr> extract_choices(const rule_ptr &rule) {
|
||||
return ExtractChoices().apply(rule);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
15
src/compiler/prepare_grammar/extract_choices.h
Normal file
15
src/compiler/prepare_grammar/extract_choices.h
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
|
||||
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::vector<rule_ptr> extract_choices(const rule_ptr &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
|
||||
|
|
@ -5,7 +5,8 @@
|
|||
#include <string>
|
||||
#include <tuple>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/string.h"
|
||||
|
|
@ -56,7 +57,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
|
|||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to_token(const Rule *input, RuleEntryType entry_type) {
|
||||
rule_ptr apply_to_token(const Rule *input, VariableType entry_type) {
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].rule->operator==(*input)) {
|
||||
token_usage_counts[i]++;
|
||||
|
|
@ -65,31 +66,29 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
|
||||
rule_ptr rule = input->copy();
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back({
|
||||
token_description(rule), rule, entry_type,
|
||||
});
|
||||
tokens.push_back(Variable(token_description(rule), entry_type, rule));
|
||||
token_usage_counts.push_back(1);
|
||||
return make_shared<Symbol>(index, true);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::String *rule) {
|
||||
return apply_to_token(rule, RuleEntryTypeAnonymous);
|
||||
return apply_to_token(rule, VariableTypeAnonymous);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Pattern *rule) {
|
||||
return apply_to_token(rule, RuleEntryTypeAuxiliary);
|
||||
return apply_to_token(rule, VariableTypeAuxiliary);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Metadata *rule) {
|
||||
if (rule->value_for(rules::IS_TOKEN) > 0)
|
||||
return apply_to_token(rule->rule.get(), RuleEntryTypeAuxiliary);
|
||||
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary);
|
||||
else
|
||||
return rules::IdentityRuleFn::apply_to(rule);
|
||||
}
|
||||
|
||||
public:
|
||||
vector<size_t> token_usage_counts;
|
||||
vector<RuleEntry> tokens;
|
||||
vector<Variable> tokens;
|
||||
};
|
||||
|
||||
static const GrammarError *ubiq_token_err(const string &message) {
|
||||
|
|
@ -97,9 +96,9 @@ static const GrammarError *ubiq_token_err(const string &message) {
|
|||
"Not a token: " + message);
|
||||
}
|
||||
|
||||
tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
const InternedGrammar &grammar) {
|
||||
SyntaxGrammar syntax_grammar;
|
||||
InitialSyntaxGrammar syntax_grammar;
|
||||
LexicalGrammar lexical_grammar;
|
||||
SymbolReplacer symbol_replacer;
|
||||
TokenExtractor extractor;
|
||||
|
|
@ -107,31 +106,30 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
/*
|
||||
* First, extract all of the grammar's tokens into the lexical grammar.
|
||||
*/
|
||||
vector<RuleEntry> processed_rules;
|
||||
for (const RuleEntry &entry : grammar.rules)
|
||||
processed_rules.push_back({
|
||||
entry.name, extractor.apply(entry.rule), entry.type,
|
||||
});
|
||||
lexical_grammar.rules = extractor.tokens;
|
||||
vector<Variable> processed_variables;
|
||||
for (const Variable &variable : grammar.variables)
|
||||
processed_variables.push_back(
|
||||
Variable(variable.name, variable.type, extractor.apply(variable.rule)));
|
||||
lexical_grammar.variables = extractor.tokens;
|
||||
|
||||
/*
|
||||
* If a rule's entire content was extracted as a token and that token didn't
|
||||
* appear within any other rule, then remove that rule from the syntax
|
||||
* If a variable's entire rule was extracted as a token and that token didn't
|
||||
* appear within any other rule, then remove that variable from the syntax
|
||||
* grammar, giving its name to the token in the lexical grammar. Any symbols
|
||||
* that pointed to that rule will need to be updated to point to the rule in
|
||||
* the lexical grammar. Symbols that pointed to later rules will need to have
|
||||
* their indices decremented.
|
||||
* that pointed to that variable will need to be updated to point to the
|
||||
* variable in the lexical grammar. Symbols that pointed to later variables
|
||||
* will need to have their indices decremented.
|
||||
*/
|
||||
size_t i = 0;
|
||||
for (const RuleEntry &entry : processed_rules) {
|
||||
auto symbol = dynamic_pointer_cast<const Symbol>(entry.rule);
|
||||
for (const Variable &variable : processed_variables) {
|
||||
auto symbol = dynamic_pointer_cast<const Symbol>(variable.rule);
|
||||
if (symbol.get() && symbol->is_token && !symbol->is_built_in() &&
|
||||
extractor.token_usage_counts[symbol->index] == 1) {
|
||||
lexical_grammar.rules[symbol->index].type = entry.type;
|
||||
lexical_grammar.rules[symbol->index].name = entry.name;
|
||||
lexical_grammar.variables[symbol->index].type = variable.type;
|
||||
lexical_grammar.variables[symbol->index].name = variable.name;
|
||||
symbol_replacer.replacements.insert({ Symbol(i), *symbol });
|
||||
} else {
|
||||
syntax_grammar.rules.push_back(entry);
|
||||
syntax_grammar.variables.push_back(variable);
|
||||
}
|
||||
i++;
|
||||
}
|
||||
|
|
@ -139,14 +137,14 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
/*
|
||||
* Perform any replacements of symbols needed based on the previous step.
|
||||
*/
|
||||
for (RuleEntry &entry : syntax_grammar.rules)
|
||||
entry.rule = symbol_replacer.apply(entry.rule);
|
||||
for (Variable &variable : syntax_grammar.variables)
|
||||
variable.rule = symbol_replacer.apply(variable.rule);
|
||||
|
||||
for (auto &symbol_set : grammar.expected_conflicts) {
|
||||
set<Symbol> new_symbol_set;
|
||||
for (const Symbol &symbol : symbol_set)
|
||||
new_symbol_set.insert(symbol_replacer.replace_symbol(symbol));
|
||||
syntax_grammar.expected_conflicts.insert(new_symbol_set);
|
||||
for (const ConflictSet &conflict_set : grammar.expected_conflicts) {
|
||||
ConflictSet new_conflict_set;
|
||||
for (const Symbol &symbol : conflict_set)
|
||||
new_conflict_set.insert(symbol_replacer.replace_symbol(symbol));
|
||||
syntax_grammar.expected_conflicts.insert(new_conflict_set);
|
||||
}
|
||||
|
||||
/*
|
||||
|
|
@ -171,7 +169,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
|||
if (!new_symbol.is_token)
|
||||
return make_tuple(
|
||||
syntax_grammar, lexical_grammar,
|
||||
ubiq_token_err(syntax_grammar.rules[new_symbol.index].name));
|
||||
ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
|
||||
|
||||
syntax_grammar.ubiquitous_tokens.insert(new_symbol);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -3,18 +3,15 @@
|
|||
|
||||
#include <tuple>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class Grammar;
|
||||
class SyntaxGrammar;
|
||||
class LexicalGrammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
|
||||
const InternedGrammar &);
|
||||
std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *>
|
||||
extract_tokens(const InternedGrammar &);
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
154
src/compiler/prepare_grammar/flatten_grammar.cc
Normal file
154
src/compiler/prepare_grammar/flatten_grammar.cc
Normal file
|
|
@ -0,0 +1,154 @@
|
|||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
using std::find;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
|
||||
// Visitor that flattens a rule tree made of `Seq`, `Metadata` and `Symbol`
// nodes into a linear `Production`, one `ProductionStep` per symbol.
//
// Precedence and associativity come from enclosing `Metadata` nodes. A
// metadata node does not push its value immediately: the value stays
// "pending" until the next symbol is visited. That symbol is recorded with the
// values that were in effect *before* the pending ones, and only then are the
// pending values pushed, so they apply to the symbols that follow it.
class FlattenRule : public rules::RuleFn<void> {
 public:
  bool has_pending_precedence;
  int pending_precedence;
  vector<int> precedence_stack;
  bool has_pending_associativity;
  Associativity pending_associativity;
  vector<Associativity> associativity_stack;
  Production production;

  FlattenRule()
      : has_pending_precedence(false),
        pending_precedence(0),
        has_pending_associativity(false),
        pending_associativity(AssociativityNone) {}

  // Appends a step for `sym` using the current stack tops, then activates any
  // pending precedence/associativity for the symbols that follow.
  void apply_to(const rules::Symbol *sym) {
    production.push_back(
      ProductionStep(*sym, current_precedence(), current_associativity()));

    if (has_pending_precedence) {
      precedence_stack.push_back(pending_precedence);
      has_pending_precedence = false;
    }
    if (has_pending_associativity) {
      associativity_stack.push_back(pending_associativity);
      has_pending_associativity = false;
    }
  }

  // Marks this node's precedence/associativity (when non-zero) as pending,
  // visits the wrapped rule, then undoes whatever this node contributed.
  void apply_to(const rules::Metadata *metadata) {
    int precedence = metadata->value_for(rules::PRECEDENCE);
    int associativity = metadata->value_for(rules::ASSOCIATIVITY);

    // Snapshot the state on entry so that leaving this node can restore it
    // exactly, whether or not a symbol consumed the pending values.
    const size_t precedence_depth = precedence_stack.size();
    const bool outer_has_pending_precedence = has_pending_precedence;
    const int outer_pending_precedence = pending_precedence;

    const size_t associativity_depth = associativity_stack.size();
    const bool outer_has_pending_associativity = has_pending_associativity;
    const Associativity outer_pending_associativity = pending_associativity;

    if (precedence != 0) {
      pending_precedence = precedence;
      has_pending_precedence = true;
    }

    if (associativity != 0) {
      pending_associativity = static_cast<Associativity>(associativity);
      has_pending_associativity = true;
    }

    apply(metadata->rule);

    if (precedence != 0) {
      if (precedence_stack.size() > precedence_depth) {
        precedence_stack.pop_back();
      } else {
        // No symbol under this node pushed a value (e.g. the wrapped rule is
        // blank). Popping here would hit an empty vector or remove an
        // enclosing node's entry, so reinstate the previous pending state
        // instead of letting this node's value leak out.
        has_pending_precedence = outer_has_pending_precedence;
        pending_precedence = outer_pending_precedence;
      }
    }

    if (associativity != 0) {
      if (associativity_stack.size() > associativity_depth) {
        associativity_stack.pop_back();
      } else {
        // Same reasoning as for precedence above.
        has_pending_associativity = outer_has_pending_associativity;
        pending_associativity = outer_pending_associativity;
      }
    }
  }

  // Sequences are flattened by visiting both operands in order.
  void apply_to(const rules::Seq *seq) {
    apply(seq->left);
    apply(seq->right);
  }

 private:
  // Precedence currently in effect; 0 when no metadata value is active.
  int current_precedence() {
    if (precedence_stack.empty())
      return 0;
    else
      return precedence_stack.back();
  }

  // Associativity currently in effect; AssociativityNone when none is active.
  Associativity current_associativity() {
    if (associativity_stack.empty())
      return AssociativityNone;
    else
      return associativity_stack.back();
  }
};
|
||||
|
||||
// Runs a fresh FlattenRule visitor over `rule` and returns a copy of the
// production it accumulated.
Production flatten_rule(const rule_ptr &rule) {
  FlattenRule visitor;
  visitor.apply(rule);
  const Production &steps = visitor.production;
  return steps;
}
|
||||
|
||||
struct ProductionSlice {
|
||||
vector<ProductionStep>::const_iterator start;
|
||||
vector<ProductionStep>::const_iterator end;
|
||||
|
||||
bool operator==(const ProductionSlice &other) const {
|
||||
if (end - start != other.end - other.start)
|
||||
return false;
|
||||
for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
|
||||
if (!(iter1->symbol == iter2->symbol &&
|
||||
iter1->precedence == iter2->precedence &&
|
||||
iter1->associativity == iter2->associativity))
|
||||
return false;
|
||||
return true;
|
||||
}
|
||||
};
|
||||
|
||||
// Gives every step of `production` a `rule_id` identifying the suffix of the
// production that starts at that step. Suffixes that compare equal (see
// ProductionSlice::operator==) share an id; ids are 1-based positions in
// `unique_slices`, which accumulates each distinct suffix in first-seen order.
//
// The stored slices keep iterators into `production`, so the production must
// stay unmodified while `unique_slices` is in use.
void assign_rule_ids(Production *production,
                     vector<ProductionSlice> *unique_slices) {
  const auto production_end = production->end();
  auto step = production->begin();

  while (step != production_end) {
    ProductionSlice suffix{ step, production_end };
    auto match = find(unique_slices->cbegin(), unique_slices->cend(), suffix);

    if (match != unique_slices->cend()) {
      // Known suffix: reuse its 1-based position.
      step->rule_id = match - unique_slices->cbegin() + 1;
    } else {
      // New suffix: its id is the size of the list once it has been appended.
      unique_slices->push_back(suffix);
      step->rule_id = unique_slices->size();
    }
    ++step;
  }
}
|
||||
|
||||
// Converts an InitialSyntaxGrammar, whose variables hold rule trees, into a
// SyntaxGrammar whose variables hold flat productions: one production per
// alternative returned by extract_choices, each flattened with flatten_rule.
// Ubiquitous tokens and expected conflicts are copied over unchanged.
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
  SyntaxGrammar flattened;
  flattened.ubiquitous_tokens = grammar.ubiquitous_tokens;
  flattened.expected_conflicts = grammar.expected_conflicts;

  for (const Variable &variable : grammar.variables) {
    vector<Production> alternatives;
    for (const rule_ptr &alternative : extract_choices(variable.rule))
      alternatives.push_back(flatten_rule(alternative));

    flattened.variables.push_back(
      SyntaxVariable(variable.name, variable.type, alternatives));
  }

  // Rule ids are assigned in a second pass, once every production sits in its
  // final place: the slices collected here hold iterators into those
  // productions, and the list of variables no longer grows from this point.
  vector<ProductionSlice> seen_slices;
  for (SyntaxVariable &variable : flattened.variables)
    for (Production &production : variable.productions)
      assign_rule_ids(&production, &seen_slices);

  return flattened;
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
13
src/compiler/prepare_grammar/flatten_grammar.h
Normal file
13
src/compiler/prepare_grammar/flatten_grammar.h
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_FLATTEN_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_FLATTEN_GRAMMAR_H_

#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/syntax_grammar.h"

namespace tree_sitter {
namespace prepare_grammar {

struct InitialSyntaxGrammar;

// Builds a SyntaxGrammar from the given InitialSyntaxGrammar by turning each
// variable's rule into a list of flat productions.
SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &);

}  // namespace prepare_grammar
}  // namespace tree_sitter

#endif  // COMPILER_PREPARE_GRAMMAR_FLATTEN_GRAMMAR_H_
|
||||
24
src/compiler/prepare_grammar/initial_syntax_grammar.h
Normal file
24
src/compiler/prepare_grammar/initial_syntax_grammar.h
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
#ifndef COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
|
||||
#define COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InitialSyntaxGrammar {
|
||||
std::vector<Variable> variables;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
|
||||
|
|
@ -56,10 +56,9 @@ pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &gramma
|
|||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
|
||||
result.rules.push_back({
|
||||
pair.first, new_rule,
|
||||
pair.first[0] == '_' ? RuleEntryTypeHidden : RuleEntryTypeNamed,
|
||||
});
|
||||
result.variables.push_back(Variable(
|
||||
pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
new_rule));
|
||||
}
|
||||
|
||||
for (auto &rule : grammar.ubiquitous_tokens()) {
|
||||
|
|
|
|||
|
|
@ -7,9 +7,6 @@
|
|||
#include "compiler/prepare_grammar/interned_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class Grammar;
|
||||
|
||||
namespace prepare_grammar {
|
||||
|
||||
std::pair<InternedGrammar, const GrammarError *> intern_symbols(const Grammar &);
|
||||
|
|
|
|||
|
|
@ -5,15 +5,16 @@
|
|||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
||||
struct InternedGrammar {
|
||||
std::vector<RuleEntry> rules;
|
||||
std::vector<Variable> variables;
|
||||
std::vector<rule_ptr> ubiquitous_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -1,10 +1,12 @@
|
|||
#include "compiler/prepare_grammar/prepare_grammar.h"
|
||||
#include <tuple>
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "compiler/prepare_grammar/expand_tokens.h"
|
||||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "compiler/prepare_grammar/intern_symbols.h"
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -28,7 +30,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
// Replace `Repeat` rules with pairs of recursive rules
|
||||
SyntaxGrammar syntax_grammar = expand_repeats(get<0>(extract_result));
|
||||
InitialSyntaxGrammar syntax_grammar = expand_repeats(get<0>(extract_result));
|
||||
|
||||
// Expand `String` and `Pattern` rules into full rule trees
|
||||
auto expand_tokens_result = expand_tokens(get<1>(extract_result));
|
||||
|
|
@ -37,7 +39,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
|
|||
if (error)
|
||||
return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);
|
||||
|
||||
return make_tuple(syntax_grammar, lex_grammar, nullptr);
|
||||
return make_tuple(flatten_grammar(syntax_grammar), lex_grammar, nullptr);
|
||||
}
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -2,7 +2,8 @@
|
|||
#define COMPILER_PREPARE_GRAMMAR_PREPARE_GRAMMAR_H_
|
||||
|
||||
#include <tuple>
|
||||
#include "compiler/prepared_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
|
|||
|
|
@ -1,40 +0,0 @@
|
|||
#ifndef COMPILER_PREPARED_GRAMMAR_H_
|
||||
#define COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
enum RuleEntryType {
|
||||
RuleEntryTypeNamed,
|
||||
RuleEntryTypeAnonymous,
|
||||
RuleEntryTypeHidden,
|
||||
RuleEntryTypeAuxiliary,
|
||||
};
|
||||
|
||||
struct RuleEntry {
|
||||
std::string name;
|
||||
rule_ptr rule;
|
||||
RuleEntryType type;
|
||||
};
|
||||
|
||||
class SyntaxGrammar {
|
||||
public:
|
||||
std::vector<RuleEntry> rules;
|
||||
std::set<rules::Symbol> ubiquitous_tokens;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
};
|
||||
|
||||
class LexicalGrammar {
|
||||
public:
|
||||
std::vector<RuleEntry> rules;
|
||||
std::vector<rule_ptr> separators;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_PREPARED_GRAMMAR_H_
|
||||
|
|
@ -15,5 +15,9 @@ Symbol START() {
|
|||
return Symbol(-3);
|
||||
}
|
||||
|
||||
// Built-in symbol constructed from index -4.
Symbol NONE() {
  const Symbol none(-4);
  return none;
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace rules {
|
|||
Symbol ERROR();
|
||||
Symbol END_OF_INPUT();
|
||||
Symbol START();
|
||||
Symbol NONE();
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -130,6 +130,79 @@ class RuleFn : private Visitor {
|
|||
T value_;
|
||||
};
|
||||
|
||||
template <>
|
||||
class RuleFn<void> : private Visitor {
|
||||
public:
|
||||
void apply(const rule_ptr &rule) {
|
||||
rule->accept(this);
|
||||
}
|
||||
|
||||
protected:
|
||||
virtual void default_apply(const Rule *rule) {}
|
||||
|
||||
virtual void apply_to(const Blank *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const CharacterSet *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Choice *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Metadata *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Pattern *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Repeat *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Seq *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const String *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const NamedSymbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const Symbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const CharacterSet *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Choice *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Metadata *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Pattern *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Repeat *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Seq *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const String *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const NamedSymbol *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const Symbol *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
};
|
||||
|
||||
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
protected:
|
||||
virtual rule_ptr default_apply(const Rule *rule);
|
||||
|
|
|
|||
63
src/compiler/syntax_grammar.cc
Normal file
63
src/compiler/syntax_grammar.cc
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <utility>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
// Productions reported for the START symbol when the grammar has no
// variables: a single one-step production referring to Symbol(0, true).
static const vector<Production> START_PRODUCTIONS_TOKEN_ONLY(
  1, Production(1, ProductionStep(rules::Symbol(0, true), 0, AssociativityNone)));

// Productions reported for the START symbol otherwise: a single one-step
// production referring to Symbol(0).
static const vector<Production> START_PRODUCTIONS(
  1, Production(1, ProductionStep(rules::Symbol(0), 0, AssociativityNone)));

// Empty list handed out for symbols that have no productions.
static const vector<Production> NO_PRODUCTIONS = vector<Production>();
|
||||
|
||||
// Stores the given name, type and productions in the corresponding members.
SyntaxVariable::SyntaxVariable(const string &variable_name,
                               VariableType variable_type,
                               const vector<Production> &variable_productions)
    : name(variable_name),
      productions(variable_productions),
      type(variable_type) {}
|
||||
|
||||
// Builds a step whose rule_id is 0, by delegating to the four-argument
// constructor.
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
                               Associativity associativity)
    : ProductionStep(symbol, precedence, associativity, 0) {}
|
||||
|
||||
// Builds a step from an explicit symbol, precedence, associativity and
// rule id.
ProductionStep::ProductionStep(const rules::Symbol &step_symbol,
                               int step_precedence,
                               Associativity step_associativity,
                               int step_rule_id)
    : symbol(step_symbol),
      precedence(step_precedence),
      associativity(step_associativity),
      rule_id(step_rule_id) {}
|
||||
|
||||
// Steps are equal when symbol, precedence, rule_id and associativity all
// match; the fields are checked in that order.
bool ProductionStep::operator==(const ProductionStep &other) const {
  if (!(symbol == other.symbol))
    return false;
  if (!(precedence == other.precedence))
    return false;
  if (!(rule_id == other.rule_id))
    return false;
  return associativity == other.associativity;
}
|
||||
|
||||
// Looks up the productions for `symbol`:
//   - the START symbol maps to one of two fixed single-step production lists,
//     depending on whether the grammar has any variables;
//   - other built-in symbols and tokens have no productions;
//   - anything else is used as an index into `variables` (no bounds check is
//     performed).
const vector<Production> &SyntaxGrammar::productions(
  const rules::Symbol &symbol) const {
  if (symbol == rules::START())
    return variables.empty() ? START_PRODUCTIONS_TOKEN_ONLY : START_PRODUCTIONS;

  if (symbol.is_built_in() || symbol.is_token)
    return NO_PRODUCTIONS;

  return variables[symbol.index].productions;
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
47
src/compiler/syntax_grammar.h
Normal file
47
src/compiler/syntax_grammar.h
Normal file
|
|
@ -0,0 +1,47 @@
|
|||
#ifndef COMPILER_SYNTAX_GRAMMAR_H_
#define COMPILER_SYNTAX_GRAMMAR_H_

#include <vector>
#include <string>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/variable.h"

namespace tree_sitter {

// One element of a flattened production: a symbol together with the
// precedence and associativity recorded for it.
struct ProductionStep {
  // Leaves rule_id at 0.
  ProductionStep(const rules::Symbol &, int, Associativity);
  // Same as above, with an explicit rule_id as the last argument.
  ProductionStep(const rules::Symbol &, int, Associativity, int);
  // Compares all four fields.
  bool operator==(const ProductionStep &) const;

  rules::Symbol symbol;
  int precedence;
  Associativity associativity;
  int rule_id;
};

// A production is the ordered list of its steps.
typedef std::vector<ProductionStep> Production;

// A named grammar variable together with its productions.
struct SyntaxVariable {
  SyntaxVariable(const std::string &, VariableType,
                 const std::vector<Production> &);

  std::string name;
  std::vector<Production> productions;
  VariableType type;
};

// A set of symbols, as used for the grammar's expected conflicts.
typedef std::set<rules::Symbol> ConflictSet;

struct SyntaxGrammar {
  // Returns the productions associated with the given symbol.
  const std::vector<Production> &productions(const rules::Symbol &) const;

  std::vector<SyntaxVariable> variables;
  std::set<rules::Symbol> ubiquitous_tokens;
  std::set<ConflictSet> expected_conflicts;
};

}  // namespace tree_sitter

#endif  // COMPILER_SYNTAX_GRAMMAR_H_
|
||||
11
src/compiler/variable.cc
Normal file
11
src/compiler/variable.cc
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
#include "compiler/variable.h"
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
|
||||
// Stores the given name, type and rule in the corresponding members.
Variable::Variable(const string &variable_name, VariableType variable_type,
                   const rule_ptr &variable_rule)
    : name(variable_name), rule(variable_rule), type(variable_type) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
26
src/compiler/variable.h
Normal file
26
src/compiler/variable.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef COMPILER_VARIABLE_H_
|
||||
#define COMPILER_VARIABLE_H_
|
||||
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
// Classification of a grammar variable. The enumerators keep their implicit
// values, spelled out here for reference.
enum VariableType {
  VariableTypeHidden = 0,
  VariableTypeAuxiliary = 1,
  VariableTypeAnonymous = 2,
  VariableTypeNamed = 3,
};
|
||||
|
||||
struct Variable {
|
||||
Variable(const std::string &, VariableType, const rule_ptr &);
|
||||
|
||||
std::string name;
|
||||
rule_ptr rule;
|
||||
VariableType type;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_VARIABLE_H_
|
||||
Loading…
Add table
Add a link
Reference in a new issue