Separate syntax rules into flat lists of symbols
This way, every ParseItem can be associated with a particular production for its non-terminal. That lets us keep track of which productions are involved in shift/reduce conflicts.
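Before this change, a ParseItem pointed at the remainder of a rule tree; after it, a ParseItem carries plain indices into flattened productions. A minimal sketch of the new data model, using a simplified stand-in Symbol type (the real declarations are in src/compiler/syntax_grammar.h and src/compiler/build_tables/parse_item.h in the diff below):

    #include <cstddef>
    #include <vector>

    struct Symbol { int index; };  // stand-in for tree_sitter::rules::Symbol

    // One step of a flattened production: a symbol, the precedence that binds
    // it to its previous neighbor, and an id naming the suffix of the
    // production that starts at this entry.
    struct ProductionEntry {
      Symbol symbol;
      int precedence;
      int rule_id;
    };

    // A rule like choice({ seq(a, b), seq(a, c) }) flattens into two
    // productions, { a, b } and { a, c }; no nested rule trees survive.
    struct Production {
      std::vector<ProductionEntry> entries;
      int end_rule_id;  // id of the empty suffix past the last symbol
    };

    // A parse item names one production of its non-terminal by index, so a
    // shift/reduce conflict can report exactly which productions collide.
    struct ParseItem {
      Symbol lhs;                    // the non-terminal being parsed
      size_t production_index;       // which production of lhs
      int rule_id;                   // id of the not-yet-consumed suffix
      size_t consumed_symbol_count;  // the dot position in the production
    };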
parent 68a0e16d1e
commit 52daffb3f3

37 changed files with 842 additions and 426 deletions
@@ -15,7 +15,6 @@
   'src/compiler/build_tables/build_lex_table.cc',
   'src/compiler/build_tables/build_parse_table.cc',
   'src/compiler/build_tables/build_tables.cc',
-  'src/compiler/build_tables/first_symbols.cc',
   'src/compiler/build_tables/get_metadata.cc',
   'src/compiler/build_tables/item.cc',
   'src/compiler/build_tables/item_set_closure.cc',
@@ -33,12 +32,15 @@
   'src/compiler/parse_table.cc',
   'src/compiler/prepare_grammar/expand_repeats.cc',
   'src/compiler/prepare_grammar/expand_tokens.cc',
+  'src/compiler/prepare_grammar/extract_choices.cc',
   'src/compiler/prepare_grammar/extract_tokens.cc',
+  'src/compiler/prepare_grammar/flatten_grammar.cc',
   'src/compiler/prepare_grammar/intern_symbols.cc',
   'src/compiler/prepare_grammar/is_token.cc',
   'src/compiler/prepare_grammar/parse_regex.cc',
   'src/compiler/prepare_grammar/prepare_grammar.cc',
   'src/compiler/prepare_grammar/token_description.cc',
+  'src/compiler/prepare_grammar/initial_syntax_grammar.cc',
   'src/compiler/syntax_grammar.cc',
   'src/compiler/rules/blank.cc',
   'src/compiler/rules/built_in_symbols.cc',
@@ -2,7 +2,6 @@
 #include "compiler/rules/built_in_symbols.h"
 #include "compiler/parse_table.h"
 #include "compiler/build_tables/action_takes_precedence.h"
 #include "compiler/syntax_grammar.h"

 using namespace rules;
 using namespace build_tables;
@@ -12,11 +12,11 @@ describe("build_conflict", []() {
   Conflict conflict("");

   SyntaxGrammar parse_grammar({
-    { "in_progress_rule1", i_token(0) },
-    { "in_progress_rule2", i_token(0) },
-    { "reduced_rule", i_token(0) },
-    { "other_ruel1", i_token(0) },
-    { "other_rule2", i_token(0) },
+    { "in_progress_rule1", {} },
+    { "in_progress_rule2", {} },
+    { "reduced_rule", {} },
+    { "other_ruel1", {} },
+    { "other_rule2", {} },
   }, {}, { Symbol(2, SymbolOptionToken) });

   LexicalGrammar lex_grammar({
@@ -30,15 +30,15 @@ describe("build_conflict", []() {
     ParseAction::Reduce(Symbol(2), 1, 0), // reduced_rule
     ParseItemSet({
       {
-        ParseItem(Symbol(0), blank(), 2), // in_progress_rule1
+        ParseItem(Symbol(0), 0, 0, 2), // in_progress_rule1
         set<Symbol>({ Symbol(2, SymbolOptionToken) })
       },
       {
-        ParseItem(Symbol(1), blank(), 2), // in_progress_rule2
+        ParseItem(Symbol(1), 0, 0, 2), // in_progress_rule2
         set<Symbol>({ Symbol(2, SymbolOptionToken) })
       },
       {
-        ParseItem(Symbol(3), blank(), 0), // other_rule1
+        ParseItem(Symbol(3), 0, 0, 0), // other_rule1
         set<Symbol>({ Symbol(2, SymbolOptionToken) })
       },
     }),
@@ -58,11 +58,11 @@ describe("build_conflict", []() {
     ParseAction::Shift(2, set<int>()),
     ParseItemSet({
       {
-        ParseItem(Symbol(0), blank(), 2), // in_progress_rule1
+        ParseItem(Symbol(0), 0, 0, 2), // in_progress_rule1
         set<Symbol>({ Symbol(2, SymbolOptionToken) })
       },
       {
-        ParseItem(Symbol(1), blank(), 2), // in_progress_rule2
+        ParseItem(Symbol(1), 0, 0, 2), // in_progress_rule2
         set<Symbol>({ Symbol(2, SymbolOptionToken) })
      },
     }),
@@ -12,9 +12,25 @@ START_TEST

 describe("build_parse_table", []() {
   SyntaxGrammar parse_grammar({
-    { "rule0", choice({ i_sym(1), i_sym(2) }) },
-    { "rule1", i_token(0) },
-    { "rule2", i_token(1) },
+    {
+      "rule0",
+      {
+        Production({ {Symbol(1), 0, 1} }, 0),
+        Production({ {Symbol(2), 0, 2} }, 0)
+      }
+    },
+    {
+      "rule1",
+      {
+        Production({ {Symbol(0, SymbolOptionToken), 0, 3} }, 0)
+      }
+    },
+    {
+      "rule2",
+      {
+        Production({ {Symbol(1, SymbolOptionToken), 0, 4} }, 0)
+      }
+    },
   }, {}, { Symbol(2, SymbolOptionToken) });

   LexicalGrammar lex_grammar({
@@ -1,103 +0,0 @@
-#include "compiler/compiler_spec_helper.h"
-#include "compiler/syntax_grammar.h"
-#include "compiler/build_tables/first_symbols.h"
-#include "compiler/rules/metadata.h"
-
-using namespace build_tables;
-using namespace rules;
-
-START_TEST
-
-describe("first_symbols", []() {
-  const SyntaxGrammar null_grammar;
-
-  describe("for a sequence AB", [&]() {
-    it("ignores B when A cannot be blank", [&]() {
-      auto rule = seq({ i_token(0), i_token(1) });
-
-      AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
-        Symbol(0, SymbolOptionToken),
-      })));
-    });
-
-    it("includes first_symbols(B) when A can be blank", [&]() {
-      auto rule = seq({
-        choice({
-          i_token(0),
-          blank() }),
-        i_token(1) });
-
-      AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
-        Symbol(0, SymbolOptionToken),
-        Symbol(1, SymbolOptionToken)
-      })));
-    });
-
-    it("includes first_symbols(A's right hand side) when A is a non-terminal", [&]() {
-      auto rule = choice({
-        seq({
-          i_token(0),
-          i_token(1) }),
-        i_sym(0) });
-
-      SyntaxGrammar grammar({
-        { "rule0", seq({
-          i_token(2),
-          i_token(3),
-          i_token(4) }) }
-      }, {});
-
-      AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
-        Symbol(0),
-        Symbol(0, SymbolOptionToken),
-        Symbol(2, SymbolOptionToken),
-      })));
-    });
-
-    it("includes first_symbols(B) when A is a non-terminal and its expansion can be blank", [&]() {
-      auto rule = seq({
-        i_sym(0),
-        i_token(1) });
-
-      SyntaxGrammar grammar({
-        { "rule0", choice({
-          i_token(0),
-          blank() }) }
-      }, {});
-
-      AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
-        Symbol(0),
-        Symbol(0, SymbolOptionToken),
-        Symbol(1, SymbolOptionToken),
-      })));
-    });
-  });
-
-  describe("when there are left-recursive rules", [&]() {
-    it("terminates", [&]() {
-      SyntaxGrammar grammar({
-        { "rule0", choice({
-          seq({ i_sym(0), i_token(10) }),
-          i_token(11),
-        }) },
-      }, {});
-
-      auto rule = i_sym(0);
-
-      AssertThat(first_symbols(rule, grammar), Equals(set<Symbol>({
-        Symbol(0),
-        Symbol(11, SymbolOptionToken)
-      })));
-    });
-  });
-
-  it("ignores metadata rules", [&]() {
-    auto rule = make_shared<Metadata>(i_token(3), map<rules::MetadataKey, int>());
-
-    AssertThat(first_symbols(rule, null_grammar), Equals(set<Symbol>({
-      Symbol(3, SymbolOptionToken),
-    })));
-  });
-});
-
-END_TEST
@@ -10,30 +10,61 @@ START_TEST

 describe("item_set_closure", []() {
   SyntaxGrammar grammar({
-    { "E", seq({
-      i_sym(1),
-      i_token(11) }) },
-    { "T", seq({
-      i_token(12),
-      i_token(13) }) },
-  }, {});
+    {
+      "rule0",
+      {
+        Production({
+          {Symbol(1), 0, 100},
+          {Symbol(11, SymbolOptionToken), 0, 101}
+        }, 107),
+      }
+    },
+    {
+      "rule1",
+      {
+        Production({
+          {Symbol(12, SymbolOptionToken), 0, 102},
+          {Symbol(13, SymbolOptionToken), 0, 103}
+        }, 108),
+        Production({
+          {Symbol(2), 0, 104},
+        }, 109)
+      }
+    },
+    {
+      "rule2",
+      {
+        Production({
+          {Symbol(14, SymbolOptionToken), 0, 105},
+          {Symbol(15, SymbolOptionToken), 0, 106}
+        }, 110)
+      }
+    },
+  }, {}, set<Symbol>());

   it("adds items at the beginnings of referenced rules", [&]() {
     ParseItemSet item_set = item_set_closure(
-      ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
-      set<Symbol>({ Symbol(10, SymbolOptionToken) }),
-      grammar
-    );
+      ParseItem(Symbol(0), 0, 100, 0),
+      set<Symbol>({ Symbol(10, SymbolOptionToken) }),
+      grammar);

     AssertThat(item_set, Equals(ParseItemSet({
       {
-        ParseItem(Symbol(1), grammar.rule(Symbol(1)), 0),
-        set<Symbol>({ Symbol(11, SymbolOptionToken) }),
-      },
-      {
-        ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),
-        set<Symbol>({ Symbol(10, SymbolOptionToken) }),
-      },
+        ParseItem(Symbol(0), 0, 100, 0),
+        set<Symbol>({ Symbol(10, SymbolOptionToken) })
+      },
+      {
+        ParseItem(Symbol(1), 0, 102, 0),
+        set<Symbol>({ Symbol(11, SymbolOptionToken) })
+      },
+      {
+        ParseItem(Symbol(1), 1, 104, 0),
+        set<Symbol>({ Symbol(11, SymbolOptionToken) })
+      },
+      {
+        ParseItem(Symbol(2), 0, 105, 0),
+        set<Symbol>({ Symbol(11, SymbolOptionToken) })
+      },
     })));
   });
 });
@@ -12,63 +12,79 @@ describe("char_transitions(LexItemSet)", []() {
   describe("when two items in the set have transitions on the same character", [&]() {
     it("merges the transitions by computing the union of the two item sets", [&]() {
       LexItemSet set1({
        LexItem(Symbol(1), CharacterSet().include('a', 'f').copy()),
        LexItem(Symbol(2), CharacterSet().include('e', 'x').copy())
       });

       AssertThat(char_transitions(set1), Equals(map<CharacterSet, LexItemSet>({
         {
           CharacterSet().include('a', 'd'),
           LexItemSet({
             LexItem(Symbol(1), blank()),
           })
         },
         {
           CharacterSet().include('e', 'f'),
           LexItemSet({
             LexItem(Symbol(1), blank()),
             LexItem(Symbol(2), blank()),
           })
         },
         {
           CharacterSet().include('g', 'x'),
           LexItemSet({
             LexItem(Symbol(2), blank()),
           })
         },
       })));
     });
   });
 });

-describe("sym_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
-  SyntaxGrammar grammar({
-    { "A", blank() },
-    { "B", i_token(21) },
-  }, {}, set<Symbol>());
-
+describe("sym_transitions(ParseItemSet, InitialSyntaxGrammar)", [&]() {
   it("computes the closure of the new item sets", [&]() {
-    ParseItemSet set1({
-      {
-        ParseItem(Symbol(0), seq({ i_token(22), i_sym(1) }), 3),
-        set<Symbol>({ Symbol(23, SymbolOptionToken) })
-      }
-    });
+    SyntaxGrammar grammar({
+      {
+        "A", {
+          Production({
+            {Symbol(11, SymbolOptionToken), 0, 101},
+            {Symbol(12, SymbolOptionToken), 0, 102},
+            {Symbol(13, SymbolOptionToken), 0, 103},
+            {Symbol(1), 0, 104},
+            {Symbol(14, SymbolOptionToken), 0, 105},
+          }, 1)
+        },
+      },
+      {
+        "B", {
+          Production({
+            {Symbol(15, SymbolOptionToken), 0, 106},
+          }, 2)
+        },
+      }
+    }, {}, set<Symbol>());
+
+    ParseItemSet set1({
+      {
+        ParseItem(Symbol(0), 0, 103, 2),
+        set<Symbol>({ Symbol(16, SymbolOptionToken) })
+      }
+    });

     AssertThat(sym_transitions(set1, grammar), Equals(map<Symbol, ParseItemSet>({
       {
-        Symbol(22, SymbolOptionToken),
+        Symbol(13, SymbolOptionToken),
         ParseItemSet({
           {
-            ParseItem(Symbol(0), i_sym(1), 4),
-            set<Symbol>({ Symbol(23, SymbolOptionToken) }),
+            ParseItem(Symbol(0), 0, 104, 3),
+            set<Symbol>({ Symbol(16, SymbolOptionToken) })
           },
           {
-            ParseItem(Symbol(1), i_token(21), 0),
-            set<Symbol>({ Symbol(23, SymbolOptionToken) })
+            ParseItem(Symbol(1), 0, 106, 0),
+            set<Symbol>({ Symbol(14, SymbolOptionToken) })
           },
         })
       },
     })));
   });
 });
@@ -1,7 +1,6 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/build_tables/rule_can_be_blank.h"
 #include "compiler/rules/metadata.h"
-#include "compiler/syntax_grammar.h"

 using namespace rules;
 using build_tables::rule_can_be_blank;
@@ -54,27 +53,6 @@ describe("rule_can_be_blank", [&]() {
     rule = make_shared<rules::Metadata>(sym("one"), map<rules::MetadataKey, int>());
     AssertThat(rule_can_be_blank(rule), IsFalse());
   });
-
-  describe("checking recursively (by expanding non-terminals)", [&]() {
-    SyntaxGrammar grammar({
-      { "A", choice({
-        seq({ i_sym(0), i_token(11) }),
-        blank() }) },
-      { "B", choice({
-        seq({ i_sym(1), i_token(12) }),
-        i_token(13) }) },
-    }, {}, set<Symbol>());
-
-    it("terminates for left-recursive rules that can be blank", [&]() {
-      rule = i_sym(0);
-      AssertThat(rule_can_be_blank(rule, grammar), IsTrue());
-    });
-
-    it("terminates for left-recursive rules that can't be blank", [&]() {
-      rule = i_sym(1);
-      AssertThat(rule_can_be_blank(rule, grammar), IsFalse());
-    });
-  });
 });

 END_TEST
@@ -15,6 +15,15 @@ using std::initializer_list;
 using std::pair;
 using tree_sitter::rules::rule_ptr;

+template<typename T, typename Func>
+std::vector<typename std::result_of<Func(T)>::type>
+collect(const std::vector<T> &v, Func f) {
+  vector<typename std::result_of<Func(T)>::type> result;
+  for (const T &item : v)
+    result.push_back(f(item));
+  return result;
+}
+
 template<typename K>
 class rule_map : public map<K, rule_ptr> {
  public:
@@ -36,8 +36,8 @@ inline std::ostream& operator<<(std::ostream &stream, const std::set<T> &set) {
   return stream << ")";
 }

-template<typename T>
-inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T> &set) {
+template<typename T, typename H, typename E>
+inline std::ostream& operator<<(std::ostream &stream, const std::unordered_set<T, H, E> &set) {
   stream << std::string("(set: ");
   bool started = false;
   for (auto item : set) {
@@ -1,16 +1,17 @@
 #include "compiler/compiler_spec_helper.h"
-#include "compiler/syntax_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
 #include "compiler/prepare_grammar/expand_repeats.h"
 #include "compiler/helpers/containers.h"

 START_TEST

 using namespace rules;
+using prepare_grammar::InitialSyntaxGrammar;
 using prepare_grammar::expand_repeats;

 describe("expand_repeats", []() {
   it("replaces repeat rules with pairs of recursive rules", [&]() {
-    SyntaxGrammar grammar({
+    InitialSyntaxGrammar grammar({
       { "rule0", repeat(i_token(0)) },
     }, {}, set<Symbol>());

@@ -28,7 +29,7 @@ describe("expand_repeats", []() {
   });

   it("replaces repeats inside of sequences", [&]() {
-    SyntaxGrammar grammar({
+    InitialSyntaxGrammar grammar({
       { "rule0", seq({
         i_token(10),
         repeat(i_token(11)) }) },
@@ -50,7 +51,7 @@ describe("expand_repeats", []() {
   });

   it("replaces repeats inside of choices", [&]() {
-    SyntaxGrammar grammar({
+    InitialSyntaxGrammar grammar({
       { "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
     }, {}, set<Symbol>());

@@ -68,7 +69,7 @@ describe("expand_repeats", []() {
   });

   it("can replace multiple repeats in the same rule", [&]() {
-    SyntaxGrammar grammar({
+    InitialSyntaxGrammar grammar({
       { "rule0", seq({
         repeat(i_token(10)),
         repeat(i_token(11)) }) },
@@ -93,7 +94,7 @@ describe("expand_repeats", []() {
   });

   it("can replace repeats in multiple rules", [&]() {
-    SyntaxGrammar grammar({
+    InitialSyntaxGrammar grammar({
       { "rule0", repeat(i_token(10)) },
       { "rule1", repeat(i_token(11)) },
     }, {}, set<Symbol>());
spec/compiler/prepare_grammar/extract_choices_spec.cc (new file, 58 lines)

@@ -0,0 +1,58 @@
+#include "compiler/compiler_spec_helper.h"
+#include "compiler/prepare_grammar/extract_choices.h"
+#include "compiler/helpers/containers.h"
+
+START_TEST
+
+using namespace rules;
+using prepare_grammar::extract_choices;
+
+describe("extract_choices", []() {
+  it("expands rules containing choices into multiple rules", [&]() {
+    auto rule = seq({
+      sym("a"),
+      choice({ sym("b"), sym("c"), sym("d") }),
+      sym("e")
+    });
+
+    AssertThat(extract_choices(rule), Equals(rule_vector({
+      seq({ sym("a"), sym("b"), sym("e") }),
+      seq({ sym("a"), sym("c"), sym("e") }),
+      seq({ sym("a"), sym("d"), sym("e") }),
+    })));
+  });
+
+  it("handles metadata rules", [&]() {
+    auto rule = prec(5, choice({ sym("b"), sym("c"), sym("d") }));
+
+    AssertThat(extract_choices(rule), Equals(rule_vector({
+      prec(5, sym("b")),
+      prec(5, sym("c")),
+      prec(5, sym("d")),
+    })));
+  });
+
+  it("handles nested choices", [&]() {
+    auto rule = choice({
+      seq({ choice({ sym("a"), sym("b") }), sym("c") }),
+      sym("d")
+    });
+
+    AssertThat(extract_choices(rule), Equals(rule_vector({
+      seq({ sym("a"), sym("c") }),
+      seq({ sym("b"), sym("c") }),
+      sym("d"),
+    })));
+  });
+
+  it("handles repeats", [&]() {
+    auto rule = repeat(choice({ sym("a"), sym("b") }));
+
+    AssertThat(extract_choices(rule), Equals(rule_vector({
+      repeat(sym("a")),
+      repeat(sym("b")),
+    })));
+  });
+});
+
+END_TEST
@@ -1,6 +1,6 @@
 #include "compiler/compiler_spec_helper.h"
 #include "compiler/lexical_grammar.h"
-#include "compiler/syntax_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
 #include "compiler/prepare_grammar/extract_tokens.h"
 #include "compiler/helpers/containers.h"

@@ -8,10 +8,11 @@ START_TEST

 using namespace rules;
 using prepare_grammar::extract_tokens;
+using prepare_grammar::InitialSyntaxGrammar;

 describe("extract_tokens", []() {
   it("moves string rules into the lexical grammar", [&]() {
-    tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> result =
+    tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> result =
       extract_tokens(Grammar({
         { "rule_A", seq({ str("ab"), i_sym(0) }) }
       }));
spec/compiler/prepare_grammar/flatten_grammar_spec.cc (new file, 109 lines)

@@ -0,0 +1,109 @@
+#include "compiler/compiler_spec_helper.h"
+#include "compiler/prepare_grammar/flatten_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
+#include "compiler/syntax_grammar.h"
+#include "compiler/helpers/containers.h"
+
+START_TEST
+
+using namespace rules;
+using prepare_grammar::flatten_grammar;
+using prepare_grammar::InitialSyntaxGrammar;
+
+describe("flatten_grammar", []() {
+  InitialSyntaxGrammar input_grammar({
+    { "rule1", seq({
+      i_sym(1),
+      choice({ i_sym(2), i_sym(3) }),
+      i_sym(4) }) },
+    { "rule2", seq({
+      i_sym(1),
+      prec(50, seq({
+        i_sym(2),
+        choice({
+          prec(100, seq({
+            i_sym(3),
+            i_sym(4)
+          })),
+          i_sym(5),
+        }),
+        i_sym(6) })),
+      i_sym(7) }) },
+  }, {});
+
+  it("turns each rule into a list of possible symbol sequences", [&]() {
+    SyntaxGrammar grammar = flatten_grammar(input_grammar);
+
+    auto get_symbol_lists = [&](int rule_index) {
+      return collect(grammar.rules[rule_index].second, [](Production p) {
+        return collect(p.entries, [](ProductionEntry e) {
+          return e.symbol;
+        });
+      });
+    };
+
+    AssertThat(grammar.rules[0].first, Equals("rule1"));
+    AssertThat(grammar.rules[1].first, Equals("rule2"));
+
+    AssertThat(
+      get_symbol_lists(0),
+      Equals(vector<vector<Symbol>>({
+        { Symbol(1), Symbol(2), Symbol(4) },
+        { Symbol(1), Symbol(3), Symbol(4) }
+      })));
+
+    AssertThat(
+      get_symbol_lists(1),
+      Equals(vector<vector<Symbol>>({
+        { Symbol(1), Symbol(2), Symbol(3), Symbol(4), Symbol(6), Symbol(7) },
+        { Symbol(1), Symbol(2), Symbol(5), Symbol(6), Symbol(7) }
+      })));
+  });
+
+  it("associates each symbol with the precedence binding it to its previous neighbor", [&]() {
+    SyntaxGrammar grammar = flatten_grammar(input_grammar);
+
+    auto get_precedence_lists = [&](int rule_index) {
+      return collect(grammar.rules[rule_index].second, [](Production p) {
+        return collect(p.entries, [](ProductionEntry e) {
+          return e.precedence;
+        });
+      });
+    };
+
+    AssertThat(
+      get_precedence_lists(0),
+      Equals(vector<vector<int>>({
+        { 0, 0, 0 },
+        { 0, 0, 0 }
+      })));
+
+    AssertThat(
+      get_precedence_lists(1),
+      Equals(vector<vector<int>>({
+        { 0, 0, 50, 100, 50, 0 },
+        { 0, 0, 50, 50, 0 }
+      })));
+  });
+
+  it("associates each unique subsequence of symbols and precedences with a rule_id", [&]() {
+    SyntaxGrammar grammar = flatten_grammar(input_grammar);
+
+    auto rule_id = [&](int rule_index, int production_index, int symbol_index) {
+      return grammar.rules[rule_index].second[production_index].rule_id_at(symbol_index);
+    };
+
+    // Rule 1: last symbol is the same for both productions.
+    AssertThat(rule_id(0, 0, 0), !Equals(rule_id(0, 1, 0)));
+    AssertThat(rule_id(0, 0, 1), !Equals(rule_id(0, 1, 1)));
+    AssertThat(rule_id(0, 0, 2), Equals(rule_id(0, 1, 2)));
+
+    // Rule 2: last two symbols are the same for both productions.
+    AssertThat(rule_id(1, 0, 0), !Equals(rule_id(1, 1, 0)));
+    AssertThat(rule_id(1, 0, 1), !Equals(rule_id(1, 1, 1)));
+    AssertThat(rule_id(1, 0, 4), Equals(rule_id(1, 1, 3)));
+    AssertThat(rule_id(1, 0, 5), Equals(rule_id(1, 1, 4)));
+  });
+});
+
+END_TEST
@@ -5,7 +5,6 @@
 #include "tree_sitter/compiler.h"
 #include "compiler/parse_table.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/syntax_grammar.h"

 namespace tree_sitter {
 namespace build_tables {
@@ -40,12 +40,8 @@ class ParseTableBuilder {
       : grammar(grammar), lex_grammar(lex_grammar) {}

   pair<ParseTable, vector<Conflict>> build() {
-    auto start_symbol = grammar.rules.empty()
-      ? make_shared<Symbol>(0, rules::SymbolOptionToken)
-      : make_shared<Symbol>(0);
-    ParseItem start_item(rules::START(), start_symbol, 0);
-    add_parse_state(
-      item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));
+    ParseItem start_item(rules::START(), 0, -2, 0);
+    add_parse_state(item_set_closure(start_item, { rules::END_OF_INPUT() }, grammar));

     while (!item_sets_to_process.empty()) {
       auto pair = item_sets_to_process.back();
@@ -100,12 +96,13 @@ class ParseTableBuilder {
       const ParseItem &item = pair.first;
       const set<Symbol> &lookahead_symbols = pair.second;

-      if (item.is_done()) {
+
+      if (item_is_done(item)) {
         ParseAction action =
           (item.lhs == rules::START())
             ? ParseAction::Accept()
             : ParseAction::Reduce(item.lhs, item.consumed_symbol_count,
-                                  item.precedence());
+                                  item_precedence(item));

         for (const auto &lookahead_sym : lookahead_symbols)
           if (should_add_action(state_id, lookahead_sym, action, ParseItemSet()))
@@ -170,11 +167,19 @@ class ParseTableBuilder {
     for (const auto &pair : item_set) {
       const ParseItem &item = pair.first;
       if (item.consumed_symbol_count > 0)
-        result.insert(item.precedence());
+        result.insert(item_precedence(item));
     }
     return result;
   }

+  bool item_is_done(const ParseItem &item) {
+    return item.consumed_symbol_count == grammar.productions(item.lhs)[item.production_index].size();
+  }
+
+  int item_precedence(const ParseItem &item) {
+    return grammar.productions(item.lhs)[item.production_index].precedence_at(item.consumed_symbol_count - 1);
+  }
+
   void record_conflict(const Symbol &sym, const ParseAction &left,
                        const ParseAction &right, const ParseItemSet &item_set) {
     conflicts.insert(
@@ -1,68 +0,0 @@
-#include "compiler/build_tables/first_symbols.h"
-#include "compiler/build_tables/rule_can_be_blank.h"
-#include "compiler/syntax_grammar.h"
-#include "compiler/rules/choice.h"
-#include "compiler/rules/metadata.h"
-#include "compiler/rules/seq.h"
-#include "compiler/rules/symbol.h"
-#include "compiler/rules/visitor.h"
-#include "tree_sitter/compiler.h"
-
-namespace tree_sitter {
-namespace build_tables {
-
-using std::set;
-using rules::Symbol;
-
-class FirstSymbols : public rules::RuleFn<set<Symbol>> {
-  const SyntaxGrammar *grammar;
-  set<Symbol> visited_symbols;
-
- public:
-  explicit FirstSymbols(const SyntaxGrammar *grammar) : grammar(grammar) {}
-
- private:
-  set<Symbol> apply_to(const Symbol *rule) {
-    auto insertion_result = visited_symbols.insert(*rule);
-    if (!insertion_result.second)
-      return set<Symbol>();
-
-    set<Symbol> result({ *rule });
-    if (!rule->is_token()) {
-      set<Symbol> &&symbols = apply(grammar->rule(*rule));
-      result.insert(symbols.begin(), symbols.end());
-    }
-
-    return result;
-  }
-
-  set<Symbol> apply_to(const rules::Metadata *rule) {
-    return apply(rule->rule);
-  }
-
-  set<Symbol> apply_to(const rules::Choice *rule) {
-    set<Symbol> result;
-    for (const auto &element : rule->elements) {
-      auto &&element_symbols = apply(element);
-      result.insert(element_symbols.begin(), element_symbols.end());
-    }
-    return result;
-  }
-
-  set<Symbol> apply_to(const rules::Seq *rule) {
-    auto &&result = apply(rule->left);
-    if (rule_can_be_blank(rule->left, *grammar)) {
-      auto &&right_symbols = apply(rule->right);
-      result.insert(right_symbols.begin(), right_symbols.end());
-    }
-    return result;
-  }
-};
-
-set<Symbol> first_symbols(const rules::rule_ptr &rule,
-                          const SyntaxGrammar &grammar) {
-  return FirstSymbols(&grammar).apply(rule);
-}
-
-} // namespace build_tables
-} // namespace tree_sitter
@@ -1,24 +0,0 @@
-#ifndef COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
-#define COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
-
-#include <set>
-#include "compiler/rules/symbol.h"
-#include "tree_sitter/compiler.h"
-
-namespace tree_sitter {
-
-class SyntaxGrammar;
-
-namespace build_tables {
-
-/*
- * Returns the set of symbols that can appear at the beginning of a sentential
- * form derivable from a given rule in a given grammar.
- */
-std::set<rules::Symbol> first_symbols(const rules::rule_ptr &rule,
-                                      const SyntaxGrammar &grammar);
-
-} // namespace build_tables
-} // namespace tree_sitter
-
-#endif  // COMPILER_BUILD_TABLES_FIRST_SYMBOLS_H_
@@ -3,7 +3,6 @@
 #include <vector>
 #include <utility>
 #include "tree_sitter/compiler.h"
-#include "compiler/build_tables/first_symbols.h"
 #include "compiler/build_tables/rule_transitions.h"
 #include "compiler/build_tables/rule_can_be_blank.h"
 #include "compiler/build_tables/item.h"
@@ -34,24 +33,41 @@ const ParseItemSet item_set_closure(const ParseItem &starting_item,
     size_t previous_size = lookahead_symbols.size();
     lookahead_symbols.insert(new_lookahead_symbols.begin(),
                              new_lookahead_symbols.end());

     if (lookahead_symbols.size() == previous_size)
       continue;

-    for (const auto &pair : sym_transitions(item.rule)) {
-      const Symbol &symbol = pair.first;
-      const rule_ptr &next_rule = pair.second;
-
-      if (symbol.is_token() || symbol.is_built_in())
-        continue;
-
-      set<Symbol> next_lookahead_symbols = first_symbols(next_rule, grammar);
-      if (rule_can_be_blank(next_rule, grammar))
-        next_lookahead_symbols.insert(lookahead_symbols.begin(),
-                                      lookahead_symbols.end());
-
-      items_to_process.push_back({ ParseItem(symbol, grammar.rule(symbol), 0),
-                                   next_lookahead_symbols });
-    }
+    const Production &item_production = grammar.productions(item.lhs)[item.production_index];
+    if (item_production.size() <= item.consumed_symbol_count)
+      continue;
+
+    Symbol symbol = item_production.symbol_at(item.consumed_symbol_count);
+    if (symbol.is_token() || symbol.is_built_in())
+      continue;
+
+    set<Symbol> next_lookahead_symbols;
+    if (item.consumed_symbol_count + 1 >= item_production.size()) {
+      next_lookahead_symbols = lookahead_symbols;
+    } else {
+      vector<Symbol> symbols_to_process({ item_production.symbol_at(item.consumed_symbol_count + 1) });
+
+      while (!symbols_to_process.empty()) {
+        Symbol following_symbol = symbols_to_process.back();
+        symbols_to_process.pop_back();
+        if (!next_lookahead_symbols.insert(following_symbol).second)
+          continue;
+
+        for (const auto &production : grammar.productions(following_symbol))
+          symbols_to_process.push_back(production.symbol_at(0));
+      }
+    }
+
+    size_t i = 0;
+    for (const Production &production : grammar.productions(symbol)) {
+      items_to_process.push_back({
+        ParseItem(symbol, i, production.rule_id_at(0), 0),
+        next_lookahead_symbols
+      });
+      i++;
+    }
   }
@@ -21,18 +21,30 @@ map<Symbol, ParseItemSet> sym_transitions(const ParseItemSet &item_set,
   for (const auto &pair : item_set) {
     const ParseItem &item = pair.first;
     const set<Symbol> &lookahead_symbols = pair.second;
-    for (auto &transition : sym_transitions(item.rule)) {
-      ParseItem new_item(item.lhs, transition.second,
-                         item.consumed_symbol_count + 1);
-      merge_sym_transition<ParseItemSet>(
-          &result, { transition.first,
-                     item_set_closure(new_item, lookahead_symbols, grammar) },
-          [](ParseItemSet *left, const ParseItemSet *right) {
-            for (auto &pair : *right)
-              left->operator[](pair.first)
-                  .insert(pair.second.begin(), pair.second.end());
-          });
-    }
+    const auto &productions = grammar.productions(item.lhs);
+    if (productions.empty())
+      continue;
+
+    const Production &production = grammar.productions(item.lhs)[item.production_index];
+    if (production.size() <= item.consumed_symbol_count)
+      continue;
+
+    const Symbol &symbol = production.symbol_at(item.consumed_symbol_count);
+    ParseItem new_item(
+      item.lhs,
+      item.production_index,
+      production.rule_id_at(item.consumed_symbol_count + 1),
+      item.consumed_symbol_count + 1
+    );
+
+    merge_sym_transition<ParseItemSet>(
+        &result,
+        { symbol, item_set_closure(new_item, { lookahead_symbols }, grammar) },
+        [](ParseItemSet *left, const ParseItemSet *right) {
+          for (auto &pair : *right)
+            left->operator[](pair.first)
+                .insert(pair.second.begin(), pair.second.end());
+        });
   }
   return result;
 }
@@ -1,25 +1,32 @@
 #include "compiler/build_tables/parse_item.h"
+#include "compiler/syntax_grammar.h"
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {
 namespace build_tables {

 using std::string;
+using std::to_string;
 using std::ostream;

-ParseItem::ParseItem(const rules::Symbol &lhs, const rules::rule_ptr rule,
-                     size_t consumed_symbol_count)
-    : Item(lhs, rule), consumed_symbol_count(consumed_symbol_count) {}
+ParseItem::ParseItem(const rules::Symbol &lhs, size_t production_index,
+                     int rule_id, size_t consumed_symbol_count)
+    : lhs(lhs), production_index(production_index),
+      rule_id(rule_id),
+      consumed_symbol_count(consumed_symbol_count) {}

 bool ParseItem::operator==(const ParseItem &other) const {
   return (lhs == other.lhs) &&
-         (consumed_symbol_count == other.consumed_symbol_count) &&
-         (rule == other.rule || rule->operator==(*other.rule));
+         (rule_id == other.rule_id) &&
+         (consumed_symbol_count == other.consumed_symbol_count);
 }

 ostream &operator<<(ostream &stream, const ParseItem &item) {
-  return stream << string("(item ") << item.lhs << string(" ") << *item.rule
-                << string(")");
+  return stream << string("(item lhs:") << item.lhs
+                << string(" index:") << to_string(item.production_index)
+                << string(" remaining_rule:") << to_string(item.rule_id)
+                << string(" consumed:") << to_string(item.consumed_symbol_count)
+                << string(")");
 }

 } // namespace build_tables
@@ -9,11 +9,15 @@
 namespace tree_sitter {
 namespace build_tables {

-class ParseItem : public Item {
+class ParseItem {
  public:
-  ParseItem(const rules::Symbol &lhs, rules::rule_ptr rule,
-            const size_t consumed_symbol_count);
+  ParseItem(const rules::Symbol &lhs, size_t production_index,
+            int rule_id, size_t consumed_symbol_count);
   bool operator==(const ParseItem &other) const;

+  rules::Symbol lhs;
+  size_t production_index;
+  int rule_id;
   size_t consumed_symbol_count;
 };
@@ -30,8 +34,8 @@ template <>
 struct hash<tree_sitter::build_tables::ParseItem> {
   size_t operator()(const tree_sitter::build_tables::ParseItem &item) const {
     return hash<tree_sitter::rules::Symbol>()(item.lhs) ^
-           hash<tree_sitter::rules::rule_ptr>()(item.rule) ^
+           hash<int>()(item.rule_id) ^
            hash<size_t>()(item.consumed_symbol_count);
   }
 };
@@ -1,7 +1,5 @@
 #include "compiler/build_tables/rule_can_be_blank.h"
-#include <set>
 #include "tree_sitter/compiler.h"
-#include "compiler/syntax_grammar.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/rules/visitor.h"
 #include "compiler/rules/seq.h"
@@ -12,8 +10,6 @@
 namespace tree_sitter {
 namespace build_tables {

-using std::set;
-
 class CanBeBlank : public rules::RuleFn<bool> {
  protected:
   bool apply_to(const rules::Blank *) { return true; }
@@ -34,36 +30,9 @@ class CanBeBlank : public rules::RuleFn<bool> {
   bool apply_to(const rules::Metadata *rule) { return apply(rule->rule); }
 };

-class CanBeBlankRecursive : public CanBeBlank {
-  const SyntaxGrammar *grammar;
-  set<rules::Symbol> visited_symbols;
-  using CanBeBlank::visit;
-
- public:
-  explicit CanBeBlankRecursive(const SyntaxGrammar *grammar)
-      : grammar(grammar) {}
-
- private:
-  using CanBeBlank::apply_to;
-
-  bool apply_to(const rules::Symbol *rule) {
-    if (visited_symbols.find(*rule) == visited_symbols.end()) {
-      visited_symbols.insert(*rule);
-      return !rule->is_token() && apply(grammar->rule(*rule));
-    } else {
-      return false;
-    }
-  }
-};
-
 bool rule_can_be_blank(const rules::rule_ptr &rule) {
   return CanBeBlank().apply(rule);
 }

-bool rule_can_be_blank(const rules::rule_ptr &rule,
-                       const SyntaxGrammar &grammar) {
-  return CanBeBlankRecursive(&grammar).apply(rule);
-}
-
 } // namespace build_tables
 } // namespace tree_sitter
@@ -4,14 +4,9 @@
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {

-class SyntaxGrammar;
-
 namespace build_tables {

 bool rule_can_be_blank(const rules::rule_ptr &rule);
-bool rule_can_be_blank(const rules::rule_ptr &rule,
-                       const SyntaxGrammar &grammar);

 } // namespace build_tables
 } // namespace tree_sitter
@@ -2,7 +2,7 @@
 #include <vector>
 #include <string>
 #include <utility>
-#include "compiler/syntax_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
 #include "compiler/rules/visitor.h"
 #include "compiler/rules/seq.h"
 #include "compiler/rules/symbol.h"
@@ -50,7 +50,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
   vector<pair<string, rules::rule_ptr>> aux_rules;
 };

-SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
+InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
   vector<pair<string, rules::rule_ptr>> rules, aux_rules(grammar.aux_rules);

   for (auto &pair : grammar.rules) {
@@ -60,7 +60,7 @@ SyntaxGrammar expand_repeats(const SyntaxGrammar &grammar) {
                      expander.aux_rules.end());
   }

-  return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
+  return InitialSyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
 }

 } // namespace prepare_grammar
@@ -4,12 +4,11 @@
 #include "tree_sitter/compiler.h"

 namespace tree_sitter {

-class SyntaxGrammar;
-
 namespace prepare_grammar {

-SyntaxGrammar expand_repeats(const SyntaxGrammar &);
+class InitialSyntaxGrammar;
+
+InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &);

 } // namespace prepare_grammar
 } // namespace tree_sitter
src/compiler/prepare_grammar/extract_choices.cc (new file, 58 lines)

@@ -0,0 +1,58 @@
+#include "compiler/prepare_grammar/extract_choices.h"
+#include <vector>
+#include <memory>
+#include "compiler/rules/visitor.h"
+#include "compiler/rules/seq.h"
+#include "compiler/rules/choice.h"
+#include "compiler/rules/metadata.h"
+#include "compiler/rules/repeat.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+using std::make_shared;
+using std::vector;
+using rules::rule_ptr;
+
+class ExtractChoices : public rules::RuleFn<vector<rule_ptr>> {
+  vector<rule_ptr> default_apply(const rules::Rule *rule) {
+    return vector<rule_ptr>({ rule->copy() });
+  }
+
+  vector<rule_ptr> apply_to(const rules::Seq *rule) {
+    vector<rule_ptr> result;
+    for (auto left_entry : apply(rule->left))
+      for (auto right_entry : apply(rule->right))
+        result.push_back(rules::Seq::build({ left_entry, right_entry }));
+    return result;
+  }
+
+  vector<rule_ptr> apply_to(const rules::Metadata *rule) {
+    vector<rule_ptr> result;
+    for (auto entry : apply(rule->rule))
+      result.push_back(make_shared<rules::Metadata>(entry, rule->value));
+    return result;
+  }
+
+  vector<rule_ptr> apply_to(const rules::Choice *rule) {
+    vector<rule_ptr> result;
+    for (auto element : rule->elements)
+      for (auto entry : apply(element))
+        result.push_back(entry);
+    return result;
+  }
+
+  vector<rule_ptr> apply_to(const rules::Repeat *rule) {
+    vector<rule_ptr> result;
+    for (auto element : apply(rule->content))
+      result.push_back(make_shared<rules::Repeat>(element));
+    return result;
+  }
+};
+
+std::vector<rules::rule_ptr> extract_choices(const rules::rule_ptr &rule) {
+  return ExtractChoices().apply(rule);
+}
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
src/compiler/prepare_grammar/extract_choices.h (new file, 15 lines)

@@ -0,0 +1,15 @@
+#ifndef COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
+#define COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
+
+#include <vector>
+#include "tree_sitter/compiler.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+std::vector<rules::rule_ptr> extract_choices(const rules::rule_ptr &);
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
+
+#endif  // COMPILER_PREPARE_GRAMMAR_EXTRACT_CHOICES_H_
@@ -5,7 +5,7 @@
 #include <string>
 #include "tree_sitter/compiler.h"
 #include "compiler/lexical_grammar.h"
-#include "compiler/syntax_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
 #include "compiler/rules/visitor.h"
 #include "compiler/rules/symbol.h"
 #include "compiler/rules/string.h"
@@ -92,14 +92,14 @@ class TokenExtractor : public rules::IdentityRuleFn {
   vector<pair<string, rule_ptr>> tokens;
 };

-static tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> ubiq_token_err(
+static tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> ubiq_token_err(
     const string &msg) {
-  return make_tuple(SyntaxGrammar(), LexicalGrammar(),
+  return make_tuple(InitialSyntaxGrammar(), LexicalGrammar(),
                     new GrammarError(GrammarErrorTypeInvalidUbiquitousToken,
                                      "Not a token: " + msg));
 }

-tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
+tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
     const Grammar &grammar) {
   vector<pair<string, rule_ptr>> rules, tokens;
   vector<rule_ptr> separators;
@@ -139,7 +139,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
     }
   }

-  return make_tuple(SyntaxGrammar(rules, {}, ubiquitous_tokens),
+  return make_tuple(InitialSyntaxGrammar(rules, {}, ubiquitous_tokens),
                     LexicalGrammar(tokens, extractor.tokens, separators),
                     nullptr);
 }
@@ -7,12 +7,13 @@
 namespace tree_sitter {

 class Grammar;
-class SyntaxGrammar;
 class LexicalGrammar;

 namespace prepare_grammar {

-std::tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
+class InitialSyntaxGrammar;
+
+std::tuple<InitialSyntaxGrammar, LexicalGrammar, const GrammarError *> extract_tokens(
     const Grammar &);

 } // namespace prepare_grammar
src/compiler/prepare_grammar/flatten_grammar.cc (new file, 145 lines)

@@ -0,0 +1,145 @@
+#include "compiler/prepare_grammar/flatten_grammar.h"
+#include "compiler/prepare_grammar/extract_choices.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
+#include "compiler/rules/visitor.h"
+#include "compiler/rules/seq.h"
+#include "compiler/rules/symbol.h"
+#include "compiler/rules/metadata.h"
+#include <string>
+#include <algorithm>
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+using std::find;
+using std::pair;
+using std::string;
+using std::vector;
+using rules::rule_ptr;
+
+class FlattenRule : public rules::RuleFn<void> {
+ public:
+  bool has_pending_precedence;
+  int pending_precedence;
+  vector<int> precedence_stack;
+  vector<ProductionEntry> entries;
+
+  FlattenRule() : has_pending_precedence(false), pending_precedence(0) {}
+
+  void apply_to(const rules::Symbol *sym) {
+    entries.push_back({ *sym, current_precedence(), 0 });
+    if (has_pending_precedence) {
+      precedence_stack.push_back(pending_precedence);
+      has_pending_precedence = false;
+    }
+  }
+
+  void apply_to(const rules::Metadata *metadata) {
+    int precedence = metadata->value_for(rules::PRECEDENCE);
+    if (precedence != 0) {
+      pending_precedence = precedence;
+      has_pending_precedence = true;
+      apply(metadata->rule);
+      precedence_stack.pop_back();
+    } else {
+      apply(metadata->rule);
+    }
+  }
+
+  void apply_to(const rules::Seq *seq) {
+    apply(seq->left);
+    apply(seq->right);
+  }
+
+ private:
+  int current_precedence() {
+    if (precedence_stack.empty())
+      return 0;
+    else
+      return *precedence_stack.rbegin();
+  }
+};
+
+Production flatten_rule(const rule_ptr &rule) {
+  FlattenRule flattener;
+  flattener.apply(rule);
+  return Production(flattener.entries, 0);
+}
+
+struct ProductionSlice {
+  vector<ProductionEntry>::const_iterator start;
+  vector<ProductionEntry>::const_iterator end;
+  int end_precedence;
+
+  bool operator==(const ProductionSlice &other) const {
+    if (end_precedence != other.end_precedence) return false;
+    if (end - start != other.end - other.start) return false;
+    for (auto iter1 = start, iter2 = other.start; iter1 != end; ++iter1, ++iter2)
+      if (!(iter1->symbol == iter2->symbol) || iter1->precedence != iter2->precedence)
+        return false;
+    return true;
+  }
+};
+
+void assign_rule_ids(Production *production, vector<ProductionSlice> *unique_slices) {
+  auto &entries = production->entries;
+  auto end = entries.end();
+
+  for (auto iter = entries.begin(); iter != end; ++iter) {
+    ProductionSlice slice{iter, end, 0};
+    auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
+    if (existing_id == unique_slices->end()) {
+      unique_slices->push_back(slice);
+      iter->rule_id = unique_slices->size() - 1;
+    } else {
+      iter->rule_id = existing_id - unique_slices->cbegin();
+    }
+  }
+
+  ProductionSlice slice{end, end, production->precedence_at(production->size() - 1)};
+  auto existing_id = find(unique_slices->cbegin(), unique_slices->cend(), slice);
+  if (existing_id == unique_slices->end()) {
+    unique_slices->push_back(slice);
+    production->end_rule_id = unique_slices->size() - 1;
+  } else {
+    production->end_rule_id = existing_id - unique_slices->cbegin();
+  }
+}
+
+SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &grammar) {
+  vector<pair<string, vector<Production>>> rules, aux_rules;
+
+  for (const auto &pair : grammar.rules) {
+    vector<Production> productions;
+    for (const auto &rule_component : extract_choices(pair.second))
+      productions.push_back(flatten_rule(rule_component));
+    rules.push_back({ pair.first, productions });
+  }
+
+  for (const auto &pair : grammar.aux_rules) {
+    vector<Production> productions;
+    for (const auto &rule_component : extract_choices(pair.second))
+      productions.push_back(flatten_rule(rule_component));
+    aux_rules.push_back({ pair.first, productions });
+  }
+
+  if (rules.empty()) {
+    rules.push_back({
+      "START",
+      { Production({ {rules::Symbol(0, rules::SymbolOptionToken), 0, 0} }, 0) }
+    });
+  }
+
+  vector<ProductionSlice> unique_slices;
+  for (auto &pair : rules)
+    for (Production &production : pair.second)
+      assign_rule_ids(&production, &unique_slices);
+  for (auto &pair : aux_rules)
+    for (Production &production : pair.second)
+      assign_rule_ids(&production, &unique_slices);
+
+  return SyntaxGrammar(rules, aux_rules, grammar.ubiquitous_tokens);
+}
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
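The assign_rule_ids pass above interns every distinct suffix of (symbol, precedence) entries, so productions that end the same way share their trailing rule ids. A hypothetical sketch of that interning idea, using strings in place of ProductionSlice (intern is an illustrative helper, not part of this commit):

    #include <string>
    #include <vector>

    // Returns the id of `suffix`, appending it to `table` on first sight.
    int intern(std::vector<std::string> *table, const std::string &suffix) {
      for (size_t i = 0; i < table->size(); i++)
        if ((*table)[i] == suffix)
          return static_cast<int>(i);
      table->push_back(suffix);
      return static_cast<int>(table->size() - 1);
    }

    // For a production with symbols 1 2 4, the suffixes "124", "24", "4", ""
    // receive fresh ids; a sibling production 1 3 4 then reuses the ids of
    // "4" and "", which is the sharing that flatten_grammar_spec.cc asserts
    // via rule_id_at above.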
src/compiler/prepare_grammar/flatten_grammar.h (new file, 13 lines)

@@ -0,0 +1,13 @@
+#include <string>
+#include "tree_sitter/compiler.h"
+#include "compiler/syntax_grammar.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+class InitialSyntaxGrammar;
+
+SyntaxGrammar flatten_grammar(const InitialSyntaxGrammar &);
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
src/compiler/prepare_grammar/initial_syntax_grammar.cc (new file, 37 lines)

@@ -0,0 +1,37 @@
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
+#include <vector>
+#include <string>
+#include <utility>
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+using std::string;
+using std::pair;
+using std::vector;
+using std::set;
+
+InitialSyntaxGrammar::InitialSyntaxGrammar() {}
+
+InitialSyntaxGrammar::InitialSyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
+                                           const vector<pair<string, rules::rule_ptr>> &aux_rules)
+    : rules(rules), aux_rules(aux_rules) {}
+
+InitialSyntaxGrammar::InitialSyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
+                                           const vector<pair<string, rules::rule_ptr>> &aux_rules,
+                                           const set<rules::Symbol> &ubiquitous_tokens)
+    : rules(rules), aux_rules(aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}
+
+const rules::rule_ptr &InitialSyntaxGrammar::rule(const rules::Symbol &symbol) const {
+  return symbol.is_auxiliary() ? aux_rules[symbol.index].second
+                               : rules[symbol.index].second;
+}
+
+const string &InitialSyntaxGrammar::rule_name(const rules::Symbol &symbol) const {
+  return symbol.is_auxiliary() ? aux_rules[symbol.index].first
+                               : rules[symbol.index].first;
+}
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
src/compiler/prepare_grammar/initial_syntax_grammar.h (new file, 36 lines)

@@ -0,0 +1,36 @@
+#ifndef COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
+#define COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
+
+#include <vector>
+#include <string>
+#include <set>
+#include <utility>
+#include "tree_sitter/compiler.h"
+#include "compiler/rules/symbol.h"
+
+namespace tree_sitter {
+namespace prepare_grammar {
+
+class InitialSyntaxGrammar {
+ public:
+  InitialSyntaxGrammar();
+  InitialSyntaxGrammar(
+      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
+      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
+  InitialSyntaxGrammar(
+      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
+      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
+      const std::set<rules::Symbol> &ubiquitous_tokens);
+
+  const std::string &rule_name(const rules::Symbol &symbol) const;
+  const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
+
+  const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
+  const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
+  std::set<rules::Symbol> ubiquitous_tokens;
+};
+
+} // namespace prepare_grammar
+} // namespace tree_sitter
+
+#endif  // COMPILER_INITIAL_SYNTAX_GRAMMAR_H_
@@ -1,9 +1,11 @@
+#include "compiler/prepare_grammar/prepare_grammar.h"
 #include "compiler/prepare_grammar/expand_repeats.h"
 #include "compiler/prepare_grammar/expand_tokens.h"
 #include "compiler/prepare_grammar/extract_tokens.h"
 #include "compiler/prepare_grammar/intern_symbols.h"
-#include "compiler/prepare_grammar/prepare_grammar.h"
+#include "compiler/prepare_grammar/flatten_grammar.h"
 #include "compiler/lexical_grammar.h"
+#include "compiler/prepare_grammar/initial_syntax_grammar.h"
 #include "compiler/syntax_grammar.h"

 namespace tree_sitter {
@@ -29,7 +31,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
     return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);

   // Replace `Repeat` rules with pairs of recursive rules
-  const SyntaxGrammar &syntax_grammar = expand_repeats(get<0>(extract_result));
+  const InitialSyntaxGrammar &syntax_grammar = expand_repeats(get<0>(extract_result));

   // Expand `String` and `Pattern` rules into full rule trees
   auto expand_tokens_result = expand_tokens(get<1>(extract_result));
@@ -38,7 +40,7 @@ tuple<SyntaxGrammar, LexicalGrammar, const GrammarError *> prepare_grammar(
   if (error)
     return make_tuple(SyntaxGrammar(), LexicalGrammar(), error);

-  return make_tuple(syntax_grammar, lex_grammar, nullptr);
+  return make_tuple(flatten_grammar(syntax_grammar), lex_grammar, nullptr);
 }

 } // namespace prepare_grammar
@@ -3,33 +3,92 @@
 #include <string>
 #include <utility>
 #include "compiler/rules/symbol.h"
+#include "compiler/rules/built_in_symbols.h"

 namespace tree_sitter {

 using std::string;
+using std::to_string;
 using std::pair;
 using std::vector;
 using std::set;

+static const vector<Production> START_PRODUCTIONS({
+  Production({ {rules::Symbol(0), 0, -1} }, 2)
+});
+
+static const vector<Production> NO_PRODUCTIONS({});
+
+bool ProductionEntry::operator==(const ProductionEntry &other) const {
+  return symbol == other.symbol && precedence == other.precedence &&
+         rule_id == other.rule_id;
+}
+
+Production::Production(const vector<ProductionEntry> &entries, int last_rule_id) :
+  entries(entries), end_rule_id(last_rule_id) {}
+
+int Production::precedence_at(size_t index) const {
+  if (index >= size())
+    return 0;
+  else
+    return entries[index].precedence;
+}
+
+int Production::rule_id_at(size_t index) const {
+  if (index >= size())
+    return end_rule_id;
+  else
+    return entries[index].rule_id;
+}
+
+const rules::Symbol &Production::symbol_at(size_t index) const {
+  return entries[index].symbol;
+}
+
+size_t Production::size() const {
+  return entries.size();
+}
+
 SyntaxGrammar::SyntaxGrammar() {}

-SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
-                             const vector<pair<string, rules::rule_ptr>> &aux_rules)
-    : rules(rules), aux_rules(aux_rules) {}
-
-SyntaxGrammar::SyntaxGrammar(const vector<pair<string, rules::rule_ptr>> &rules,
-                             const vector<pair<string, rules::rule_ptr>> &aux_rules,
-                             const set<rules::Symbol> &ubiquitous_tokens)
+SyntaxGrammar::SyntaxGrammar(
+    const vector<pair<string, vector<Production>>> &rules,
+    const vector<pair<string, vector<Production>>> &aux_rules,
+    const set<rules::Symbol> &ubiquitous_tokens)
     : rules(rules), aux_rules(aux_rules), ubiquitous_tokens(ubiquitous_tokens) {}

-const rules::rule_ptr &SyntaxGrammar::rule(const rules::Symbol &symbol) const {
-  return symbol.is_auxiliary() ? aux_rules[symbol.index].second
-                               : rules[symbol.index].second;
-}
-
 const string &SyntaxGrammar::rule_name(const rules::Symbol &symbol) const {
   return symbol.is_auxiliary() ? aux_rules[symbol.index].first
                                : rules[symbol.index].first;
 }

+const vector<Production> &SyntaxGrammar::productions(const rules::Symbol &symbol) const {
+  if (symbol == rules::START())
+    return START_PRODUCTIONS;
+  if (symbol.is_built_in() || symbol.is_token())
+    return NO_PRODUCTIONS;
+  if (symbol.is_auxiliary())
+    return aux_rules[symbol.index].second;
+  else
+    return rules[symbol.index].second;
+}
+
+std::ostream &operator<<(std::ostream &stream, const ProductionEntry &entry) {
+  return stream << string("(entry symbol:") << entry.symbol <<
+    string(" precedence: ") << to_string(entry.precedence) <<
+    string(" id: ") << to_string(entry.rule_id) << string(")");
+}
+
+std::ostream &operator<<(std::ostream &stream, const Production &production) {
+  stream << string("(production entries: (");
+  bool started = false;
+  for (const auto &entry : production.entries) {
+    if (started) stream << string(" ");
+    stream << entry;
+    started = true;
+  }
+  return stream << string(") end_rule_id: ") <<
+    to_string(production.end_rule_id) << string(")");
+}
+
 } // namespace tree_sitter
@@ -10,22 +10,41 @@
 namespace tree_sitter {

+struct ProductionEntry {
+  rules::Symbol symbol;
+  int precedence;
+  int rule_id;
+
+  bool operator==(const ProductionEntry &) const;
+};
+
+class Production {
+ public:
+  std::vector<ProductionEntry> entries;
+  int end_rule_id;
+  Production(const std::vector<ProductionEntry> &, int);
+  size_t size() const;
+  const rules::Symbol &symbol_at(size_t) const;
+  int precedence_at(size_t) const;
+  int rule_id_at(size_t) const;
+};
+
+std::ostream &operator<<(std::ostream &, const ProductionEntry &);
+std::ostream &operator<<(std::ostream &, const Production &);
+
 class SyntaxGrammar {
  public:
   SyntaxGrammar();
   SyntaxGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
-  SyntaxGrammar(
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
-      const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
+      const std::vector<std::pair<std::string, std::vector<Production>>> &rules,
+      const std::vector<std::pair<std::string, std::vector<Production>>> &aux_rules,
       const std::set<rules::Symbol> &ubiquitous_tokens);

   const std::string &rule_name(const rules::Symbol &symbol) const;
-  const rules::rule_ptr &rule(const rules::Symbol &symbol) const;
+  const std::vector<Production> &productions(const rules::Symbol &) const;

-  const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
-  const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
+  std::vector<std::pair<std::string, std::vector<Production>>> rules;
+  std::vector<std::pair<std::string, std::vector<Production>>> aux_rules;
   std::set<rules::Symbol> ubiquitous_tokens;
 };