🎨
This commit is contained in:
parent
686dc0997c
commit
abf8a4f2c2
28 changed files with 313 additions and 356 deletions
|
|
@ -25,7 +25,6 @@
|
|||
'src/compiler/compile.cc',
|
||||
'src/compiler/generate_code/c_code.cc',
|
||||
'src/compiler/lex_table.cc',
|
||||
'src/compiler/lexical_grammar.cc',
|
||||
'src/compiler/parse_grammar.cc',
|
||||
'src/compiler/parse_table.cc',
|
||||
'src/compiler/precedence_range.cc',
|
||||
|
|
@ -42,7 +41,6 @@
|
|||
'src/compiler/prepare_grammar/token_description.cc',
|
||||
'src/compiler/rule.cc',
|
||||
'src/compiler/syntax_grammar.cc',
|
||||
'src/compiler/variable.cc',
|
||||
'src/compiler/rules/blank.cc',
|
||||
'src/compiler/rules/built_in_symbols.cc',
|
||||
'src/compiler/rules/character_range.cc',
|
||||
|
|
|
|||
|
|
@ -20,15 +20,18 @@ describe("recovery_tokens(rule)", []() {
|
|||
};
|
||||
|
||||
grammar.variables = {
|
||||
LexicalVariable("var0", VariableTypeNamed, character({}, false), false),
|
||||
LexicalVariable("var1", VariableTypeNamed, seq({
|
||||
LexicalVariable{"var0", VariableTypeNamed, character({}, false), false},
|
||||
LexicalVariable{"var1", VariableTypeNamed, seq({
|
||||
character({ 'a', 'b' }),
|
||||
character({}, false),
|
||||
character({ 'c', 'd' }),
|
||||
}), false),
|
||||
}), false},
|
||||
};
|
||||
|
||||
AssertThat(get_compatible_tokens(grammar).recovery_tokens, Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
AssertThat(
|
||||
get_compatible_tokens(grammar).recovery_tokens,
|
||||
Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) })
|
||||
);
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -26,13 +26,13 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
|
|
@ -40,13 +40,13 @@ describe("ParseItemSetBuilder", []() {
|
|||
Production({
|
||||
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule2", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
|
|
@ -85,19 +85,19 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
it("handles rules with empty productions", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}),
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -11,141 +12,159 @@ using prepare_grammar::expand_repeats;
|
|||
|
||||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, i_sym(1)),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(1)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(0) }),
|
||||
i_token(0),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11)
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("does not create redundant auxiliary rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat1(i_token(4)) }),
|
||||
seq({ i_token(2), repeat1(i_token(4)) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat1(i_token(4)) }),
|
||||
seq({ i_token(2), repeat1(i_token(4)) }),
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), i_sym(2) }),
|
||||
seq({ i_token(2), i_sym(2) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
i_sym(2),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(4) }),
|
||||
i_token(4),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(10) }),
|
||||
i_token(10),
|
||||
})),
|
||||
Variable("rule0_repeat2", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
|
||||
Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
|
||||
Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, i_sym(2)),
|
||||
Variable("rule1", VariableTypeNamed, i_sym(3)),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"rule1", VariableTypeNamed, i_sym(3)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(10) }),
|
||||
i_token(10),
|
||||
})),
|
||||
Variable("rule1_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(3), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,9 @@ describe("expand_tokens", []() {
|
|||
|
||||
describe("string rules", [&]() {
|
||||
it("replaces strings with sequences of character sets", [&]() {
|
||||
LexicalGrammar grammar {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable {
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -34,8 +34,8 @@ describe("expand_tokens", []() {
|
|||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -53,9 +53,9 @@ describe("expand_tokens", []() {
|
|||
});
|
||||
|
||||
it("handles strings containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable {
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
str("\u03B1 \u03B2"),
|
||||
|
|
@ -67,8 +67,8 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
metadata(seq({
|
||||
|
|
@ -84,9 +84,9 @@ describe("expand_tokens", []() {
|
|||
|
||||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable {
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -103,8 +103,8 @@ describe("expand_tokens", []() {
|
|||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -118,9 +118,9 @@ describe("expand_tokens", []() {
|
|||
});
|
||||
|
||||
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable {
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
pattern("[^\u03B1-\u03B4]*"),
|
||||
|
|
@ -132,8 +132,8 @@ describe("expand_tokens", []() {
|
|||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
repeat(character({ 945, 946, 947, 948 }, false)),
|
||||
|
|
@ -143,9 +143,9 @@ describe("expand_tokens", []() {
|
|||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable {
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
|
|||
|
|
@ -16,9 +16,9 @@ using prepare_grammar::InitialSyntaxGrammar;
|
|||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
|
|
@ -26,10 +26,10 @@ describe("extract_tokens", []() {
|
|||
i_sym(2),
|
||||
token(repeat1(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))),
|
||||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
|
||||
}))},
|
||||
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
@ -42,8 +42,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
|
|
@ -63,41 +63,41 @@ describe("extract_tokens", []() {
|
|||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
}))),
|
||||
}))},
|
||||
|
||||
Variable("rule_C", VariableTypeNamed, choice({ i_token(4), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(2))),
|
||||
})));
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable("ab", VariableTypeAnonymous, str("ab"), true),
|
||||
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
|
||||
// Patterns become hidden rules.
|
||||
LexicalVariable("/cd*/", VariableTypeAuxiliary, pattern("cd*"), false),
|
||||
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
LexicalVariable("/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
str("ef"),
|
||||
str("gh")
|
||||
})), false),
|
||||
})), false},
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
LexicalVariable("rule_B", VariableTypeNamed, pattern("ij+"), false),
|
||||
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
|
||||
|
||||
// Strings become anonymous rules.
|
||||
LexicalVariable("kl", VariableTypeAnonymous, str("kl"), true),
|
||||
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
Variable{"rule_A", VariableTypeNamed, seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})),
|
||||
})},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
@ -118,18 +118,18 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
|
||||
Variable("rule_B", VariableTypeNamed, str("cd")),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("cd")},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })),
|
||||
Variable("rule_B", VariableTypeNamed, i_token(1)),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
|
||||
Variable{"rule_B", VariableTypeNamed, i_token(1)},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
|
|
@ -142,9 +142,9 @@ describe("extract_tokens", []() {
|
|||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
Variable{"rule_A", VariableTypeNamed, str("ok")},
|
||||
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
|
||||
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
|
||||
},
|
||||
{
|
||||
str(" ")
|
||||
|
|
@ -165,12 +165,17 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("handling extra tokens", [&]() {
|
||||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
}, {
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
|
|
@ -182,12 +187,17 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, {
|
||||
str("y"),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
|
|
@ -195,13 +205,18 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
Variable("rule_C", VariableTypeNamed, str("z")),
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
Variable{"rule_C", VariableTypeNamed, str("z")},
|
||||
},
|
||||
{
|
||||
i_sym(2),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
|
|
@ -214,8 +229,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
}, { i_sym(1) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
|
|
@ -226,8 +241,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any extra tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
}, { choice({ i_sym(1), blank() }) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
|
|
@ -241,8 +256,8 @@ describe("extract_tokens", []() {
|
|||
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ using prepare_grammar::flatten_rule;
|
|||
|
||||
describe("flatten_grammar", []() {
|
||||
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable(
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
"test",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -30,7 +30,7 @@ describe("flatten_grammar", []() {
|
|||
})),
|
||||
i_sym(7),
|
||||
})
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.name, Equals("test"));
|
||||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
|
|
@ -54,14 +54,14 @@ describe("flatten_grammar", []() {
|
|||
});
|
||||
|
||||
it("uses the last assigned precedence", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable(
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
"test1",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
}))
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
|
|
@ -70,13 +70,13 @@ describe("flatten_grammar", []() {
|
|||
})
|
||||
})))
|
||||
|
||||
result = flatten_rule(Variable(
|
||||
result = flatten_rule(Variable{
|
||||
"test2",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
}))
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
|
|
|
|||
|
|
@ -15,27 +15,32 @@ using prepare_grammar::intern_symbols;
|
|||
|
||||
describe("intern_symbols", []() {
|
||||
it("replaces named symbols with numerically-indexed symbols", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("_z") }) },
|
||||
{ "y", sym("_z") },
|
||||
{ "_z", str("stuff") }
|
||||
}, {}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("_z") })},
|
||||
{"y", sym("_z")},
|
||||
{"_z", str("stuff")}
|
||||
}, {}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
|
||||
Variable("y", VariableTypeNamed, i_sym(2)),
|
||||
Variable("_z", VariableTypeHidden, str("stuff")),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>{
|
||||
Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
|
||||
Variable{"y", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"_z", VariableTypeHidden, str("stuff")},
|
||||
}));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar{{
|
||||
{ "x", sym("y") },
|
||||
}, {}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", sym("y")},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -44,13 +49,17 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {
|
||||
sym("z")
|
||||
}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")}
|
||||
},
|
||||
{
|
||||
sym("z")
|
||||
},
|
||||
{}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -60,29 +69,34 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
|
||||
it("records any rule names that match external token names", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {}, {}, {
|
||||
"w",
|
||||
"z"
|
||||
}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
"w",
|
||||
"z"
|
||||
}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>({
|
||||
{
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
|
||||
ExternalToken{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
},
|
||||
{
|
||||
ExternalToken{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol(2, Symbol::NonTerminal)
|
||||
}
|
||||
})))
|
||||
},
|
||||
}))
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -96,8 +96,8 @@ struct SyntaxVariable;
|
|||
struct LexicalVariable;
|
||||
struct AdvanceAction;
|
||||
struct AcceptTokenAction;
|
||||
class ParseAction;
|
||||
class ParseState;
|
||||
struct ParseAction;
|
||||
struct ParseState;
|
||||
struct ExternalToken;
|
||||
struct ProductionStep;
|
||||
struct PrecedenceRange;
|
||||
|
|
|
|||
|
|
@ -24,7 +24,6 @@ using std::map;
|
|||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::make_shared;
|
||||
using std::unordered_map;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
|
|
@ -62,14 +61,16 @@ class LexTableBuilder {
|
|||
|
||||
private:
|
||||
void add_lex_state_for_parse_state(ParseState *parse_state) {
|
||||
parse_state->lex_state_id =
|
||||
add_lex_state(item_set_for_terminals(parse_state->terminal_entries));
|
||||
parse_state->lex_state_id = add_lex_state(
|
||||
item_set_for_terminals(parse_state->terminal_entries)
|
||||
);
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
const auto &pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.add_state();
|
||||
LexStateId state_id = lex_table.states.size();
|
||||
lex_table.states.push_back(LexState());
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
|
|
@ -83,13 +84,13 @@ class LexTableBuilder {
|
|||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
const LexItemSet::Transition &transition = pair.second;
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (conflict_manager.resolve(transition.destination, action,
|
||||
current_action)) {
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.state(state_id).advance_actions[characters] = action;
|
||||
lex_table.states[state_id].advance_actions[characters] = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -102,9 +103,9 @@ class LexTableBuilder {
|
|||
item.lhs.is_built_in() ||
|
||||
lex_grammar.variables[item.lhs.index].is_string);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (conflict_manager.resolve(action, current_action))
|
||||
lex_table.state(state_id).accept_action = action;
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -6,7 +6,7 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
class ParseTable;
|
||||
struct ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
|
|
|
|||
|
|
@ -57,9 +57,9 @@ class ParseTableBuilder {
|
|||
Symbol(0, Symbol::Terminal) :
|
||||
Symbol(0, Symbol::NonTerminal);
|
||||
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone),
|
||||
});
|
||||
Production start_production{
|
||||
ProductionStep{start_symbol, 0, rules::AssociativityNone},
|
||||
};
|
||||
|
||||
// Placeholder for error state
|
||||
add_parse_state(ParseItemSet());
|
||||
|
|
@ -150,7 +150,8 @@ class ParseTableBuilder {
|
|||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.add_state();
|
||||
ParseStateId state_id = parse_table.states.size();
|
||||
parse_table.states.push_back(ParseState());
|
||||
parse_state_ids[item_set] = state_id;
|
||||
parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature();
|
||||
item_sets_to_process.push_back({ std::move(item_set), state_id });
|
||||
|
|
|
|||
|
|
@ -15,7 +15,9 @@ using std::vector;
|
|||
using std::make_tuple;
|
||||
|
||||
tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar
|
||||
) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
const CompileError error = parse_table_result.second;
|
||||
|
|
|
|||
|
|
@ -26,8 +26,6 @@ using std::vector;
|
|||
using util::escape_char;
|
||||
using rules::Symbol;
|
||||
|
||||
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
|
||||
|
||||
static const map<char, string> REPLACEMENTS({
|
||||
{ '~', "TILDE" },
|
||||
{ '`', "BQUOTE" },
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ namespace tree_sitter {
|
|||
|
||||
struct LexicalGrammar;
|
||||
struct SyntaxGrammar;
|
||||
class LexTable;
|
||||
class ParseTable;
|
||||
struct LexTable;
|
||||
struct ParseTable;
|
||||
|
||||
namespace generate_code {
|
||||
|
||||
|
|
|
|||
|
|
@ -44,26 +44,10 @@ bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
|
|||
|
||||
LexState::LexState() : is_token_start(false) {}
|
||||
|
||||
set<CharacterSet> LexState::expected_inputs() const {
|
||||
set<CharacterSet> result;
|
||||
for (auto &pair : advance_actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool LexState::operator==(const LexState &other) const {
|
||||
return advance_actions == other.advance_actions &&
|
||||
accept_action == other.accept_action &&
|
||||
is_token_start == other.is_token_start;
|
||||
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
LexState &LexTable::state(LexStateId id) {
|
||||
return states[id];
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -13,17 +13,9 @@ namespace tree_sitter {
|
|||
|
||||
typedef int64_t LexStateId;
|
||||
|
||||
typedef enum {
|
||||
LexActionTypeError,
|
||||
LexActionTypeAccept,
|
||||
LexActionTypeAcceptFragile,
|
||||
LexActionTypeAdvance
|
||||
} LexActionType;
|
||||
|
||||
struct AdvanceAction {
|
||||
AdvanceAction();
|
||||
AdvanceAction(size_t, PrecedenceRange, bool);
|
||||
|
||||
bool operator==(const AdvanceAction &other) const;
|
||||
|
||||
LexStateId state_index;
|
||||
|
|
@ -34,7 +26,6 @@ struct AdvanceAction {
|
|||
struct AcceptTokenAction {
|
||||
AcceptTokenAction();
|
||||
AcceptTokenAction(rules::Symbol, int, bool);
|
||||
|
||||
bool is_present() const;
|
||||
bool operator==(const AcceptTokenAction &action) const;
|
||||
|
||||
|
|
@ -43,16 +34,8 @@ struct AcceptTokenAction {
|
|||
bool is_string;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class LexState {
|
||||
public:
|
||||
struct LexState {
|
||||
LexState();
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool operator==(const LexState &) const;
|
||||
|
||||
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
|
||||
|
|
@ -60,10 +43,7 @@ class LexState {
|
|||
bool is_token_start;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
struct LexTable {
|
||||
std::vector<LexState> states;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
|
||||
LexicalVariable::LexicalVariable(
|
||||
const string &name, VariableType type, const rule_ptr &rule, bool is_string)
|
||||
: name(name), rule(rule), type(type), is_string(is_string) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -10,11 +10,9 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct LexicalVariable {
|
||||
LexicalVariable(const std::string &, VariableType, const rule_ptr &, bool);
|
||||
|
||||
std::string name;
|
||||
rule_ptr rule;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
bool is_string;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -148,13 +148,6 @@ bool ParseState::has_shift_action() const {
|
|||
return (!nonterminal_entries.empty());
|
||||
}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &entry : terminal_entries)
|
||||
result.insert(entry.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
|
||||
for (auto &entry : terminal_entries)
|
||||
for (ParseAction &action : entry.second.actions)
|
||||
|
|
@ -169,18 +162,6 @@ bool ParseState::operator==(const ParseState &other) const {
|
|||
nonterminal_entries == other.nonterminal_entries;
|
||||
}
|
||||
|
||||
set<Symbol> ParseTable::all_symbols() const {
|
||||
set<Symbol> result;
|
||||
for (auto &pair : symbols)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseStateId ParseTable::add_state() {
|
||||
states.push_back(ParseState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
||||
Symbol lookahead,
|
||||
ParseAction action) {
|
||||
|
|
|
|||
|
|
@ -23,13 +23,11 @@ enum ParseActionType {
|
|||
ParseActionTypeRecover,
|
||||
};
|
||||
|
||||
class ParseAction {
|
||||
struct ParseAction {
|
||||
ParseAction();
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
const Production *);
|
||||
|
||||
public:
|
||||
ParseAction();
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(ParseStateId state_index);
|
||||
|
|
@ -39,7 +37,6 @@ class ParseAction {
|
|||
static ParseAction ShiftExtra();
|
||||
bool operator==(const ParseAction &) const;
|
||||
bool operator<(const ParseAction &) const;
|
||||
|
||||
rules::Associativity associativity() const;
|
||||
int precedence() const;
|
||||
|
||||
|
|
@ -47,30 +44,26 @@ class ParseAction {
|
|||
bool extra;
|
||||
bool fragile;
|
||||
ParseStateId state_index;
|
||||
|
||||
rules::Symbol symbol;
|
||||
size_t consumed_symbol_count;
|
||||
const Production *production;
|
||||
};
|
||||
|
||||
struct ParseTableEntry {
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
|
||||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
|
||||
inline bool operator!=(const ParseTableEntry &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
};
|
||||
|
||||
class ParseState {
|
||||
public:
|
||||
struct ParseState {
|
||||
ParseState();
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
bool merge(const ParseState &);
|
||||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
|
|
@ -87,10 +80,7 @@ struct ParseTableSymbolMetadata {
|
|||
bool structural;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
public:
|
||||
std::set<rules::Symbol> all_symbols() const;
|
||||
ParseStateId add_state();
|
||||
struct ParseTable {
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
|
||||
|
||||
|
|
|
|||
|
|
@ -41,10 +41,17 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back(
|
||||
Variable(helper_rule_name, VariableTypeAuxiliary,
|
||||
Choice::build({ Seq::build({ repeat_symbol.copy(), inner_rule }),
|
||||
inner_rule })));
|
||||
aux_rules.push_back(Variable{
|
||||
helper_rule_name,
|
||||
VariableTypeAuxiliary,
|
||||
Choice::build({
|
||||
Seq::build({
|
||||
repeat_symbol.copy(),
|
||||
inner_rule,
|
||||
}),
|
||||
inner_rule,
|
||||
})
|
||||
});
|
||||
return repeat_symbol.copy();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -107,8 +107,11 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
*/
|
||||
vector<Variable> processed_variables;
|
||||
for (const Variable &variable : grammar.variables)
|
||||
processed_variables.push_back(
|
||||
Variable(variable.name, variable.type, extractor.apply(variable.rule)));
|
||||
processed_variables.push_back(Variable{
|
||||
variable.name,
|
||||
variable.type,
|
||||
extractor.apply(variable.rule)
|
||||
});
|
||||
lexical_grammar.variables = extractor.tokens;
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -25,8 +25,11 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
Production production;
|
||||
|
||||
void apply_to(const rules::Symbol *sym) {
|
||||
production.push_back(ProductionStep(*sym, precedence_stack.back(),
|
||||
associativity_stack.back()));
|
||||
production.push_back(ProductionStep{
|
||||
*sym,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
});
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *metadata) {
|
||||
|
|
@ -85,7 +88,7 @@ SyntaxVariable flatten_rule(const Variable &variable) {
|
|||
}
|
||||
}
|
||||
|
||||
return SyntaxVariable(variable.name, variable.type, productions);
|
||||
return SyntaxVariable{variable.name, variable.type, productions};
|
||||
}
|
||||
|
||||
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -7,18 +7,6 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
|
||||
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
|
||||
const vector<Production> &productions)
|
||||
: name(name), productions(productions), type(type) {}
|
||||
|
||||
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
|
||||
rules::Associativity associativity)
|
||||
: symbol(symbol), precedence(precedence), associativity(associativity) {}
|
||||
|
||||
bool ExternalToken::operator==(const ExternalToken &other) const {
|
||||
return name == other.name && type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
|
|
|
|||
|
|
@ -11,15 +11,14 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct ExternalToken {
|
||||
bool operator==(const ExternalToken &) const;
|
||||
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
|
||||
bool operator==(const ExternalToken &) const;
|
||||
};
|
||||
|
||||
struct ProductionStep {
|
||||
ProductionStep(const rules::Symbol &, int, rules::Associativity);
|
||||
bool operator==(const ProductionStep &) const;
|
||||
|
||||
rules::Symbol symbol;
|
||||
|
|
@ -30,12 +29,9 @@ struct ProductionStep {
|
|||
typedef std::vector<ProductionStep> Production;
|
||||
|
||||
struct SyntaxVariable {
|
||||
SyntaxVariable(const std::string &, VariableType,
|
||||
const std::vector<Production> &);
|
||||
|
||||
std::string name;
|
||||
std::vector<Production> productions;
|
||||
VariableType type;
|
||||
std::vector<Production> productions;
|
||||
};
|
||||
|
||||
typedef std::set<rules::Symbol> ConflictSet;
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
#include "compiler/variable.h"
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
|
||||
Variable::Variable(const string &name, VariableType type, const rule_ptr &rule)
|
||||
: name(name), rule(rule), type(type) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -15,11 +15,9 @@ enum VariableType {
|
|||
};
|
||||
|
||||
struct Variable {
|
||||
Variable(const std::string &, VariableType, const rule_ptr &);
|
||||
|
||||
std::string name;
|
||||
rule_ptr rule;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue