Merge pull request #63 from tree-sitter/avoid-lexing-issues-when-merging-states
Avoid introducing new lexical conflicts when merging parse states
This commit is contained in:
commit
352e678c12
100 changed files with 2517 additions and 2168 deletions
|
|
@ -11,13 +11,12 @@
|
|||
'externals/json-parser',
|
||||
],
|
||||
'sources': [
|
||||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/recovery_tokens.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lex_table_builder.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
'src/compiler/build_tables/parse_item_set_builder.cc',
|
||||
|
|
@ -41,7 +40,6 @@
|
|||
'src/compiler/prepare_grammar/token_description.cc',
|
||||
'src/compiler/rule.cc',
|
||||
'src/compiler/syntax_grammar.cc',
|
||||
'src/compiler/variable.cc',
|
||||
'src/compiler/rules/blank.cc',
|
||||
'src/compiler/rules/built_in_symbols.cc',
|
||||
'src/compiler/rules/character_range.cc',
|
||||
|
|
|
|||
|
|
@ -1,34 +0,0 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("recovery_tokens(rule)", []() {
|
||||
it("includes rules that can only begin and end with an explicit set of characters", [&]() {
|
||||
LexicalGrammar grammar;
|
||||
grammar.separators = {
|
||||
character({ ' ' }),
|
||||
};
|
||||
|
||||
grammar.variables = {
|
||||
Variable("var0", VariableTypeNamed, character({}, false)),
|
||||
Variable("var1", VariableTypeNamed, seq({
|
||||
character({ 'a', 'b' }),
|
||||
character({}, false),
|
||||
character({ 'c', 'd' }),
|
||||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -20,6 +20,10 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
Symbol sym4(3, Symbol::Terminal);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
|
||||
before_each([&]() {
|
||||
conflict_manager = LexConflictManager();
|
||||
});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
|
||||
AssertThat(update, IsTrue());
|
||||
|
|
@ -65,6 +69,7 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
describe("advance/accept-token conflicts", [&]() {
|
||||
describe("when the token to accept has higher precedence", [&]() {
|
||||
it("prefers the accept-token action", [&]() {
|
||||
AssertThat(conflict_manager.possible_extensions, IsEmpty());
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(update, IsFalse());
|
||||
AssertThat(conflict_manager.possible_extensions, IsEmpty());
|
||||
|
|
@ -72,13 +77,9 @@ describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
|||
});
|
||||
|
||||
describe("when the token to accept does not have a higher precedence", [&]() {
|
||||
it("favors the advance action", [&]() {
|
||||
it("favors the advance action and adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
it("adds the in-progress tokens as possible extensions of the discarded token", [&]() {
|
||||
conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(conflict_manager.possible_extensions[sym3.index], Contains(sym4.index));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,11 +13,10 @@ START_TEST
|
|||
|
||||
describe("LexItem", []() {
|
||||
describe("completion_status()", [&]() {
|
||||
it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
|
||||
it("indicates whether the item is done and its precedence", [&]() {
|
||||
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
|
||||
AssertThat(item1.completion_status().is_done, IsFalse());
|
||||
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item1.completion_status().is_string, IsFalse());
|
||||
|
||||
MetadataParams params;
|
||||
params.precedence = 3;
|
||||
|
|
@ -30,12 +29,10 @@ describe("LexItem", []() {
|
|||
|
||||
AssertThat(item2.completion_status().is_done, IsTrue());
|
||||
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
|
||||
AssertThat(item2.completion_status().is_string, IsTrue());
|
||||
|
||||
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
|
||||
AssertThat(item3.completion_status().is_done, IsTrue());
|
||||
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item3.completion_status().is_string, IsFalse());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -12,12 +12,13 @@ using namespace rules;
|
|||
START_TEST
|
||||
|
||||
describe("ParseItemSetBuilder", []() {
|
||||
vector<Variable> lexical_variables;
|
||||
vector<LexicalVariable> lexical_variables;
|
||||
for (size_t i = 0; i < 20; i++) {
|
||||
lexical_variables.push_back(Variable{
|
||||
lexical_variables.push_back({
|
||||
"token_" + to_string(i),
|
||||
VariableTypeNamed,
|
||||
blank(),
|
||||
false
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -25,13 +26,13 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
|
|
@ -39,13 +40,13 @@ describe("ParseItemSetBuilder", []() {
|
|||
Production({
|
||||
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule2", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
|
|
@ -84,19 +85,19 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
it("handles rules with empty productions", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}),
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/prepare_grammar/expand_repeats.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -11,141 +12,159 @@ using prepare_grammar::expand_repeats;
|
|||
|
||||
describe("expand_repeats", []() {
|
||||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(0))},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, i_sym(1)),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(1)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(0) }),
|
||||
i_token(0),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of sequences", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11)
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("replaces repeats inside of choices", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
i_token(10),
|
||||
i_sym(1),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("does not create redundant auxiliary rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat1(i_token(4)) }),
|
||||
seq({ i_token(2), repeat1(i_token(4)) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), repeat1(i_token(4)) }),
|
||||
seq({ i_token(2), repeat1(i_token(4)) }),
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, choice({
|
||||
seq({ i_token(1), i_sym(2) }),
|
||||
seq({ i_token(2), i_sym(2) }),
|
||||
})),
|
||||
Variable("rule1", VariableTypeNamed, seq({
|
||||
})},
|
||||
Variable{"rule1", VariableTypeNamed, seq({
|
||||
i_token(3),
|
||||
i_sym(2),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(4) }),
|
||||
i_token(4),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("can replace multiple repeats in the same rule", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
})},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, seq({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
})),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(1), i_token(10) }),
|
||||
i_token(10),
|
||||
})),
|
||||
Variable("rule0_repeat2", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule0_repeat2", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
|
||||
it("can replace repeats in multiple rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
|
||||
Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
|
||||
}, {}, {}, {}};
|
||||
InitialSyntaxGrammar grammar{
|
||||
{
|
||||
Variable{"rule0", VariableTypeNamed, repeat1(i_token(10))},
|
||||
Variable{"rule1", VariableTypeNamed, repeat1(i_token(11))},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
AssertThat(result.variables, Equals(vector<Variable>({
|
||||
Variable("rule0", VariableTypeNamed, i_sym(2)),
|
||||
Variable("rule1", VariableTypeNamed, i_sym(3)),
|
||||
Variable("rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
AssertThat(result.variables, Equals(vector<Variable>{
|
||||
Variable{"rule0", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"rule1", VariableTypeNamed, i_sym(3)},
|
||||
Variable{"rule0_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(2), i_token(10) }),
|
||||
i_token(10),
|
||||
})),
|
||||
Variable("rule1_repeat1", VariableTypeAuxiliary, choice({
|
||||
})},
|
||||
Variable{"rule1_repeat1", VariableTypeAuxiliary, choice({
|
||||
seq({ i_sym(3), i_token(11) }),
|
||||
i_token(11),
|
||||
})),
|
||||
})));
|
||||
})},
|
||||
}));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -15,89 +15,149 @@ describe("expand_tokens", []() {
|
|||
|
||||
describe("string rules", [&]() {
|
||||
it("replaces strings with sequences of character sets", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
})),
|
||||
}, {}};
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
str("xyz"),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), string_token_params),
|
||||
i_sym(11),
|
||||
})),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
metadata(seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
}), string_token_params),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
}));
|
||||
});
|
||||
|
||||
it("handles strings containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("\u03B1 \u03B2")),
|
||||
}, {}};
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
str("\u03B1 \u03B2"),
|
||||
false
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), string_token_params)),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
metadata(seq({
|
||||
character({ 945 }),
|
||||
character({ ' ' }),
|
||||
character({ 946 }),
|
||||
}), string_token_params),
|
||||
false
|
||||
}
|
||||
}));
|
||||
});
|
||||
});
|
||||
|
||||
describe("regexp rules", [&]() {
|
||||
it("replaces regexps with the equivalent rule tree", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
})),
|
||||
}, {}};
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
pattern("x*"),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
})),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
i_sym(10),
|
||||
repeat(character({ 'x' })),
|
||||
i_sym(11),
|
||||
}),
|
||||
false
|
||||
}
|
||||
}));
|
||||
});
|
||||
|
||||
it("handles regexps containing non-ASCII UTF8 characters", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
Variable("rule_A", VariableTypeNamed, pattern("[^\u03B1-\u03B4]*")),
|
||||
}, {}};
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
pattern("[^\u03B1-\u03B4]*"),
|
||||
false
|
||||
}
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat(character({ 945, 946, 947, 948 }, false))),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<LexicalVariable>{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
repeat(character({ 945, 946, 947, 948 }, false)),
|
||||
false
|
||||
}
|
||||
}));
|
||||
});
|
||||
|
||||
it("returns an error when the grammar contains an invalid regex", [&]() {
|
||||
LexicalGrammar grammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}))
|
||||
}, {}};
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"rule_A",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
pattern("("),
|
||||
str("xyz"),
|
||||
pattern("["),
|
||||
}),
|
||||
false
|
||||
},
|
||||
},
|
||||
{}
|
||||
};
|
||||
|
||||
auto result = expand_tokens(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -16,20 +16,25 @@ using prepare_grammar::InitialSyntaxGrammar;
|
|||
|
||||
describe("extract_tokens", []() {
|
||||
it("moves strings, patterns, and sub-rules marked as tokens into the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat1(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))),
|
||||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
|
||||
}, {}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
str("ab"),
|
||||
pattern("cd*"),
|
||||
choice({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
token(repeat1(choice({ str("ef"), str("gh") }))),
|
||||
}),
|
||||
}))},
|
||||
Variable{"rule_B", VariableTypeNamed, pattern("ij+")},
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ str("kl"), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(3))},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
|
@ -37,8 +42,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(error, Equals(CompileError::none()));
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, repeat1(seq({
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>{
|
||||
Variable{"rule_A", VariableTypeNamed, repeat1(seq({
|
||||
|
||||
// This string is now the first token in the lexical grammar.
|
||||
i_token(0),
|
||||
|
|
@ -58,83 +63,88 @@ describe("extract_tokens", []() {
|
|||
// This token rule is now the third rule in the lexical grammar.
|
||||
i_token(2),
|
||||
}),
|
||||
}))),
|
||||
}))},
|
||||
|
||||
Variable("rule_C", VariableTypeNamed, choice({ i_token(4), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(2))),
|
||||
})));
|
||||
Variable{"rule_C", VariableTypeNamed, choice({ i_token(4), blank() })},
|
||||
Variable{"rule_D", VariableTypeNamed, repeat1(i_sym(2))},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable>({
|
||||
// Strings become anonymous rules.
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
LexicalVariable{"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
|
||||
// Patterns become hidden rules.
|
||||
Variable("/cd*/", VariableTypeAuxiliary, pattern("cd*")),
|
||||
LexicalVariable{"/cd*/", VariableTypeAuxiliary, pattern("cd*"), false},
|
||||
|
||||
// Rules marked as tokens become hidden rules.
|
||||
Variable("/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
LexicalVariable{"/(ef|gh)*/", VariableTypeAuxiliary, repeat1(choice({
|
||||
str("ef"),
|
||||
str("gh")
|
||||
}))),
|
||||
})), false},
|
||||
|
||||
// This named rule was moved wholesale to the lexical grammar.
|
||||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
LexicalVariable{"rule_B", VariableTypeNamed, pattern("ij+"), false},
|
||||
|
||||
// Strings become anonymous rules.
|
||||
Variable("kl", VariableTypeAnonymous, str("kl")),
|
||||
LexicalVariable{"kl", VariableTypeAnonymous, str("kl"), true},
|
||||
})));
|
||||
});
|
||||
|
||||
it("does not create duplicate tokens in the lexical grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})),
|
||||
}, {}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({
|
||||
str("ab"),
|
||||
i_sym(0),
|
||||
str("ab"),
|
||||
})},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })),
|
||||
})));
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable> {
|
||||
Variable {"rule_A", VariableTypeNamed, seq({ i_token(0), i_sym(0), i_token(0) })},
|
||||
}));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
})))
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
}))
|
||||
});
|
||||
|
||||
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
|
||||
Variable("rule_B", VariableTypeNamed, str("cd")),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("cd")},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })},
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })),
|
||||
Variable("rule_B", VariableTypeNamed, i_token(1)),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ i_sym(1), i_token(0) })},
|
||||
Variable{"rule_B", VariableTypeNamed, i_token(1)},
|
||||
Variable{"rule_C", VariableTypeNamed, seq({ i_token(2), i_token(1) })},
|
||||
})));
|
||||
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<Variable>({
|
||||
Variable("ab", VariableTypeAnonymous, str("ab")),
|
||||
Variable("cd", VariableTypeAnonymous, str("cd")),
|
||||
Variable("ef", VariableTypeAnonymous, str("ef")),
|
||||
})));
|
||||
AssertThat(lexical_grammar.variables, Equals(vector<LexicalVariable> {
|
||||
LexicalVariable {"ab", VariableTypeAnonymous, str("ab"), true},
|
||||
LexicalVariable {"cd", VariableTypeAnonymous, str("cd"), true},
|
||||
LexicalVariable {"ef", VariableTypeAnonymous, str("ef"), true},
|
||||
}));
|
||||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
Variable{"rule_A", VariableTypeNamed, str("ok")},
|
||||
Variable{"rule_B", VariableTypeNamed, repeat(i_sym(0))},
|
||||
Variable{"rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))},
|
||||
},
|
||||
{
|
||||
str(" ")
|
||||
|
|
@ -155,12 +165,17 @@ describe("extract_tokens", []() {
|
|||
|
||||
describe("handling extra tokens", [&]() {
|
||||
it("adds inline extra tokens to the lexical grammar's separators", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
}, {
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
|
|
@ -172,12 +187,17 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("handles inline extra tokens that match tokens in the grammar", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, {
|
||||
str("y"),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
},
|
||||
{
|
||||
str("y"),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
|
|
@ -185,13 +205,18 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
Variable("rule_C", VariableTypeNamed, str("z")),
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}, {}});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("w"), str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
Variable{"rule_C", VariableTypeNamed, str("z")},
|
||||
},
|
||||
{
|
||||
i_sym(2),
|
||||
},
|
||||
{},
|
||||
{}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
|
|
@ -204,8 +229,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any extra tokens are non-token symbols", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
}, { i_sym(1) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
|
|
@ -216,8 +241,8 @@ describe("extract_tokens", []() {
|
|||
|
||||
it("returns an error if any extra tokens are non-token rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
Variable{"rule_A", VariableTypeNamed, str("x")},
|
||||
Variable{"rule_B", VariableTypeNamed, str("y")},
|
||||
}, { choice({ i_sym(1), blank() }) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
|
|
@ -231,8 +256,8 @@ describe("extract_tokens", []() {
|
|||
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
Variable{"rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })},
|
||||
Variable{"rule_B", VariableTypeNamed, seq({ str("y"), str("z") })},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ using prepare_grammar::flatten_rule;
|
|||
|
||||
describe("flatten_grammar", []() {
|
||||
it("associates each symbol with the precedence and associativity binding it to its successor", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable(
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
"test",
|
||||
VariableTypeNamed,
|
||||
seq({
|
||||
|
|
@ -30,7 +30,7 @@ describe("flatten_grammar", []() {
|
|||
})),
|
||||
i_sym(7),
|
||||
})
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.name, Equals("test"));
|
||||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
|
|
@ -54,14 +54,14 @@ describe("flatten_grammar", []() {
|
|||
});
|
||||
|
||||
it("uses the last assigned precedence", [&]() {
|
||||
SyntaxVariable result = flatten_rule(Variable(
|
||||
SyntaxVariable result = flatten_rule(Variable{
|
||||
"test1",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
i_sym(2),
|
||||
}))
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
|
|
@ -70,13 +70,13 @@ describe("flatten_grammar", []() {
|
|||
})
|
||||
})))
|
||||
|
||||
result = flatten_rule(Variable(
|
||||
result = flatten_rule(Variable{
|
||||
"test2",
|
||||
VariableTypeNamed,
|
||||
prec_left(101, seq({
|
||||
i_sym(1),
|
||||
}))
|
||||
));
|
||||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
|
|
|
|||
|
|
@ -15,27 +15,32 @@ using prepare_grammar::intern_symbols;
|
|||
|
||||
describe("intern_symbols", []() {
|
||||
it("replaces named symbols with numerically-indexed symbols", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("_z") }) },
|
||||
{ "y", sym("_z") },
|
||||
{ "_z", str("stuff") }
|
||||
}, {}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("_z") })},
|
||||
{"y", sym("_z")},
|
||||
{"_z", str("stuff")}
|
||||
}, {}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals(CompileError::none()));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>({
|
||||
Variable("x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })),
|
||||
Variable("y", VariableTypeNamed, i_sym(2)),
|
||||
Variable("_z", VariableTypeHidden, str("stuff")),
|
||||
})));
|
||||
AssertThat(result.first.variables, Equals(vector<Variable>{
|
||||
Variable{"x", VariableTypeNamed, choice({ i_sym(1), i_sym(2) })},
|
||||
Variable{"y", VariableTypeNamed, i_sym(2)},
|
||||
Variable{"_z", VariableTypeHidden, str("stuff")},
|
||||
}));
|
||||
});
|
||||
|
||||
describe("when there are symbols that reference undefined rules", [&]() {
|
||||
it("returns an error", []() {
|
||||
Grammar grammar{{
|
||||
{ "x", sym("y") },
|
||||
}, {}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", sym("y")},
|
||||
},
|
||||
{}, {}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -44,13 +49,17 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
|
||||
it("translates the grammar's optional 'extra_tokens' to numerical symbols", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {
|
||||
sym("z")
|
||||
}, {}, {}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")}
|
||||
},
|
||||
{
|
||||
sym("z")
|
||||
},
|
||||
{}, {}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -60,29 +69,34 @@ describe("intern_symbols", []() {
|
|||
});
|
||||
|
||||
it("records any rule names that match external token names", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {}, {}, {
|
||||
"w",
|
||||
"z"
|
||||
}};
|
||||
Grammar grammar{
|
||||
{
|
||||
{"x", choice({ sym("y"), sym("z") })},
|
||||
{"y", sym("z")},
|
||||
{"z", str("stuff")},
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
"w",
|
||||
"z"
|
||||
}
|
||||
};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>({
|
||||
{
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>{
|
||||
ExternalToken{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
},
|
||||
{
|
||||
ExternalToken{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol(2, Symbol::NonTerminal)
|
||||
}
|
||||
})))
|
||||
},
|
||||
}))
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -1,42 +0,0 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
COMMENT,
|
||||
};
|
||||
|
||||
void *tree_sitter_extra_external_tokens_external_scanner_create() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
|
||||
}
|
||||
|
||||
bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
|
||||
bool tree_sitter_extra_external_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '#') {
|
||||
lexer->advance(lexer, false);
|
||||
while (lexer->lookahead != '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->result_symbol = COMMENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
|
||||
}
|
||||
32
spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
vendored
Normal file
32
spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,32 @@
|
|||
================================================
|
||||
anonymous tokens defined with character classes
|
||||
================================================
|
||||
1234
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
=================================================
|
||||
anonymous tokens defined with LF escape sequence
|
||||
=================================================
|
||||
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
=================================================
|
||||
anonymous tokens defined with CR escape sequence
|
||||
=================================================
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
|
||||
================================================
|
||||
anonymous tokens with quotes
|
||||
================================================
|
||||
'hello'
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
14
spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
vendored
Normal file
14
spec/fixtures/test_grammars/anonymous_tokens_with_escaped_chars/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
{
|
||||
"name": "anonymous_tokens_with_escaped_chars",
|
||||
"rules": {
|
||||
"first_rule": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "\n"},
|
||||
{"type": "STRING", "value": "\r"},
|
||||
{"type": "STRING", "value": "'hello'"},
|
||||
{"type": "PATTERN", "value": "\\d+"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
8
spec/fixtures/test_grammars/associativity_left/corpus.txt
vendored
Normal file
8
spec/fixtures/test_grammars/associativity_left/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
===================
|
||||
chained operations
|
||||
===================
|
||||
x+y+z
|
||||
---
|
||||
(expression (math_operation
|
||||
(expression (math_operation (expression (identifier)) (expression (identifier))))
|
||||
(expression (identifier))))
|
||||
31
spec/fixtures/test_grammars/associativity_left/grammar.json
vendored
Normal file
31
spec/fixtures/test_grammars/associativity_left/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "associativity_left",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
13
spec/fixtures/test_grammars/associativity_missing/expected_error.txt
vendored
Normal file
13
spec/fixtures/test_grammars/associativity_missing/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '+' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (math_operation expression '+' expression) • '+' …
|
||||
2: expression '+' (math_operation expression • '+' expression)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a left or right associativity in `math_operation`
|
||||
2: Add a conflict for these rules: `math_operation`
|
||||
27
spec/fixtures/test_grammars/associativity_missing/grammar.json
vendored
Normal file
27
spec/fixtures/test_grammars/associativity_missing/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
{
|
||||
"name": "associativity_missing",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
8
spec/fixtures/test_grammars/associativity_right/corpus.txt
vendored
Normal file
8
spec/fixtures/test_grammars/associativity_right/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,8 @@
|
|||
===================
|
||||
chained operations
|
||||
===================
|
||||
x+y+z
|
||||
---
|
||||
(expression (math_operation
|
||||
(expression (identifier))
|
||||
(expression (math_operation (expression (identifier)) (expression (identifier))))))
|
||||
31
spec/fixtures/test_grammars/associativity_right/grammar.json
vendored
Normal file
31
spec/fixtures/test_grammars/associativity_right/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
{
|
||||
"name": "associativity_right",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
15
spec/fixtures/test_grammars/conflicting_precedence/expected_error.txt
vendored
Normal file
15
spec/fixtures/test_grammars/conflicting_precedence/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '*' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (sum expression '+' expression) • '*' …
|
||||
2: expression '+' (product expression • '*' expression)
|
||||
3: expression '+' (other_thing expression • '*' '*')
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
|
||||
2: Specify a higher precedence in `sum` than in the other rules.
|
||||
3: Add a conflict for these rules: `sum` `product` `other_thing`
|
||||
58
spec/fixtures/test_grammars/conflicting_precedence/grammar.json
vendored
Normal file
58
spec/fixtures/test_grammars/conflicting_precedence/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,58 @@
|
|||
{
|
||||
"name": "conflicting_precedence",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "other_thing"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"other_thing": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "STRING", "value": "*"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
2
spec/fixtures/test_grammars/epsilon_rules/expected_error.txt
vendored
Normal file
2
spec/fixtures/test_grammars/epsilon_rules/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,2 @@
|
|||
The rule `rule_2` matches the empty string.
|
||||
Tree-sitter currently does not support syntactic rules that match the empty string.
|
||||
15
spec/fixtures/test_grammars/epsilon_rules/grammar.json
vendored
Normal file
15
spec/fixtures/test_grammars/epsilon_rules/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
{
|
||||
"name": "epsilon_rules",
|
||||
|
||||
"rules": {
|
||||
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
|
||||
|
||||
"rule_2": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "rule_1"},
|
||||
{"type": "BLANK"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
41
spec/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
vendored
Normal file
41
spec/fixtures/test_grammars/external_and_internal_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
=========================================
|
||||
single-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable) (line_break))
|
||||
|
||||
=========================================
|
||||
multi-line statements - internal tokens
|
||||
=========================================
|
||||
|
||||
a
|
||||
b
|
||||
|
||||
---
|
||||
|
||||
(statement (variable) (variable) (line_break))
|
||||
|
||||
=========================================
|
||||
single-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello' 'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string) (line_break))
|
||||
|
||||
=========================================
|
||||
multi-line statements - external tokens
|
||||
=========================================
|
||||
|
||||
'hello'
|
||||
'world'
|
||||
|
||||
---
|
||||
|
||||
(statement (string) (string) (line_break))
|
||||
36
spec/fixtures/test_grammars/external_and_internal_tokens/grammar.json
vendored
Normal file
36
spec/fixtures/test_grammars/external_and_internal_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
{
|
||||
"name": "external_and_internal_tokens",
|
||||
|
||||
"externals": [
|
||||
"string",
|
||||
"line_break"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "SYMBOL", "name": "number"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"},
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"line_break": {"type": "STRING", "value": "\n"}
|
||||
}
|
||||
}
|
||||
1
spec/fixtures/test_grammars/external_and_internal_tokens/readme.md
vendored
Normal file
1
spec/fixtures/test_grammars/external_and_internal_tokens/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar has an external scanner whose `scan` method needs to be able to check for the validity of an *internal* token. This is done by including the names of that internal token (`_line_break`) in the grammar's `externals` field.
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
#include <stdbool.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
|
|
@ -6,21 +5,17 @@ enum {
|
|||
LINE_BREAK
|
||||
};
|
||||
|
||||
void *tree_sitter_shared_external_tokens_external_scanner_create() {
|
||||
return NULL;
|
||||
}
|
||||
void *tree_sitter_external_and_internal_tokens_external_scanner_create() { return NULL; }
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
|
||||
}
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_destroy(void *payload) {}
|
||||
|
||||
bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
bool tree_sitter_external_and_internal_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
bool tree_sitter_shared_external_tokens_external_scanner_scan(
|
||||
void tree_sitter_external_and_internal_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
bool tree_sitter_external_and_internal_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
// If a line-break is a valid lookahead token, only skip spaces.
|
||||
|
|
@ -58,6 +53,3 @@ bool tree_sitter_shared_external_tokens_external_scanner_scan(
|
|||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
|
||||
}
|
||||
10
spec/fixtures/test_grammars/external_extra_tokens/corpus.txt
vendored
Normal file
10
spec/fixtures/test_grammars/external_extra_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
========================
|
||||
extra external tokens
|
||||
========================
|
||||
|
||||
x = # a comment
|
||||
y
|
||||
|
||||
---
|
||||
|
||||
(assignment (variable) (comment) (variable))
|
||||
25
spec/fixtures/test_grammars/external_extra_tokens/grammar.json
vendored
Normal file
25
spec/fixtures/test_grammars/external_extra_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
{
|
||||
"name": "external_extra_tokens",
|
||||
|
||||
"externals": [
|
||||
"comment"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"assignment": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "STRING", "value": "="},
|
||||
{"type": "SYMBOL", "name": "variable"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"}
|
||||
}
|
||||
}
|
||||
36
spec/fixtures/test_grammars/external_extra_tokens/scanner.c
vendored
Normal file
36
spec/fixtures/test_grammars/external_extra_tokens/scanner.c
vendored
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
COMMENT,
|
||||
};
|
||||
|
||||
void *tree_sitter_external_extra_tokens_external_scanner_create() { return NULL; }
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_destroy(void *payload) {}
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
bool tree_sitter_external_extra_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
void tree_sitter_external_extra_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
bool tree_sitter_external_extra_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '#') {
|
||||
lexer->advance(lexer, false);
|
||||
while (lexer->lookahead != '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->result_symbol = COMMENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
22
spec/fixtures/test_grammars/external_tokens/corpus.txt
vendored
Normal file
22
spec/fixtures/test_grammars/external_tokens/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,22 @@
|
|||
========================
|
||||
simple external tokens
|
||||
=========================
|
||||
|
||||
x + %(sup (external) scanner?)
|
||||
|
||||
---
|
||||
|
||||
(expression (sum (expression (identifier)) (expression (string))))
|
||||
|
||||
==================================
|
||||
external tokens that require state
|
||||
==================================
|
||||
|
||||
%{sup {} #{x + y} {} scanner?}
|
||||
|
||||
---
|
||||
|
||||
(expression (string
|
||||
(expression (sum
|
||||
(expression (identifier))
|
||||
(expression (identifier))))))
|
||||
57
spec/fixtures/test_grammars/external_tokens/grammar.json
vendored
Normal file
57
spec/fixtures/test_grammars/external_tokens/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,57 @@
|
|||
{
|
||||
"name": "external_tokens",
|
||||
|
||||
"externals": [
|
||||
"_percent_string",
|
||||
"_percent_string_start",
|
||||
"_percent_string_end"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"string": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string_start"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "_percent_string_end"}
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\a+"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -1,4 +1,3 @@
|
|||
#include <stdbool.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
|
|
@ -13,7 +12,7 @@ typedef struct {
|
|||
uint32_t depth;
|
||||
} Scanner;
|
||||
|
||||
void *tree_sitter_external_scanner_example_external_scanner_create() {
|
||||
void *tree_sitter_external_tokens_external_scanner_create() {
|
||||
Scanner *scanner = malloc(sizeof(Scanner));
|
||||
*scanner = (Scanner){
|
||||
.open_delimiter = 0,
|
||||
|
|
@ -23,7 +22,17 @@ void *tree_sitter_external_scanner_example_external_scanner_create() {
|
|||
return scanner;
|
||||
}
|
||||
|
||||
bool tree_sitter_external_scanner_example_external_scanner_scan(
|
||||
void tree_sitter_external_tokens_external_scanner_destroy(void *payload) {
|
||||
free(payload);
|
||||
}
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_reset(void *payload) {}
|
||||
|
||||
bool tree_sitter_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) { return true; }
|
||||
|
||||
void tree_sitter_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}
|
||||
|
||||
bool tree_sitter_external_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
Scanner *scanner = payload;
|
||||
|
||||
|
|
@ -103,16 +112,3 @@ bool tree_sitter_external_scanner_example_external_scanner_scan(
|
|||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
|
||||
}
|
||||
|
||||
bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
|
||||
free(payload);
|
||||
}
|
||||
33
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt
vendored
Normal file
33
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,33 @@
|
|||
========================
|
||||
regexes
|
||||
========================
|
||||
|
||||
/a+/
|
||||
|
||||
---
|
||||
|
||||
(expression (regex))
|
||||
|
||||
========================
|
||||
conditionals
|
||||
========================
|
||||
|
||||
(if (1) /a+/)
|
||||
|
||||
---
|
||||
|
||||
(expression (parenthesized (expression (conditional
|
||||
(parenthesized (expression (number)))
|
||||
(expression (regex))))))
|
||||
|
||||
========================
|
||||
quotients
|
||||
========================
|
||||
|
||||
((1) / 2)
|
||||
|
||||
---
|
||||
|
||||
(expression (parenthesized (expression (quotient
|
||||
(expression (parenthesized (expression (number))))
|
||||
(expression (number))))))
|
||||
65
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json
vendored
Normal file
65
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,65 @@
|
|||
{
|
||||
"name": "lexical_conflicts_due_to_state_merging",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "conditional"},
|
||||
{"type": "SYMBOL", "name": "regex"},
|
||||
{"type": "SYMBOL", "name": "quotient"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "parenthesized"}
|
||||
]
|
||||
},
|
||||
|
||||
"conditional": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "if"},
|
||||
{"type": "SYMBOL", "name": "parenthesized"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"quotient": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "/"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"regex": {
|
||||
"type": "PATTERN",
|
||||
"value": "/[^/\n]+/"
|
||||
},
|
||||
|
||||
"number": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\d+"
|
||||
},
|
||||
|
||||
"parenthesized": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
20
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md
vendored
Normal file
20
spec/fixtures/test_grammars/lexical_conflicts_due_to_state_merging/readme.md
vendored
Normal file
|
|
@ -0,0 +1,20 @@
|
|||
This grammar has two tokens, `regex` and `/`, which conflict: when a `/` character is encountered, the lexer can't tell if it is part of a `/` token or a `regex` by looking ahead only one character. But because these tokens are never valid in the same position, this doesn't cause any problem.
|
||||
|
||||
When merging similar parse states in order to reduce the size of the parse table, it is important that we avoid merging states in a way that causes these two tokens to both appear as valid lookahead symbols in a given state.
|
||||
|
||||
If we weren't careful, this grammar would cause that to happen, because a `regex` is valid in this state:
|
||||
|
||||
```
|
||||
(if (1) /\w+/)
|
||||
^
|
||||
```
|
||||
|
||||
and a `/` is valid in this state:
|
||||
|
||||
|
||||
```
|
||||
((1) / 2)
|
||||
^
|
||||
```
|
||||
|
||||
And these two states would otherwise be candidates for merging, because they both contain only the action `reduce(parenthesized, 3)`.
|
||||
15
spec/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
vendored
Normal file
15
spec/fixtures/test_grammars/precedence_on_single_child_missing/expected_error.txt
vendored
Normal file
|
|
@ -0,0 +1,15 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
identifier • '{' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (expression identifier) • '{' …
|
||||
2: (function_call identifier • block)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `function_call` than in the other rules.
|
||||
2: Specify a higher precedence in `expression` than in the other rules.
|
||||
3: Specify a left or right associativity in `expression`
|
||||
4: Add a conflict for these rules: `expression` `function_call`
|
||||
63
spec/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
vendored
Normal file
63
spec/fixtures/test_grammars/precedence_on_single_child_missing/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_missing",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
14
spec/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
vendored
Normal file
14
spec/fixtures/test_grammars/precedence_on_single_child_missing/readme.md
vendored
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
This language has function calls similar to Ruby's, with no parentheses required, and optional blocks.
|
||||
|
||||
There is a shift/reduce conflict here:
|
||||
|
||||
```
|
||||
foo bar { baz }
|
||||
^
|
||||
```
|
||||
|
||||
The possible actions are:
|
||||
1. `reduce(expression, 1)` - `bar` is an expression being passed to the `foo` function.
|
||||
2. `shift` - `bar` is a function being called with the block `{ baz }`
|
||||
|
||||
The grammars `precedence_on_single_child_negative` and `precedence_on_single_child_positive` show possible resolutions to this conflict.
|
||||
12
spec/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
vendored
Normal file
12
spec/fixtures/test_grammars/precedence_on_single_child_negative/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
===========================
|
||||
function calls with blocks
|
||||
===========================
|
||||
|
||||
foo bar { baz }
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (identifier))
|
||||
(block (expression (identifier)))))
|
||||
63
spec/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
vendored
Normal file
63
spec/fixtures/test_grammars/precedence_on_single_child_negative/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_negative",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
spec/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
vendored
Normal file
1
spec/fixtures/test_grammars/precedence_on_single_child_negative/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a negative precedence. This causes reducing the `bar` variable to an expression to be preferred over shifting the `{` token as part of `function_call`.
|
||||
13
spec/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
vendored
Normal file
13
spec/fixtures/test_grammars/precedence_on_single_child_positive/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
===========================
|
||||
function calls with blocks
|
||||
===========================
|
||||
|
||||
foo bar { baz }
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(block (expression (identifier)))))))
|
||||
63
spec/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
vendored
Normal file
63
spec/fixtures/test_grammars/precedence_on_single_child_positive/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
{
|
||||
"name": "precedence_on_single_child_positive",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
spec/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
vendored
Normal file
1
spec/fixtures/test_grammars/precedence_on_single_child_positive/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar resolves the conflict shown in the `precedence_on_single_child_missing` grammar by giving `function_call` a positive precedence. This causes shifting the `{` token as part of `function_call` to be preferred over reducing the `bar` variable to an expression.
|
||||
24
spec/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
vendored
Normal file
24
spec/fixtures/test_grammars/precedence_on_subsequence/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,24 @@
|
|||
==========================================
|
||||
curly brace blocks with high precedence
|
||||
==========================================
|
||||
|
||||
a b {}
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (function_call (identifier) (block)))))
|
||||
|
||||
==========================================
|
||||
do blocks with low precedence
|
||||
==========================================
|
||||
|
||||
a b do end
|
||||
|
||||
---
|
||||
|
||||
(expression (function_call
|
||||
(identifier)
|
||||
(expression (identifier))
|
||||
(do_block)))
|
||||
135
spec/fixtures/test_grammars/precedence_on_subsequence/grammar.json
vendored
Normal file
135
spec/fixtures/test_grammars/precedence_on_subsequence/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,135 @@
|
|||
{
|
||||
"name": "precedence_on_subsequence",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "scope_resolution"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"scope_resolution": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "STRING",
|
||||
"value": "{}"
|
||||
},
|
||||
|
||||
"do_block": {
|
||||
"type": "STRING",
|
||||
"value": "do end"
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
3
spec/fixtures/test_grammars/readme.md
vendored
Normal file
3
spec/fixtures/test_grammars/readme.md
vendored
Normal file
|
|
@ -0,0 +1,3 @@
|
|||
These small grammars demonstrate specific features or test for certain specific regressions.
|
||||
|
||||
For some of them, compilation is expected to fail with a given error message. For others, the resulting parser is expected to produce certain trees.
|
||||
13
spec/fixtures/test_grammars/readme_grammar/corpus.txt
vendored
Normal file
13
spec/fixtures/test_grammars/readme_grammar/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
==================================
|
||||
the readme example
|
||||
==================================
|
||||
|
||||
a + b * c
|
||||
|
||||
---
|
||||
|
||||
(expression (sum
|
||||
(expression (variable))
|
||||
(expression (product
|
||||
(expression (variable))
|
||||
(expression (variable))))))
|
||||
67
spec/fixtures/test_grammars/readme_grammar/grammar.json
vendored
Normal file
67
spec/fixtures/test_grammars/readme_grammar/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,67 @@
|
|||
{
|
||||
"name": "readme_grammar",
|
||||
|
||||
// Things that can appear anywhere in the language, like comments
|
||||
// and whitespace, are expressed as 'extras'.
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
|
||||
// The first rule listed in the grammar becomes the 'start rule'.
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
// Tokens like '+' and '*' are described directly within the
|
||||
// grammar's rules, as opposed to in a separate lexer description.
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Ambiguities can be resolved at compile time by assigning precedence
|
||||
// values to rule subtrees.
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 2,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Tokens can be specified using ECMAScript regexps.
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"comment": {"type": "PATTERN", "value": "#.*"},
|
||||
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
||||
}
|
||||
}
|
||||
7
spec/fixtures/test_grammars/start_rule_is_blank/corpus.txt
vendored
Normal file
7
spec/fixtures/test_grammars/start_rule_is_blank/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,7 @@
|
|||
========================
|
||||
the empty string
|
||||
=======================
|
||||
|
||||
---
|
||||
|
||||
(first_rule)
|
||||
6
spec/fixtures/test_grammars/start_rule_is_blank/grammar.json
vendored
Normal file
6
spec/fixtures/test_grammars/start_rule_is_blank/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"name": "start_rule_is_blank",
|
||||
"rules": {
|
||||
"first_rule": {"type": "BLANK"}
|
||||
}
|
||||
}
|
||||
6
spec/fixtures/test_grammars/start_rule_is_token/corpus.txt
vendored
Normal file
6
spec/fixtures/test_grammars/start_rule_is_token/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
===========================
|
||||
the single token
|
||||
==========================
|
||||
the-value
|
||||
---
|
||||
(first_rule)
|
||||
6
spec/fixtures/test_grammars/start_rule_is_token/grammar.json
vendored
Normal file
6
spec/fixtures/test_grammars/start_rule_is_token/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,6 @@
|
|||
{
|
||||
"name": "start_rule_is_token",
|
||||
"rules": {
|
||||
"first_rule": {"type": "STRING", "value": "the-value"}
|
||||
}
|
||||
}
|
||||
61
spec/helpers/file_helpers.cc
Normal file
61
spec/helpers/file_helpers.cc
Normal file
|
|
@ -0,0 +1,61 @@
|
|||
#include "helpers/file_helpers.h"
|
||||
#include <sys/stat.h>
|
||||
#include <errno.h>
|
||||
#include <fstream>
|
||||
#include <dirent.h>
|
||||
|
||||
using std::string;
|
||||
using std::ifstream;
|
||||
using std::istreambuf_iterator;
|
||||
using std::ofstream;
|
||||
using std::vector;
|
||||
|
||||
bool file_exists(const string &path) {
|
||||
struct stat file_stat;
|
||||
return stat(path.c_str(), &file_stat) == 0;
|
||||
}
|
||||
|
||||
int get_modified_time(const string &path) {
|
||||
struct stat file_stat;
|
||||
if (stat(path.c_str(), &file_stat) != 0) {
|
||||
if (errno != ENOENT)
|
||||
fprintf(stderr, "Error in stat() for path: %s\n", + path.c_str());
|
||||
return 0;
|
||||
}
|
||||
return file_stat.st_mtime;
|
||||
}
|
||||
|
||||
string read_file(const string &path) {
|
||||
ifstream file(path);
|
||||
istreambuf_iterator<char> file_iterator(file), end_iterator;
|
||||
string content(file_iterator, end_iterator);
|
||||
file.close();
|
||||
return content;
|
||||
}
|
||||
|
||||
void write_file(const string &path, const string &content) {
|
||||
ofstream file(path);
|
||||
file << content;
|
||||
file.close();
|
||||
}
|
||||
|
||||
vector<string> list_directory(const string &path) {
|
||||
vector<string> result;
|
||||
|
||||
DIR *dir = opendir(path.c_str());
|
||||
if (!dir) {
|
||||
printf("\nTest error - no such directory '%s'", path.c_str());
|
||||
return result;
|
||||
}
|
||||
|
||||
struct dirent *dir_entry;
|
||||
while ((dir_entry = readdir(dir))) {
|
||||
string name(dir_entry->d_name);
|
||||
if (name != "." && name != "..") {
|
||||
result.push_back(name);
|
||||
}
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return result;
|
||||
}
|
||||
14
spec/helpers/file_helpers.h
Normal file
14
spec/helpers/file_helpers.h
Normal file
|
|
@ -0,0 +1,14 @@
|
|||
#ifndef HELPERS_FILE_HELPERS_H_
|
||||
#define HELPERS_FILE_HELPERS_H_
|
||||
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include <sys/stat.h>
|
||||
|
||||
bool file_exists(const std::string &path);
|
||||
int get_modified_time(const std::string &path);
|
||||
std::string read_file(const std::string &path);
|
||||
void write_file(const std::string &path, const std::string &content);
|
||||
std::vector<std::string> list_directory(const std::string &path);
|
||||
|
||||
#endif // HELPERS_FILE_HELPERS_H_
|
||||
|
|
@ -1,12 +1,12 @@
|
|||
#include "spec_helper.h"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/file_helpers.h"
|
||||
#include <unistd.h>
|
||||
#include <dlfcn.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/wait.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <sys/stat.h>
|
||||
#include <fstream>
|
||||
#include <stdlib.h>
|
||||
#include "tree_sitter/compiler.h"
|
||||
|
|
@ -54,25 +54,10 @@ static std::string run_command(const char *cmd, const char *args[]) {
|
|||
}
|
||||
}
|
||||
|
||||
static bool file_exists(const string &path) {
|
||||
struct stat file_stat;
|
||||
return stat(path.c_str(), &file_stat) == 0;
|
||||
}
|
||||
|
||||
static int get_modified_time(const string &path) {
|
||||
struct stat file_stat;
|
||||
if (stat(path.c_str(), &file_stat) != 0) {
|
||||
if (errno != ENOENT)
|
||||
fprintf(stderr, "Error in stat() for path: %s\n", + path.c_str());
|
||||
return 0;
|
||||
}
|
||||
return file_stat.st_mtime;
|
||||
}
|
||||
|
||||
const TSLanguage *load_language(const string &source_filename,
|
||||
const string &lib_filename,
|
||||
const string &language_name,
|
||||
string external_scanner_filename = "") {
|
||||
static const TSLanguage *load_language(const string &source_filename,
|
||||
const string &lib_filename,
|
||||
const string &language_name,
|
||||
string external_scanner_filename = "") {
|
||||
string language_function_name = "tree_sitter_" + language_name;
|
||||
string header_dir = getenv("PWD") + string("/include");
|
||||
int source_mtime = get_modified_time(source_filename);
|
||||
|
|
@ -132,9 +117,9 @@ const TSLanguage *load_language(const string &source_filename,
|
|||
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
|
||||
}
|
||||
|
||||
const TSLanguage *load_compile_result(const string &name,
|
||||
const TSCompileResult &compile_result,
|
||||
string external_scanner_path) {
|
||||
const TSLanguage *load_test_language(const string &name,
|
||||
const TSCompileResult &compile_result,
|
||||
string external_scanner_path) {
|
||||
if (compile_result.error_type != TSCompileErrorTypeNone) {
|
||||
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
|
||||
return nullptr;
|
||||
|
|
@ -155,7 +140,7 @@ const TSLanguage *load_compile_result(const string &name,
|
|||
return language;
|
||||
}
|
||||
|
||||
const TSLanguage *get_test_language(const string &language_name) {
|
||||
const TSLanguage *load_real_language(const string &language_name) {
|
||||
if (loaded_languages[language_name])
|
||||
return loaded_languages[language_name];
|
||||
|
||||
|
|
@ -182,20 +167,14 @@ const TSLanguage *get_test_language(const string &language_name) {
|
|||
if (parser_mtime < grammar_mtime || parser_mtime < libcompiler_mtime) {
|
||||
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
|
||||
|
||||
ifstream grammar_file(grammar_filename);
|
||||
istreambuf_iterator<char> grammar_file_iterator(grammar_file), end_iterator;
|
||||
string grammar_json(grammar_file_iterator, end_iterator);
|
||||
grammar_file.close();
|
||||
|
||||
string grammar_json = read_file(grammar_filename);
|
||||
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
|
||||
if (result.error_type != TSCompileErrorTypeNone) {
|
||||
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
ofstream parser_file(parser_filename);
|
||||
parser_file << result.code;
|
||||
parser_file.close();
|
||||
write_file(parser_filename, result.code);
|
||||
}
|
||||
|
||||
mkdir("out/tmp", 0777);
|
||||
|
|
|
|||
|
|
@ -5,8 +5,10 @@
|
|||
#include "tree_sitter/runtime.h"
|
||||
#include <string>
|
||||
|
||||
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
|
||||
std::string external_scanner_path = "");
|
||||
const TSLanguage *get_test_language(const std::string &language_name);
|
||||
const TSLanguage *load_real_language(const std::string &name);
|
||||
|
||||
const TSLanguage *load_test_language(const std::string &name,
|
||||
const TSCompileResult &compile_result,
|
||||
std::string external_scanner_path = "");
|
||||
|
||||
#endif // HELPERS_LOAD_LANGUAGE_H_
|
||||
|
|
|
|||
|
|
@ -1,20 +1,18 @@
|
|||
#include "helpers/read_test_entries.h"
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include <fstream>
|
||||
#include <streambuf>
|
||||
#include <dirent.h>
|
||||
|
||||
#include <regex>
|
||||
#include "helpers/file_helpers.h"
|
||||
|
||||
using std::regex;
|
||||
using std::regex_search;
|
||||
using std::regex_replace;
|
||||
using std::smatch;
|
||||
using std::regex_constants::extended;
|
||||
|
||||
using std::smatch;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::ifstream;
|
||||
using std::istreambuf_iterator;
|
||||
|
||||
string fixtures_dir = "spec/fixtures/";
|
||||
|
||||
static string trim_output(const string &input) {
|
||||
string result(input);
|
||||
|
|
@ -27,7 +25,7 @@ static string trim_output(const string &input) {
|
|||
|
||||
static vector<TestEntry> parse_test_entries(string content) {
|
||||
regex header_pattern("===+\n" "([^=]+)\n" "===+\n", extended);
|
||||
regex separator_pattern("---+\n", extended);
|
||||
regex separator_pattern("---+\r?\n", extended);
|
||||
vector<string> descriptions;
|
||||
vector<string> bodies;
|
||||
|
||||
|
|
@ -55,51 +53,42 @@ static vector<TestEntry> parse_test_entries(string content) {
|
|||
body.substr(0, matches.position() - 1),
|
||||
trim_output(body.substr(matches.position() + matches[0].length()))
|
||||
});
|
||||
} else {
|
||||
puts(("Invalid corpus entry with description: " + descriptions[i]).c_str());
|
||||
abort();
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
static vector<string> list_directory(string dir_name) {
|
||||
vector<string> result;
|
||||
|
||||
DIR *dir = opendir(dir_name.c_str());
|
||||
if (!dir) {
|
||||
printf("\nTest error - no such directory '%s'", dir_name.c_str());
|
||||
return result;
|
||||
}
|
||||
|
||||
struct dirent *dir_entry;
|
||||
while ((dir_entry = readdir(dir))) {
|
||||
string name(dir_entry->d_name);
|
||||
if (name != "." && name != "..")
|
||||
result.push_back(dir_name + "/" + name);
|
||||
}
|
||||
|
||||
closedir(dir);
|
||||
return result;
|
||||
}
|
||||
|
||||
static string read_file(string filename) {
|
||||
ifstream file(filename);
|
||||
string result((istreambuf_iterator<char>(file)), istreambuf_iterator<char>());
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<TestEntry> read_corpus_entries(string language_name) {
|
||||
vector<TestEntry> read_real_language_corpus(string language_name) {
|
||||
vector<TestEntry> result;
|
||||
|
||||
string fixtures_dir = "spec/fixtures/";
|
||||
|
||||
string test_directory = fixtures_dir + "grammars/" + language_name + "/grammar_test";
|
||||
for (string &test_filename : list_directory(test_directory))
|
||||
for (TestEntry &entry : parse_test_entries(read_file(test_filename)))
|
||||
for (string &test_filename : list_directory(test_directory)) {
|
||||
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
|
||||
result.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
string error_test_filename = fixtures_dir + "/error_corpus/" + language_name + "_errors.txt";
|
||||
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename)))
|
||||
for (TestEntry &entry : parse_test_entries(read_file(error_test_filename))) {
|
||||
result.push_back(entry);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<TestEntry> read_test_language_corpus(string language_name) {
|
||||
vector<TestEntry> result;
|
||||
|
||||
string test_directory = fixtures_dir + "test_grammars/" + language_name;
|
||||
for (string &test_filename : list_directory(test_directory)) {
|
||||
for (TestEntry &entry : parse_test_entries(read_file(test_directory + "/" + test_filename))) {
|
||||
result.push_back(entry);
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
@ -10,6 +10,7 @@ struct TestEntry {
|
|||
std::string tree_string;
|
||||
};
|
||||
|
||||
std::vector<TestEntry> read_corpus_entries(std::string directory);
|
||||
std::vector<TestEntry> read_real_language_corpus(std::string name);
|
||||
std::vector<TestEntry> read_test_language_corpus(std::string name);
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -1,6 +1,8 @@
|
|||
#include "rule_helpers.h"
|
||||
#include <memory>
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
using std::make_shared;
|
||||
|
|
@ -52,4 +54,9 @@ namespace tree_sitter {
|
|||
return left.name == right.name && left.rule->operator==(*right.rule) &&
|
||||
left.type == right.type;
|
||||
}
|
||||
|
||||
bool operator==(const LexicalVariable &left, const LexicalVariable &right) {
|
||||
return left.name == right.name && left.rule->operator==(*right.rule) &&
|
||||
left.type == right.type && left.is_string == right.is_string;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,7 +15,11 @@ namespace tree_sitter {
|
|||
rule_ptr i_token(size_t index);
|
||||
rule_ptr active_prec(int precedence, rule_ptr);
|
||||
|
||||
struct Variable;
|
||||
struct LexicalVariable;
|
||||
|
||||
bool operator==(const Variable &left, const Variable &right);
|
||||
bool operator==(const LexicalVariable &left, const LexicalVariable &right);
|
||||
}
|
||||
|
||||
#endif // HELPERS_RULE_HELPERS_H_
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
|
|
@ -41,6 +42,11 @@ ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
|
|||
return stream << string("{") << variable.name << string(", ") << variable.productions << string(", ") << to_string(variable.type) << string("}");
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const LexicalVariable &variable) {
|
||||
return stream << "{" << variable.name << ", " << variable.rule << ", " <<
|
||||
to_string(variable.type) << ", " << to_string(variable.is_string) << "}";
|
||||
}
|
||||
|
||||
std::ostream &operator<<(std::ostream &stream, const AdvanceAction &action) {
|
||||
return stream << string("#<advance ") + to_string(action.state_index) + ">";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -93,10 +93,11 @@ using std::string;
|
|||
using std::to_string;
|
||||
struct Variable;
|
||||
struct SyntaxVariable;
|
||||
struct LexicalVariable;
|
||||
struct AdvanceAction;
|
||||
struct AcceptTokenAction;
|
||||
class ParseAction;
|
||||
class ParseState;
|
||||
struct ParseAction;
|
||||
struct ParseState;
|
||||
struct ExternalToken;
|
||||
struct ProductionStep;
|
||||
struct PrecedenceRange;
|
||||
|
|
@ -107,6 +108,7 @@ ostream &operator<<(ostream &, const Rule &);
|
|||
ostream &operator<<(ostream &, const rule_ptr &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
ostream &operator<<(ostream &, const SyntaxVariable &);
|
||||
ostream &operator<<(ostream &, const LexicalVariable &);
|
||||
ostream &operator<<(ostream &, const AdvanceAction &);
|
||||
ostream &operator<<(ostream &, const AcceptTokenAction &);
|
||||
ostream &operator<<(ostream &, const ParseAction &);
|
||||
|
|
@ -119,8 +121,8 @@ namespace build_tables {
|
|||
|
||||
class LexItem;
|
||||
class LexItemSet;
|
||||
class ParseItem;
|
||||
class ParseItemSet;
|
||||
struct ParseItem;
|
||||
struct ParseItemSet;
|
||||
class LookaheadSet;
|
||||
|
||||
ostream &operator<<(ostream &, const LexItem &);
|
||||
|
|
|
|||
|
|
@ -1,847 +0,0 @@
|
|||
#include "spec_helper.h"
|
||||
#include "runtime/alloc.h"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/stderr_logger.h"
|
||||
#include "helpers/dedent.h"
|
||||
#include "compiler/util/string_helpers.h"
|
||||
#include <map>
|
||||
|
||||
static string fill_template(string input, map<string, string> parameters) {
|
||||
string result = input;
|
||||
for (const auto &pair : parameters) {
|
||||
util::str_replace(&result, "{{" + pair.first + "}}", pair.second);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("compile_grammar", []() {
|
||||
TSDocument *document;
|
||||
|
||||
before_each([&]() {
|
||||
document = ts_document_new();
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
ts_document_free(document);
|
||||
});
|
||||
|
||||
auto assert_root_node = [&](const string &expected_string) {
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
char *node_string = ts_node_string(root_node, document);
|
||||
AssertThat(node_string, Equals(expected_string));
|
||||
ts_free(node_string);
|
||||
};
|
||||
|
||||
describe("conflicts", [&]() {
|
||||
it("can resolve shift/reduce conflicts using associativities", [&]() {
|
||||
string grammar_template = R"JSON({
|
||||
"name": "associativity_example",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "math_operation"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"math_operation": {
|
||||
"type": "{{math_operation_prec_type}}",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
// Ambiguity, which '+' applies first?
|
||||
ts_document_set_input_string(document, "x+y+z");
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"math_operation_prec_type", "PREC"}
|
||||
}).c_str());
|
||||
|
||||
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
||||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '+' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (math_operation expression '+' expression) • '+' …
|
||||
2: expression '+' (math_operation expression • '+' expression)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a left or right associativity in `math_operation`
|
||||
2: Add a conflict for these rules: `math_operation`
|
||||
)MESSAGE")));
|
||||
|
||||
result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"math_operation_prec_type", "PREC_LEFT"}
|
||||
}).c_str());
|
||||
|
||||
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (math_operation "
|
||||
"(expression (math_operation (expression (identifier)) (expression (identifier)))) "
|
||||
"(expression (identifier))))");
|
||||
|
||||
result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"math_operation_prec_type", "PREC_RIGHT"}
|
||||
}).c_str());
|
||||
|
||||
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (math_operation "
|
||||
"(expression (identifier)) "
|
||||
"(expression (math_operation (expression (identifier)) (expression (identifier))))))");
|
||||
});
|
||||
|
||||
it("can resolve shift/reduce conflicts involving single-child rules using precedence", [&]() {
|
||||
string grammar_template = R"JSON({
|
||||
"name": "associativity_example",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "PREC_RIGHT",
|
||||
"value": {{function_call_precedence}},
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "{"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "}"}
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
// Ambiguity: is the trailing block associated with `bar` or `foo`?
|
||||
ts_document_set_input_string(document, "foo bar { baz }");
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"function_call_precedence", "0"}
|
||||
}).c_str());
|
||||
|
||||
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
||||
Unresolved conflict for symbol sequence:
|
||||
|
||||
identifier • '{' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (expression identifier) • '{' …
|
||||
2: (function_call identifier • block)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `function_call` than in the other rules.
|
||||
2: Specify a higher precedence in `expression` than in the other rules.
|
||||
3: Specify a left or right associativity in `expression`
|
||||
4: Add a conflict for these rules: `expression` `function_call`
|
||||
)MESSAGE")));
|
||||
|
||||
// Giving function calls lower precedence than expressions causes `bar`
|
||||
// to be treated as an expression passed to `foo`, not as a function
|
||||
// that's being called with a block.
|
||||
result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"function_call_precedence", "-1"}
|
||||
}).c_str());
|
||||
|
||||
AssertThat(result.error_message, IsNull());
|
||||
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (function_call "
|
||||
"(identifier) "
|
||||
"(expression (identifier)) "
|
||||
"(block (expression (identifier)))))");
|
||||
|
||||
// Giving function calls higher precedence than expressions causes `bar`
|
||||
// to be treated as a function that's being called with a block, not as
|
||||
// an expression passed to `foo`.
|
||||
result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
{"function_call_precedence", "1"}
|
||||
}).c_str());
|
||||
|
||||
AssertThat(result.error_message, IsNull());
|
||||
ts_document_set_language(document, load_compile_result("associativity_example", result));
|
||||
ts_document_set_input_string(document, "foo bar { baz }");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (function_call "
|
||||
"(identifier) "
|
||||
"(expression (function_call "
|
||||
"(identifier) "
|
||||
"(block (expression (identifier)))))))");
|
||||
});
|
||||
|
||||
it("handles precedence applied to specific rule subsequences (regression)", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON({
|
||||
"name": "precedence_on_subsequence",
|
||||
|
||||
"extras": [
|
||||
{"type": "STRING", "value": " "}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "function_call"},
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "scope_resolution"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"function_call": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "identifier"},
|
||||
{
|
||||
"type": "PREC",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "do_block"}
|
||||
]
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"scope_resolution": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "::"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
]
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"block": {
|
||||
"type": "STRING",
|
||||
"value": "{}"
|
||||
},
|
||||
|
||||
"do_block": {
|
||||
"type": "STRING",
|
||||
"value": "do end"
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
})JSON");
|
||||
|
||||
auto language = load_compile_result("precedence_on_subsequence", result);
|
||||
ts_document_set_language(document, language);
|
||||
|
||||
ts_document_set_input_string(document, "a b {}");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (function_call "
|
||||
"(identifier) "
|
||||
"(expression (function_call (identifier) (block)))))");
|
||||
|
||||
ts_document_set_input_string(document, "a b do end");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (function_call "
|
||||
"(identifier) "
|
||||
"(expression (identifier)) "
|
||||
"(do_block)))");
|
||||
});
|
||||
|
||||
it("does not allow conflicting precedences", [&]() {
|
||||
string grammar_template = R"JSON({
|
||||
"name": "conflicting_precedence_example",
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "other_thing"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"other_thing": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": -1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "STRING", "value": "*"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(fill_template(grammar_template, {
|
||||
}).c_str());
|
||||
|
||||
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
||||
Unresolved conflict for symbol sequence:
|
||||
|
||||
expression '+' expression • '*' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (sum expression '+' expression) • '*' …
|
||||
2: expression '+' (product expression • '*' expression)
|
||||
3: expression '+' (other_thing expression • '*' '*')
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
1: Specify a higher precedence in `product` and `other_thing` than in the other rules.
|
||||
2: Specify a higher precedence in `sum` than in the other rules.
|
||||
3: Add a conflict for these rules: `sum` `product` `other_thing`
|
||||
)MESSAGE")));
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar contains rules that match the empty string", [&]() {
|
||||
it("reports an error", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "empty_rules",
|
||||
|
||||
"rules": {
|
||||
"rule_1": {"type": "SYMBOL", "name": "rule_2"},
|
||||
|
||||
"rule_2": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "rule_1"},
|
||||
{"type": "BLANK"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
AssertThat(result.error_message, Equals(dedent(R"MESSAGE(
|
||||
The rule `rule_2` matches the empty string.
|
||||
Tree-sitter currently does not support syntactic rules that match the empty string.
|
||||
)MESSAGE")));
|
||||
});
|
||||
});
|
||||
|
||||
describe("external scanners", [&]() {
|
||||
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
|
||||
string grammar = R"JSON({
|
||||
"name": "external_scanner_example",
|
||||
|
||||
"externals": [
|
||||
"_percent_string",
|
||||
"_percent_string_start",
|
||||
"_percent_string_end"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
},
|
||||
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"string": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_percent_string_start"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "SYMBOL", "name": "_percent_string_end"}
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\a+"
|
||||
}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
||||
AssertThat(result.error_message, IsNull());
|
||||
|
||||
ts_document_set_language(document, load_compile_result(
|
||||
"external_scanner_example",
|
||||
result,
|
||||
"spec/fixtures/external_scanners/percent_strings.c"
|
||||
));
|
||||
|
||||
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
|
||||
|
||||
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
|
||||
});
|
||||
|
||||
it("allows external scanners to refer to tokens that are defined internally", [&]() {
|
||||
string grammar = R"JSON({
|
||||
"name": "shared_external_tokens",
|
||||
|
||||
"externals": [
|
||||
"string",
|
||||
"line_break"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "SYMBOL", "name": "line_break"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "string"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "SYMBOL", "name": "number"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"},
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"line_break": {"type": "STRING", "value": "\n"}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
||||
AssertThat(result.error_message, IsNull());
|
||||
|
||||
ts_document_set_language(document, load_compile_result(
|
||||
"shared_external_tokens",
|
||||
result,
|
||||
"spec/fixtures/external_scanners/shared_external_tokens.c"
|
||||
));
|
||||
|
||||
ts_document_set_input_string(document, "a b\n");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(statement (variable) (variable) (line_break))");
|
||||
|
||||
ts_document_set_input_string(document, "a \nb\n");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(statement (variable) (variable) (line_break))");
|
||||
|
||||
ts_document_set_input_string(document, "'hello' 'world'\n");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(statement (string) (string) (line_break))");
|
||||
|
||||
ts_document_set_input_string(document, "'hello' \n'world'\n");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(statement (string) (string) (line_break))");
|
||||
});
|
||||
|
||||
it("allows external tokens to be used as extras", [&]() {
|
||||
string grammar = R"JSON({
|
||||
"name": "extra_external_tokens",
|
||||
|
||||
"externals": [
|
||||
"comment"
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"assignment": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{"type": "STRING", "value": "="},
|
||||
{"type": "SYMBOL", "name": "variable"}
|
||||
]
|
||||
},
|
||||
|
||||
"variable": {"type": "PATTERN", "value": "\\a+"}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
||||
AssertThat(result.error_message, IsNull());
|
||||
|
||||
ts_document_set_language(document, load_compile_result(
|
||||
"extra_external_tokens",
|
||||
result,
|
||||
"spec/fixtures/external_scanners/extra_external_tokens.c"
|
||||
));
|
||||
|
||||
ts_document_set_input_string(document, "x = # a comment\n y");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(assignment (variable) (comment) (variable))");
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar's start symbol is a token", [&]() {
|
||||
it("parses the token", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "one_token_language",
|
||||
"rules": {
|
||||
"first_rule": {"type": "STRING", "value": "the-value"}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
ts_document_set_language(document, load_compile_result("one_token_language", result));
|
||||
|
||||
ts_document_set_input_string(document, "the-value");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(first_rule)");
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar's start symbol is blank", [&]() {
|
||||
it("parses the empty string", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "blank_language",
|
||||
"rules": {
|
||||
"first_rule": {"type": "BLANK"}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
ts_document_set_language(document, load_compile_result("blank_language", result));
|
||||
|
||||
ts_document_set_input_string(document, "");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(first_rule)");
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar contains anonymous tokens with escaped characters", [&]() {
|
||||
it("escapes the escaped characters properly in the generated parser", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "escaped_char_language",
|
||||
"rules": {
|
||||
"first_rule": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "\n"},
|
||||
{"type": "STRING", "value": "\r"},
|
||||
{"type": "STRING", "value": "'hello'"},
|
||||
{"type": "PATTERN", "value": "\\d+"}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
ts_document_set_language(document, load_compile_result("escaped_char_language", result));
|
||||
|
||||
ts_document_set_input_string(document, "1234");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(first_rule)");
|
||||
|
||||
ts_document_set_input_string(document, "\n");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(first_rule)");
|
||||
|
||||
ts_document_set_input_string(document, "'hello'");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(first_rule)");
|
||||
});
|
||||
});
|
||||
|
||||
describe("the grammar in the README", [&]() {
|
||||
it("parses the input in the README", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
{
|
||||
"name": "arithmetic",
|
||||
|
||||
// Things that can appear anywhere in the language, like comments
|
||||
// and whitespace, are expressed as 'extras'.
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"},
|
||||
{"type": "SYMBOL", "name": "comment"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
|
||||
// The first rule listed in the grammar becomes the 'start rule'.
|
||||
"expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "sum"},
|
||||
{"type": "SYMBOL", "name": "product"},
|
||||
{"type": "SYMBOL", "name": "number"},
|
||||
{"type": "SYMBOL", "name": "variable"},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
// Tokens like '+' and '*' are described directly within the
|
||||
// grammar's rules, as opposed to in a seperate lexer description.
|
||||
"sum": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "+"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Ambiguities can be resolved at compile time by assigning precedence
|
||||
// values to rule subtrees.
|
||||
"product": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 2,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
// Tokens can be specified using ECMAScript regexps.
|
||||
"number": {"type": "PATTERN", "value": "\\d+"},
|
||||
"comment": {"type": "PATTERN", "value": "#.*"},
|
||||
"variable": {"type": "PATTERN", "value": "[a-zA-Z]\\w*"}
|
||||
}
|
||||
}
|
||||
)JSON");
|
||||
|
||||
const TSLanguage *language = load_compile_result("arithmetic", result);
|
||||
|
||||
ts_document_set_language(document, language);
|
||||
ts_document_set_input_string(document, "a + b * c");
|
||||
ts_document_parse(document);
|
||||
|
||||
assert_root_node(
|
||||
"(expression (sum "
|
||||
"(expression (variable)) "
|
||||
"(expression (product "
|
||||
"(expression (variable)) "
|
||||
"(expression (variable))))))");
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,185 +0,0 @@
|
|||
#include "spec_helper.h"
|
||||
#include "runtime/alloc.h"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/read_test_entries.h"
|
||||
#include "helpers/spy_input.h"
|
||||
#include "helpers/stderr_logger.h"
|
||||
#include "helpers/point_helpers.h"
|
||||
#include "helpers/encoding_helpers.h"
|
||||
#include "helpers/record_alloc.h"
|
||||
#include "helpers/random_helpers.h"
|
||||
#include "helpers/scope_sequence.h"
|
||||
#include <set>
|
||||
|
||||
static void assert_correct_tree_shape(const TSDocument *document, string tree_string) {
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
const char *node_string = ts_node_string(root_node, document);
|
||||
string result(node_string);
|
||||
ts_free((void *)node_string);
|
||||
AssertThat(result, Equals(tree_string));
|
||||
}
|
||||
|
||||
static void assert_consistent_sizes(TSNode node) {
|
||||
size_t child_count = ts_node_child_count(node);
|
||||
size_t start_byte = ts_node_start_byte(node);
|
||||
size_t end_byte = ts_node_end_byte(node);
|
||||
TSPoint start_point = ts_node_start_point(node);
|
||||
TSPoint end_point = ts_node_end_point(node);
|
||||
bool some_child_has_changes = false;
|
||||
|
||||
AssertThat(start_byte, !IsGreaterThan(end_byte));
|
||||
AssertThat(start_point, !IsGreaterThan(end_point));
|
||||
|
||||
size_t last_child_end_byte = start_byte;
|
||||
TSPoint last_child_end_point = start_point;
|
||||
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
size_t child_start_byte = ts_node_start_byte(child);
|
||||
TSPoint child_start_point = ts_node_start_point(child);
|
||||
|
||||
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
|
||||
assert_consistent_sizes(child);
|
||||
if (ts_node_has_changes(child))
|
||||
some_child_has_changes = true;
|
||||
|
||||
last_child_end_byte = ts_node_end_byte(child);
|
||||
last_child_end_point = ts_node_end_point(child);
|
||||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(end_point, !IsLessThan(last_child_end_point));
|
||||
}
|
||||
|
||||
if (some_child_has_changes) {
|
||||
AssertThat(ts_node_has_changes(node), IsTrue());
|
||||
}
|
||||
}
|
||||
|
||||
static void assert_correct_tree_size(TSDocument *document, string content) {
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
size_t expected_size = content.size();
|
||||
|
||||
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
|
||||
// returned from `ts_document_root_node` (e.g. an `object` node), does not
|
||||
// actually point to the root of the tree. In this weird case, trailing
|
||||
// whitespace is not included in the root node's size.
|
||||
//
|
||||
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
|
||||
if (ts_document_language(document) == get_test_language("json") &&
|
||||
string(ts_node_type(root_node, document)) != "ERROR")
|
||||
expected_size = content.find_last_not_of("\n ") + 1;
|
||||
|
||||
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
|
||||
assert_consistent_sizes(root_node);
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("The Corpus", []() {
|
||||
vector<string> test_languages({
|
||||
"javascript",
|
||||
"json",
|
||||
"c",
|
||||
"cpp",
|
||||
"python",
|
||||
});
|
||||
|
||||
for (auto &language_name : test_languages) {
|
||||
describe(("the " + language_name + " language").c_str(), [&]() {
|
||||
TSDocument *document;
|
||||
|
||||
before_each([&]() {
|
||||
record_alloc::start();
|
||||
document = ts_document_new();
|
||||
ts_document_set_language(document, get_test_language(language_name));
|
||||
|
||||
// ts_document_set_logger(document, stderr_logger_new(true));
|
||||
// ts_document_print_debugging_graphs(document, true);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
ts_document_free(document);
|
||||
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
|
||||
});
|
||||
|
||||
for (auto &entry : read_corpus_entries(language_name)) {
|
||||
SpyInput *input;
|
||||
|
||||
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
|
||||
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
|
||||
input = new SpyInput(entry.input, 3);
|
||||
ts_document_set_input(document, input->input());
|
||||
edit_sequence();
|
||||
assert_correct_tree_shape(document, entry.tree_string);
|
||||
assert_correct_tree_size(document, input->content);
|
||||
delete input;
|
||||
});
|
||||
};
|
||||
|
||||
it_handles_edit_sequence("initial parse", [&]() {
|
||||
ts_document_parse(document);
|
||||
});
|
||||
|
||||
std::set<std::pair<size_t, size_t>> deletions;
|
||||
std::set<std::pair<size_t, string>> insertions;
|
||||
|
||||
for (size_t i = 0; i < 60; i++) {
|
||||
size_t edit_position = random() % utf8_char_count(entry.input);
|
||||
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
|
||||
string inserted_text = random_words(random() % 4 + 1);
|
||||
|
||||
if (insertions.insert({edit_position, inserted_text}).second) {
|
||||
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
|
||||
|
||||
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
|
||||
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
|
||||
ts_document_parse(document);
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
ts_document_edit(document, input->undo());
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
TSRange *ranges;
|
||||
uint32_t range_count;
|
||||
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
|
||||
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
|
||||
|
||||
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
|
||||
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
|
||||
input->content, ranges, range_count);
|
||||
ts_free(ranges);
|
||||
});
|
||||
}
|
||||
|
||||
if (deletions.insert({edit_position, deletion_size}).second) {
|
||||
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
|
||||
|
||||
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
|
||||
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
|
||||
ts_document_parse(document);
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
ts_document_edit(document, input->undo());
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
TSRange *ranges;
|
||||
uint32_t range_count;
|
||||
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
|
||||
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
|
||||
|
||||
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
|
||||
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
|
||||
input->content, ranges, range_count);
|
||||
ts_free(ranges);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
});
|
||||
|
||||
END_TEST
|
||||
181
spec/integration/real_grammars.cc
Normal file
181
spec/integration/real_grammars.cc
Normal file
|
|
@ -0,0 +1,181 @@
|
|||
#include "spec_helper.h"
|
||||
#include "runtime/alloc.h"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/read_test_entries.h"
|
||||
#include "helpers/spy_input.h"
|
||||
#include "helpers/stderr_logger.h"
|
||||
#include "helpers/point_helpers.h"
|
||||
#include "helpers/encoding_helpers.h"
|
||||
#include "helpers/record_alloc.h"
|
||||
#include "helpers/random_helpers.h"
|
||||
#include "helpers/scope_sequence.h"
|
||||
#include <set>
|
||||
|
||||
static void assert_consistent_sizes(TSNode node) {
|
||||
size_t child_count = ts_node_child_count(node);
|
||||
size_t start_byte = ts_node_start_byte(node);
|
||||
size_t end_byte = ts_node_end_byte(node);
|
||||
TSPoint start_point = ts_node_start_point(node);
|
||||
TSPoint end_point = ts_node_end_point(node);
|
||||
bool some_child_has_changes = false;
|
||||
|
||||
AssertThat(start_byte, !IsGreaterThan(end_byte));
|
||||
AssertThat(start_point, !IsGreaterThan(end_point));
|
||||
|
||||
size_t last_child_end_byte = start_byte;
|
||||
TSPoint last_child_end_point = start_point;
|
||||
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
size_t child_start_byte = ts_node_start_byte(child);
|
||||
TSPoint child_start_point = ts_node_start_point(child);
|
||||
|
||||
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
|
||||
assert_consistent_sizes(child);
|
||||
if (ts_node_has_changes(child))
|
||||
some_child_has_changes = true;
|
||||
|
||||
last_child_end_byte = ts_node_end_byte(child);
|
||||
last_child_end_point = ts_node_end_point(child);
|
||||
}
|
||||
|
||||
if (child_count > 0) {
|
||||
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
|
||||
AssertThat(end_point, !IsLessThan(last_child_end_point));
|
||||
}
|
||||
|
||||
if (some_child_has_changes) {
|
||||
AssertThat(ts_node_has_changes(node), IsTrue());
|
||||
}
|
||||
}
|
||||
|
||||
static void assert_correct_tree_size(TSDocument *document, string content) {
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
size_t expected_size = content.size();
|
||||
|
||||
// In the JSON grammar, the start rule (`_value`) is hidden, so the node
|
||||
// returned from `ts_document_root_node` (e.g. an `object` node), does not
|
||||
// actually point to the root of the tree. In this weird case, trailing
|
||||
// whitespace is not included in the root node's size.
|
||||
//
|
||||
// TODO: Fix this inconsistency. Maybe disallow the start rule being hidden?
|
||||
if (ts_document_language(document) == load_real_language("json") &&
|
||||
string(ts_node_type(root_node, document)) != "ERROR")
|
||||
expected_size = content.find_last_not_of("\n ") + 1;
|
||||
|
||||
AssertThat(ts_node_end_byte(root_node), Equals(expected_size));
|
||||
assert_consistent_sizes(root_node);
|
||||
}
|
||||
|
||||
START_TEST
|
||||
|
||||
vector<string> test_languages({
|
||||
"javascript",
|
||||
"json",
|
||||
"c",
|
||||
"cpp",
|
||||
"python",
|
||||
});
|
||||
|
||||
for (auto &language_name : test_languages) {
|
||||
describe(("the " + language_name + " language").c_str(), [&]() {
|
||||
TSDocument *document;
|
||||
|
||||
before_each([&]() {
|
||||
record_alloc::start();
|
||||
document = ts_document_new();
|
||||
ts_document_set_language(document, load_real_language(language_name));
|
||||
|
||||
// ts_document_set_logger(document, stderr_logger_new(true));
|
||||
// ts_document_print_debugging_graphs(document, true);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
ts_document_free(document);
|
||||
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
|
||||
});
|
||||
|
||||
for (auto &entry : read_real_language_corpus(language_name)) {
|
||||
SpyInput *input;
|
||||
|
||||
auto it_handles_edit_sequence = [&](string name, std::function<void()> edit_sequence){
|
||||
it(("parses " + entry.description + ": " + name).c_str(), [&]() {
|
||||
input = new SpyInput(entry.input, 3);
|
||||
ts_document_set_input(document, input->input());
|
||||
edit_sequence();
|
||||
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
const char *node_string = ts_node_string(root_node, document);
|
||||
string result(node_string);
|
||||
ts_free((void *)node_string);
|
||||
AssertThat(result, Equals(entry.tree_string));
|
||||
|
||||
assert_correct_tree_size(document, input->content);
|
||||
delete input;
|
||||
});
|
||||
};
|
||||
|
||||
it_handles_edit_sequence("initial parse", [&]() {
|
||||
ts_document_parse(document);
|
||||
});
|
||||
|
||||
std::set<std::pair<size_t, size_t>> deletions;
|
||||
std::set<std::pair<size_t, string>> insertions;
|
||||
|
||||
for (size_t i = 0; i < 60; i++) {
|
||||
size_t edit_position = random() % utf8_char_count(entry.input);
|
||||
size_t deletion_size = random() % (utf8_char_count(entry.input) - edit_position);
|
||||
string inserted_text = random_words(random() % 4 + 1);
|
||||
|
||||
if (insertions.insert({edit_position, inserted_text}).second) {
|
||||
string description = "\"" + inserted_text + "\" at " + to_string(edit_position);
|
||||
|
||||
it_handles_edit_sequence("repairing an insertion of " + description, [&]() {
|
||||
ts_document_edit(document, input->replace(edit_position, 0, inserted_text));
|
||||
ts_document_parse(document);
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
ts_document_edit(document, input->undo());
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
TSRange *ranges;
|
||||
uint32_t range_count;
|
||||
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
|
||||
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
|
||||
|
||||
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
|
||||
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
|
||||
input->content, ranges, range_count);
|
||||
ts_free(ranges);
|
||||
});
|
||||
}
|
||||
|
||||
if (deletions.insert({edit_position, deletion_size}).second) {
|
||||
string desription = to_string(edit_position) + "-" + to_string(edit_position + deletion_size);
|
||||
|
||||
it_handles_edit_sequence("repairing a deletion of " + desription, [&]() {
|
||||
ts_document_edit(document, input->replace(edit_position, deletion_size, ""));
|
||||
ts_document_parse(document);
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
ts_document_edit(document, input->undo());
|
||||
assert_correct_tree_size(document, input->content);
|
||||
|
||||
TSRange *ranges;
|
||||
uint32_t range_count;
|
||||
ScopeSequence old_scope_sequence = build_scope_sequence(document, input->content);
|
||||
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
|
||||
|
||||
ScopeSequence new_scope_sequence = build_scope_sequence(document, input->content);
|
||||
verify_changed_ranges(old_scope_sequence, new_scope_sequence,
|
||||
input->content, ranges, range_count);
|
||||
ts_free(ranges);
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
END_TEST
|
||||
78
spec/integration/test_grammars.cc
Normal file
78
spec/integration/test_grammars.cc
Normal file
|
|
@ -0,0 +1,78 @@
|
|||
#include "spec_helper.h"
|
||||
#include "helpers/read_test_entries.h"
|
||||
#include "helpers/load_language.h"
|
||||
#include "helpers/stderr_logger.h"
|
||||
#include "helpers/file_helpers.h"
|
||||
#include "runtime/alloc.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
string grammars_dir_path = "spec/fixtures/test_grammars";
|
||||
vector<string> test_languages = list_directory(grammars_dir_path);
|
||||
|
||||
for (auto &language_name : test_languages) {
|
||||
if (language_name == "readme.md") continue;
|
||||
|
||||
describe(("test language: " + language_name).c_str(), [&]() {
|
||||
string directory_path = grammars_dir_path + "/" + language_name;
|
||||
string grammar_path = directory_path + "/grammar.json";
|
||||
string external_scanner_path = directory_path + "/scanner.c";
|
||||
string expected_error_path = directory_path + "/expected_error.txt";
|
||||
string corpus_path = directory_path + "/corpus.txt";
|
||||
|
||||
if (!file_exists(external_scanner_path)) {
|
||||
external_scanner_path = "";
|
||||
}
|
||||
|
||||
string grammar_json = read_file(grammar_path);
|
||||
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
|
||||
|
||||
if (file_exists(expected_error_path)) {
|
||||
it("fails with the correct error message", [&]() {
|
||||
string expected_error = read_file(expected_error_path);
|
||||
AssertThat((void *)compile_result.error_message, !IsNull());
|
||||
AssertThat(compile_result.error_message, Equals(expected_error));
|
||||
});
|
||||
|
||||
return;
|
||||
} else {
|
||||
TSDocument *document = nullptr;
|
||||
const TSLanguage *language = nullptr;
|
||||
|
||||
before_each([&]() {
|
||||
if (!language) {
|
||||
language = load_test_language(
|
||||
language_name,
|
||||
compile_result,
|
||||
external_scanner_path
|
||||
);
|
||||
}
|
||||
|
||||
document = ts_document_new();
|
||||
ts_document_set_language(document, language);
|
||||
|
||||
// ts_document_set_logger(document, stderr_logger_new(true));
|
||||
// ts_document_print_debugging_graphs(document, true);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
if (document) ts_document_free(document);
|
||||
});
|
||||
|
||||
for (auto &entry : read_test_language_corpus(language_name)) {
|
||||
it(("parses " + entry.description).c_str(), [&]() {
|
||||
ts_document_set_input_string_with_length(document, entry.input.c_str(), entry.input.size());
|
||||
ts_document_parse(document);
|
||||
|
||||
TSNode root_node = ts_document_root_node(document);
|
||||
const char *node_string = ts_node_string(root_node, document);
|
||||
string result(node_string);
|
||||
ts_free((void *)node_string);
|
||||
AssertThat(result, Equals(entry.tree_string));
|
||||
});
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
END_TEST
|
||||
|
|
@ -43,7 +43,7 @@ describe("Document", [&]() {
|
|||
before_each([&]() {
|
||||
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
|
||||
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
|
||||
ts_document_parse(document);
|
||||
|
||||
|
|
@ -152,7 +152,7 @@ describe("Document", [&]() {
|
|||
});
|
||||
|
||||
it("uses the given language for future parses", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
ts_document_parse(document);
|
||||
|
||||
root = ts_document_root_node(document);
|
||||
|
|
@ -162,10 +162,10 @@ describe("Document", [&]() {
|
|||
});
|
||||
|
||||
it("clears out any previous tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
ts_document_parse(document);
|
||||
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
|
||||
|
||||
ts_document_parse(document);
|
||||
|
|
@ -177,7 +177,7 @@ describe("Document", [&]() {
|
|||
});
|
||||
|
||||
it("does not allow setting a language with a different version number", [&]() {
|
||||
TSLanguage language = *get_test_language("json");
|
||||
TSLanguage language = *load_real_language("json");
|
||||
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
|
||||
|
||||
language.version++;
|
||||
|
|
@ -193,7 +193,7 @@ describe("Document", [&]() {
|
|||
|
||||
before_each([&]() {
|
||||
logger = new SpyLogger();
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
ts_document_set_input_string(document, "[1, 2]");
|
||||
});
|
||||
|
||||
|
|
@ -235,7 +235,7 @@ describe("Document", [&]() {
|
|||
SpyInput *input;
|
||||
|
||||
before_each([&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
input = new SpyInput("{a: null};", 3);
|
||||
ts_document_set_input(document, input->input());
|
||||
ts_document_parse(document);
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ describe("Node", []() {
|
|||
record_alloc::start();
|
||||
|
||||
document = ts_document_new();
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
ts_document_set_input_string(document, input_string.c_str());
|
||||
ts_document_parse(document);
|
||||
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ describe("Parser", [&]() {
|
|||
describe("handling errors", [&]() {
|
||||
describe("when there is an invalid substring right before a valid token", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
set_text(" [123, @@@@@, true]");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -108,7 +108,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there is an unexpected string in the middle of a token", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
set_text(" [123, faaaaalse, true]");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -134,7 +134,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there is one unexpected token between two valid tokens", [&]() {
|
||||
it("computes the error node's size and position correctly", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
set_text(" [123, true false, true]");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -153,7 +153,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there is an unexpected string at the end of a token", [&]() {
|
||||
it("computes the error's size and position correctly", [&]() {
|
||||
ts_document_set_language(document, get_test_language("json"));
|
||||
ts_document_set_language(document, load_real_language("json"));
|
||||
set_text(" [123, \"hi\n, true]");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -163,7 +163,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there is an unterminated error", [&]() {
|
||||
it("maintains a consistent tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("a; /* b");
|
||||
assert_root_node(
|
||||
"(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
|
||||
|
|
@ -172,7 +172,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when there are extra tokens at the end of the viable prefix", [&]() {
|
||||
it("does not include them in the error node", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text(
|
||||
"var x;\n"
|
||||
"\n"
|
||||
|
|
@ -192,7 +192,7 @@ describe("Parser", [&]() {
|
|||
describe("handling extra tokens", [&]() {
|
||||
describe("when the token appears as part of a grammar rule", [&]() {
|
||||
it("incorporates it into the tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("fn()\n");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -202,7 +202,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when the token appears somewhere else", [&]() {
|
||||
it("incorporates it into the tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text(
|
||||
"fn()\n"
|
||||
" .otherFn();");
|
||||
|
|
@ -218,7 +218,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when several extra tokens appear in a row", [&]() {
|
||||
it("incorporates them into the tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text(
|
||||
"fn()\n\n"
|
||||
"// This is a comment"
|
||||
|
|
@ -239,7 +239,7 @@ describe("Parser", [&]() {
|
|||
describe("editing", [&]() {
|
||||
describe("creating new tokens near the end of the input", [&]() {
|
||||
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("x * (100 + abc);");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -262,7 +262,7 @@ describe("Parser", [&]() {
|
|||
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
|
||||
chunk_size = 2;
|
||||
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("123 + 456 * (10 + x);");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -285,7 +285,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("introducing an error", [&]() {
|
||||
it("gives the error the right size", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("var x = y;");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -308,7 +308,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("into the middle of an existing token", [&]() {
|
||||
it("updates the parse tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("abc * 123;");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -327,7 +327,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("at the end of an existing token", [&]() {
|
||||
it("updates the parse tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("abc * 123;");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -346,7 +346,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("inserting text into a node containing a extra token", [&]() {
|
||||
it("updates the parse tree", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("123 *\n"
|
||||
"// a-comment\n"
|
||||
"abc;");
|
||||
|
|
@ -373,7 +373,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("when a critical token is removed", [&]() {
|
||||
it("updates the parse tree, creating an error", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("123 * 456; 789 * 123;");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -392,7 +392,7 @@ describe("Parser", [&]() {
|
|||
|
||||
describe("with external tokens", [&]() {
|
||||
it("maintains the external scanner's state during incremental parsing", [&]() {
|
||||
ts_document_set_language(document, get_test_language("python"));
|
||||
ts_document_set_language(document, load_real_language("python"));
|
||||
string text = dedent(R"PYTHON(
|
||||
if a:
|
||||
print b
|
||||
|
|
@ -420,7 +420,7 @@ describe("Parser", [&]() {
|
|||
});
|
||||
|
||||
it("does not try to re-use nodes that are within the edited region", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("{ x: (b.c) };");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -435,7 +435,7 @@ describe("Parser", [&]() {
|
|||
});
|
||||
|
||||
it("updates the document's parse count", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
|
||||
|
||||
set_text("{ x: (b.c) };");
|
||||
|
|
@ -449,7 +449,7 @@ describe("Parser", [&]() {
|
|||
describe("lexing", [&]() {
|
||||
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
|
||||
it("terminates them at the end of the document", [&]() {
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("x; // this is a comment");
|
||||
|
||||
assert_root_node(
|
||||
|
|
@ -464,7 +464,7 @@ describe("Parser", [&]() {
|
|||
|
||||
it("recognizes UTF8 characters as single characters", [&]() {
|
||||
// 'ΩΩΩ — ΔΔ';
|
||||
ts_document_set_language(document, get_test_language("javascript"));
|
||||
ts_document_set_language(document, load_real_language("javascript"));
|
||||
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
|
||||
|
||||
assert_root_node(
|
||||
|
|
|
|||
|
|
@ -1,195 +0,0 @@
|
|||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::make_shared;
|
||||
using std::unordered_map;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::CharacterSet;
|
||||
using rules::Repeat;
|
||||
using rules::Symbol;
|
||||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
|
||||
class LexTableBuilder {
|
||||
LexTable lex_table;
|
||||
ParseTable *parse_table;
|
||||
const LexicalGrammar lex_grammar;
|
||||
vector<rule_ptr> separator_rules;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
public:
|
||||
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
|
||||
: parse_table(parse_table), lex_grammar(lex_grammar) {
|
||||
for (const rule_ptr &rule : lex_grammar.separators)
|
||||
separator_rules.push_back(Repeat::build(rule));
|
||||
separator_rules.push_back(Blank::build());
|
||||
}
|
||||
|
||||
LexTable build() {
|
||||
for (ParseState &parse_state : parse_table->states)
|
||||
add_lex_state_for_parse_state(&parse_state);
|
||||
|
||||
mark_fragile_tokens();
|
||||
remove_duplicate_lex_states();
|
||||
|
||||
return lex_table;
|
||||
}
|
||||
|
||||
private:
|
||||
void add_lex_state_for_parse_state(ParseState *parse_state) {
|
||||
parse_state->lex_state_id =
|
||||
add_lex_state(item_set_for_terminals(parse_state->terminal_entries));
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
const auto &pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.add_state();
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
const LexItemSet::Transition &transition = pair.second;
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
if (conflict_manager.resolve(transition.destination, action,
|
||||
current_action)) {
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.state(state_id).advance_actions[characters] = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
LexItem::CompletionStatus completion_status = item.completion_status();
|
||||
if (completion_status.is_done) {
|
||||
AcceptTokenAction action(item.lhs, completion_status.precedence.max,
|
||||
completion_status.is_string);
|
||||
|
||||
auto current_action = lex_table.state(state_id).accept_action;
|
||||
if (conflict_manager.resolve(action, current_action))
|
||||
lex_table.state(state_id).accept_action = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mark_fragile_tokens() {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol = entry.first;
|
||||
if (symbol.is_token()) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
|
||||
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_lex_states() {
|
||||
for (LexState &state : lex_table.states) {
|
||||
state.accept_action.is_string = false;
|
||||
state.accept_action.precedence = 0;
|
||||
}
|
||||
|
||||
auto replacements =
|
||||
remove_duplicate_states<LexTable>(&lex_table);
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
if (replacement != replacements.end())
|
||||
parse_state.lex_state_id = replacement->second;
|
||||
}
|
||||
}
|
||||
|
||||
LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
|
||||
LexItemSet result;
|
||||
for (const auto &pair : terminals) {
|
||||
Symbol symbol = pair.first;
|
||||
if (symbol.is_token()) {
|
||||
for (const rule_ptr &rule : rules_for_symbol(symbol)) {
|
||||
for (const rule_ptr &separator_rule : separator_rules) {
|
||||
result.entries.insert(LexItem(
|
||||
symbol,
|
||||
Metadata::separator(
|
||||
Seq::build({
|
||||
separator_rule,
|
||||
Metadata::main_token(rule) }))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
|
||||
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
||||
|
||||
auto choice = rule->as<Choice>();
|
||||
if (choice)
|
||||
return choice->elements;
|
||||
else
|
||||
return { rule };
|
||||
}
|
||||
};
|
||||
|
||||
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
|
||||
return LexTableBuilder(table, grammar).build();
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,18 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
#define COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
class ParseTable;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
LexTable build_lex_table(ParseTable *, const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_BUILD_LEX_TABLE_H_
|
||||
|
|
@ -6,14 +6,13 @@
|
|||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/remove_duplicate_states.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -41,6 +40,7 @@ class ParseTableBuilder {
|
|||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
vector<set<Symbol::Index>> incompatible_token_indices_by_index;
|
||||
bool allow_any_conflict;
|
||||
|
||||
public:
|
||||
|
|
@ -56,9 +56,9 @@ class ParseTableBuilder {
|
|||
Symbol(0, Symbol::Terminal) :
|
||||
Symbol(0, Symbol::NonTerminal);
|
||||
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone),
|
||||
});
|
||||
Production start_production{
|
||||
ProductionStep{start_symbol, 0, rules::AssociativityNone},
|
||||
};
|
||||
|
||||
// Placeholder for error state
|
||||
add_parse_state(ParseItemSet());
|
||||
|
|
@ -71,10 +71,11 @@ class ParseTableBuilder {
|
|||
}));
|
||||
|
||||
CompileError error = process_part_state_queue();
|
||||
if (error.type != TSCompileErrorTypeNone)
|
||||
if (error.type != TSCompileErrorTypeNone) {
|
||||
return { parse_table, error };
|
||||
}
|
||||
|
||||
parse_table.mergeable_symbols = recovery_tokens(lexical_grammar);
|
||||
compute_unmergable_token_pairs();
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
|
|
@ -110,8 +111,18 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol symbol : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
for (Symbol::Index i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
bool has_non_reciprocal_conflict = false;
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices_by_index[i]) {
|
||||
if (!incompatible_token_indices_by_index[incompatible_index].count(i)) {
|
||||
has_non_reciprocal_conflict = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!has_non_reciprocal_conflict) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::Terminal));
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
|
|
@ -148,7 +159,8 @@ class ParseTableBuilder {
|
|||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.add_state();
|
||||
ParseStateId state_id = parse_table.states.size();
|
||||
parse_table.states.push_back(ParseState());
|
||||
parse_state_ids[item_set] = state_id;
|
||||
parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature();
|
||||
item_sets_to_process.push_back({ std::move(item_set), state_id });
|
||||
|
|
@ -291,6 +303,34 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
void compute_unmergable_token_pairs() {
|
||||
incompatible_token_indices_by_index.resize(lexical_grammar.variables.size());
|
||||
|
||||
// First, assume that all tokens are mutually incompatible.
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
for (Symbol::Index j = 0; j < n; j++) {
|
||||
if (j != i) incompatible_indices.insert(j);
|
||||
}
|
||||
}
|
||||
|
||||
// For the remaining possibly-incompatible pairs of tokens, check if they
|
||||
// are actually incompatible by actually generating lexical states that
|
||||
// contain them both.
|
||||
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
|
||||
for (Symbol::Index i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
auto &incompatible_indices = incompatible_token_indices_by_index[i];
|
||||
auto iter = incompatible_indices.begin();
|
||||
while (iter != incompatible_indices.end()) {
|
||||
if (lex_table_builder->detect_conflict(i, *iter)) {
|
||||
++iter;
|
||||
} else {
|
||||
iter = incompatible_indices.erase(iter);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
map<size_t, set<ParseStateId>> state_indices_by_signature;
|
||||
|
||||
|
|
@ -302,7 +342,7 @@ class ParseTableBuilder {
|
|||
set<ParseStateId> deleted_states;
|
||||
|
||||
while (true) {
|
||||
std::map<ParseStateId, ParseStateId> state_replacements;
|
||||
map<ParseStateId, ParseStateId> state_replacements;
|
||||
|
||||
for (auto &pair : state_indices_by_signature) {
|
||||
auto &state_group = pair.second;
|
||||
|
|
@ -310,7 +350,7 @@ class ParseTableBuilder {
|
|||
for (ParseStateId i : state_group) {
|
||||
for (ParseStateId j : state_group) {
|
||||
if (j == i) break;
|
||||
if (!state_replacements.count(j) && parse_table.merge_state(j, i)) {
|
||||
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
|
||||
state_replacements.insert({ i, j });
|
||||
deleted_states.insert(i);
|
||||
break;
|
||||
|
|
@ -364,6 +404,72 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool merge_parse_state(size_t i, size_t j) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
ParseState &other = parse_table.states[j];
|
||||
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
if (other.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
auto &incompatible_token_indices = incompatible_token_indices_by_index[lookahead.index];
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (Symbol::Index incompatible_index : incompatible_token_indices) {
|
||||
Symbol incompatible_symbol(incompatible_index, Symbol::Terminal);
|
||||
if (state.terminal_entries.count(incompatible_symbol)) return false;
|
||||
}
|
||||
}
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
|
||||
Symbol lookahead) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
|
|
@ -574,7 +680,7 @@ class ParseTableBuilder {
|
|||
|
||||
switch (symbol.type) {
|
||||
case Symbol::Terminal: {
|
||||
const Variable &variable = lexical_grammar.variables[symbol.index];
|
||||
const LexicalVariable &variable = lexical_grammar.variables[symbol.index];
|
||||
if (variable.type == VariableTypeNamed)
|
||||
return variable.name;
|
||||
else
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
#include "compiler/build_tables/build_tables.h"
|
||||
#include <tuple>
|
||||
#include "compiler/build_tables/build_lex_table.h"
|
||||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
#include "compiler/build_tables/build_parse_table.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
|
|
@ -15,11 +15,13 @@ using std::vector;
|
|||
using std::make_tuple;
|
||||
|
||||
tuple<ParseTable, LexTable, CompileError> build_tables(
|
||||
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
|
||||
auto parse_table_result = build_parse_table(grammar, lex_grammar);
|
||||
const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lexical_grammar
|
||||
) {
|
||||
auto parse_table_result = build_parse_table(grammar, lexical_grammar);
|
||||
ParseTable parse_table = parse_table_result.first;
|
||||
const CompileError error = parse_table_result.second;
|
||||
LexTable lex_table = build_lex_table(&parse_table, lex_grammar);
|
||||
LexTable lex_table = LexTableBuilder::create(lexical_grammar)->build(&parse_table);
|
||||
return make_tuple(parse_table, lex_table, error);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -10,11 +10,10 @@ namespace build_tables {
|
|||
bool LexConflictManager::resolve(const LexItemSet &item_set,
|
||||
const AdvanceAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (!old_action.is_present())
|
||||
return true;
|
||||
if (new_action.precedence_range.max >= old_action.precedence) {
|
||||
for (const LexItem &item : item_set.entries)
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
possible_extensions[old_action.symbol.index].insert(item.lhs.index);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
return false;
|
||||
|
|
@ -23,30 +22,26 @@ bool LexConflictManager::resolve(const LexItemSet &item_set,
|
|||
|
||||
bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (!old_action.is_present())
|
||||
return true;
|
||||
|
||||
int old_precedence = old_action.precedence;
|
||||
int new_precedence = new_action.precedence;
|
||||
|
||||
bool result;
|
||||
if (new_precedence > old_precedence)
|
||||
if (new_action.precedence > old_action.precedence) {
|
||||
result = true;
|
||||
else if (new_precedence < old_precedence)
|
||||
} else if (new_action.precedence < old_action.precedence) {
|
||||
result = false;
|
||||
else if (new_action.is_string && !old_action.is_string)
|
||||
} else if (new_action.is_string && !old_action.is_string) {
|
||||
result = true;
|
||||
else if (old_action.is_string && !new_action.is_string)
|
||||
} else if (old_action.is_string && !new_action.is_string) {
|
||||
result = false;
|
||||
else if (new_action.symbol.index < old_action.symbol.index)
|
||||
} else if (new_action.symbol.index < old_action.symbol.index) {
|
||||
result = true;
|
||||
else
|
||||
} else {
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result)
|
||||
if (result) {
|
||||
possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
|
||||
else
|
||||
} else {
|
||||
possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,19 +32,15 @@ LexItem::CompletionStatus LexItem::completion_status() const {
|
|||
CompletionStatus apply_to(const rules::Choice *rule) {
|
||||
for (const auto &element : rule->elements) {
|
||||
CompletionStatus status = apply(element);
|
||||
if (status.is_done)
|
||||
return status;
|
||||
if (status.is_done) return status;
|
||||
}
|
||||
return { false, PrecedenceRange(), false };
|
||||
return { false, PrecedenceRange() };
|
||||
}
|
||||
|
||||
CompletionStatus apply_to(const rules::Metadata *rule) {
|
||||
CompletionStatus result = apply(rule->rule);
|
||||
if (result.is_done) {
|
||||
if (result.precedence.empty && rule->params.has_precedence)
|
||||
result.precedence.add(rule->params.precedence);
|
||||
if (rule->params.is_string)
|
||||
result.is_string = true;
|
||||
if (result.is_done && result.precedence.empty && rule->params.has_precedence) {
|
||||
result.precedence.add(rule->params.precedence);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
|
@ -54,15 +50,16 @@ LexItem::CompletionStatus LexItem::completion_status() const {
|
|||
}
|
||||
|
||||
CompletionStatus apply_to(const rules::Blank *rule) {
|
||||
return { true, PrecedenceRange(), false };
|
||||
return { true, PrecedenceRange() };
|
||||
}
|
||||
|
||||
CompletionStatus apply_to(const rules::Seq *rule) {
|
||||
CompletionStatus left_status = apply(rule->left);
|
||||
if (left_status.is_done)
|
||||
if (left_status.is_done) {
|
||||
return apply(rule->right);
|
||||
else
|
||||
return { false, PrecedenceRange(), false };
|
||||
} else {
|
||||
return { false, PrecedenceRange() };
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -80,8 +77,9 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
|
|||
|
||||
LexItemSet::TransitionMap LexItemSet::transitions() const {
|
||||
TransitionMap result;
|
||||
for (const LexItem &item : entries)
|
||||
for (const LexItem &item : entries) {
|
||||
lex_item_transitions(&result, item);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -19,7 +19,6 @@ class LexItem {
|
|||
struct CompletionStatus {
|
||||
bool is_done;
|
||||
PrecedenceRange precedence;
|
||||
bool is_string;
|
||||
};
|
||||
|
||||
bool operator==(const LexItem &other) const;
|
||||
|
|
|
|||
324
src/compiler/build_tables/lex_table_builder.cc
Normal file
324
src/compiler/build_tables/lex_table_builder.cc
Normal file
|
|
@ -0,0 +1,324 @@
|
|||
#include "compiler/build_tables/lex_table_builder.h"
|
||||
#include <climits>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::unordered_map;
|
||||
using std::unique_ptr;
|
||||
using rules::Blank;
|
||||
using rules::Choice;
|
||||
using rules::CharacterSet;
|
||||
using rules::Repeat;
|
||||
using rules::Symbol;
|
||||
using rules::Metadata;
|
||||
using rules::Seq;
|
||||
|
||||
class StartingCharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
apply(rule->left);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements) apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
CharacterSet result;
|
||||
};
|
||||
|
||||
class LexTableBuilderImpl : public LexTableBuilder {
|
||||
LexTable lex_table;
|
||||
const LexicalGrammar grammar;
|
||||
vector<rule_ptr> separator_rules;
|
||||
CharacterSet first_separator_characters;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
public:
|
||||
vector<bool> shadowed_token_indices;
|
||||
|
||||
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
|
||||
StartingCharacterAggregator starting_character_aggregator;
|
||||
for (const rule_ptr &rule : grammar.separators) {
|
||||
separator_rules.push_back(Repeat::build(rule));
|
||||
starting_character_aggregator.apply(rule);
|
||||
}
|
||||
separator_rules.push_back(Blank::build());
|
||||
first_separator_characters = starting_character_aggregator.result;
|
||||
shadowed_token_indices.resize(grammar.variables.size());
|
||||
}
|
||||
|
||||
LexTable build(ParseTable *parse_table) {
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
parse_state.lex_state_id = add_lex_state(
|
||||
item_set_for_terminals(parse_state.terminal_entries)
|
||||
);
|
||||
}
|
||||
mark_fragile_tokens(parse_table);
|
||||
remove_duplicate_lex_states(parse_table);
|
||||
return lex_table;
|
||||
}
|
||||
|
||||
bool detect_conflict(Symbol::Index left, Symbol::Index right) {
|
||||
clear();
|
||||
|
||||
map<Symbol, ParseTableEntry> terminals;
|
||||
terminals[Symbol(left, Symbol::Terminal)];
|
||||
terminals[Symbol(right, Symbol::Terminal)];
|
||||
|
||||
add_lex_state(item_set_for_terminals(terminals));
|
||||
|
||||
return shadowed_token_indices[right];
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
const auto &pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
LexStateId state_id = lex_table.states.size();
|
||||
lex_table.states.push_back(LexState());
|
||||
lex_state_ids[item_set] = state_id;
|
||||
add_accept_token_actions(item_set, state_id);
|
||||
add_advance_actions(item_set, state_id);
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
void clear() {
|
||||
lex_table.states.clear();
|
||||
lex_state_ids.clear();
|
||||
shadowed_token_indices.assign(grammar.variables.size(), false);
|
||||
}
|
||||
|
||||
private:
|
||||
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &pair : item_set.transitions()) {
|
||||
const CharacterSet &characters = pair.first;
|
||||
const LexItemSet::Transition &transition = pair.second;
|
||||
|
||||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (current_action.is_present()) {
|
||||
bool prefer_advancing = conflict_manager.resolve(transition.destination, action, current_action);
|
||||
bool matches_accepted_token = false;
|
||||
for (const LexItem &item : transition.destination.entries) {
|
||||
if (item.lhs == current_action.symbol) {
|
||||
matches_accepted_token = true;
|
||||
} else if (!transition.in_main_token && !item.lhs.is_built_in() && !prefer_advancing) {
|
||||
shadowed_token_indices[item.lhs.index] = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!matches_accepted_token && characters.intersects(first_separator_characters)) {
|
||||
shadowed_token_indices[current_action.symbol.index] = true;
|
||||
}
|
||||
|
||||
if (!prefer_advancing) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
action.state_index = add_lex_state(transition.destination);
|
||||
lex_table.states[state_id].advance_actions[characters] = action;
|
||||
}
|
||||
}
|
||||
|
||||
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
LexItem::CompletionStatus completion_status = item.completion_status();
|
||||
if (completion_status.is_done) {
|
||||
AcceptTokenAction action(item.lhs, completion_status.precedence.max,
|
||||
item.lhs.is_built_in() ||
|
||||
grammar.variables[item.lhs.index].is_string);
|
||||
|
||||
auto current_action = lex_table.states[state_id].accept_action;
|
||||
if (current_action.is_present()) {
|
||||
if (!conflict_manager.resolve(action, current_action)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
lex_table.states[state_id].accept_action = action;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void mark_fragile_tokens(ParseTable *parse_table) {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol = entry.first;
|
||||
if (symbol.is_token()) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
|
||||
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void remove_duplicate_lex_states(ParseTable *parse_table) {
|
||||
for (LexState &state : lex_table.states) {
|
||||
state.accept_action.is_string = false;
|
||||
state.accept_action.precedence = 0;
|
||||
}
|
||||
|
||||
map<LexStateId, LexStateId> replacements;
|
||||
|
||||
while (true) {
|
||||
map<LexStateId, LexStateId> duplicates;
|
||||
for (LexStateId i = 0, size = lex_table.states.size(); i < size; i++) {
|
||||
for (LexStateId j = 0; j < i; j++) {
|
||||
if (!duplicates.count(j) && lex_table.states[j] == lex_table.states[i]) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (duplicates.empty()) break;
|
||||
|
||||
map<size_t, size_t> new_replacements;
|
||||
for (LexStateId i = 0, size = lex_table.states.size(); i < size; i++) {
|
||||
LexStateId new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end()) {
|
||||
new_state_index = duplicate->second;
|
||||
}
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &duplicate : duplicates) {
|
||||
if (duplicate.first >= new_state_index) break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
new_state_index -= prior_removed;
|
||||
new_replacements.insert({ i, new_state_index });
|
||||
replacements.insert({ i, new_state_index });
|
||||
for (auto &replacement : replacements) {
|
||||
if (replacement.second == i) {
|
||||
replacement.second = new_state_index;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto &state : lex_table.states) {
|
||||
for (auto &entry : state.advance_actions) {
|
||||
auto new_replacement = new_replacements.find(entry.second.state_index);
|
||||
if (new_replacement != new_replacements.end()) {
|
||||
entry.second.state_index = new_replacement->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) {
|
||||
lex_table.states.erase(lex_table.states.begin() + i->first);
|
||||
}
|
||||
}
|
||||
|
||||
for (ParseState &parse_state : parse_table->states) {
|
||||
auto replacement = replacements.find(parse_state.lex_state_id);
|
||||
if (replacement != replacements.end()) {
|
||||
parse_state.lex_state_id = replacement->second;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
|
||||
LexItemSet result;
|
||||
for (const auto &pair : terminals) {
|
||||
Symbol symbol = pair.first;
|
||||
if (symbol.is_token()) {
|
||||
for (const rule_ptr &rule : rules_for_symbol(symbol)) {
|
||||
for (const rule_ptr &separator_rule : separator_rules) {
|
||||
result.entries.insert(LexItem(
|
||||
symbol,
|
||||
Metadata::separator(
|
||||
Seq::build({
|
||||
separator_rule,
|
||||
Metadata::main_token(rule) }))));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
|
||||
rule_ptr rule = grammar.variables[symbol.index].rule;
|
||||
|
||||
auto choice = rule->as<Choice>();
|
||||
if (choice)
|
||||
return choice->elements;
|
||||
else
|
||||
return { rule };
|
||||
}
|
||||
};
|
||||
|
||||
unique_ptr<LexTableBuilder> LexTableBuilder::create(const LexicalGrammar &grammar) {
|
||||
return unique_ptr<LexTableBuilder>(new LexTableBuilderImpl(grammar));
|
||||
}
|
||||
|
||||
LexTable LexTableBuilder::build(ParseTable *parse_table) {
|
||||
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
|
||||
}
|
||||
|
||||
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) {
|
||||
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
26
src/compiler/build_tables/lex_table_builder.h
Normal file
26
src/compiler/build_tables/lex_table_builder.h
Normal file
|
|
@ -0,0 +1,26 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
|
||||
#include <memory>
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct ParseTable;
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
class LexTableBuilder {
|
||||
public:
|
||||
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
|
||||
LexTable build(ParseTable *);
|
||||
bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index);
|
||||
protected:
|
||||
LexTableBuilder() = default;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
|
|
@ -12,8 +12,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
class ParseItem {
|
||||
public:
|
||||
struct ParseItem {
|
||||
ParseItem();
|
||||
ParseItem(const rules::Symbol &, const Production &, unsigned int);
|
||||
|
||||
|
|
@ -36,8 +35,7 @@ class ParseItem {
|
|||
unsigned int step_index;
|
||||
};
|
||||
|
||||
class ParseItemSet {
|
||||
public:
|
||||
struct ParseItemSet {
|
||||
ParseItemSet();
|
||||
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
|
||||
|
||||
|
|
|
|||
|
|
@ -1,89 +0,0 @@
|
|||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using rules::Symbol;
|
||||
using std::set;
|
||||
|
||||
template <bool left, bool right>
|
||||
class CharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
if (left)
|
||||
apply(rule->left);
|
||||
if (right)
|
||||
apply(rule->right);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements)
|
||||
apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
rules::CharacterSet result;
|
||||
};
|
||||
|
||||
class FirstCharacters : public CharacterAggregator<true, false> {};
|
||||
class LastCharacters : public CharacterAggregator<false, true> {};
|
||||
class AllCharacters : public CharacterAggregator<true, true> {};
|
||||
|
||||
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
|
||||
AllCharacters all_separator_characters;
|
||||
for (const rule_ptr &separator : grammar.separators)
|
||||
all_separator_characters.apply(separator);
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
const Variable &variable = grammar.variables[i];
|
||||
rule_ptr rule = variable.rule;
|
||||
|
||||
FirstCharacters first_characters;
|
||||
first_characters.apply(variable.rule);
|
||||
|
||||
LastCharacters last_characters;
|
||||
last_characters.apply(variable.rule);
|
||||
|
||||
AllCharacters all_characters;
|
||||
all_characters.apply(variable.rule);
|
||||
|
||||
bool has_distinct_start =
|
||||
!first_characters.result.includes_all &&
|
||||
!first_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
bool has_distinct_end =
|
||||
!last_characters.result.includes_all &&
|
||||
!last_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
bool has_no_separators =
|
||||
!all_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
if ((has_distinct_start && has_distinct_end) || has_no_separators)
|
||||
result.insert(Symbol(i, Symbol::Terminal));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,19 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <set>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
|
@ -1,65 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
template <typename TableType>
|
||||
std::map<size_t, size_t> remove_duplicate_states(TableType *table) {
|
||||
std::map<size_t, size_t> replacements;
|
||||
|
||||
while (true) {
|
||||
std::map<size_t, size_t> duplicates;
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++)
|
||||
for (size_t j = 0; j < i; j++)
|
||||
if (!duplicates.count(j) && table->merge_state(j, i)) {
|
||||
duplicates.insert({ i, j });
|
||||
break;
|
||||
}
|
||||
|
||||
if (duplicates.empty())
|
||||
break;
|
||||
|
||||
std::map<size_t, size_t> new_replacements;
|
||||
for (size_t i = 0, size = table->states.size(); i < size; i++) {
|
||||
size_t new_state_index = i;
|
||||
auto duplicate = duplicates.find(i);
|
||||
if (duplicate != duplicates.end())
|
||||
new_state_index = duplicate->second;
|
||||
|
||||
size_t prior_removed = 0;
|
||||
for (const auto &duplicate : duplicates) {
|
||||
if (duplicate.first >= new_state_index)
|
||||
break;
|
||||
prior_removed++;
|
||||
}
|
||||
|
||||
new_state_index -= prior_removed;
|
||||
new_replacements.insert({ i, new_state_index });
|
||||
replacements.insert({ i, new_state_index });
|
||||
for (auto &replacement : replacements)
|
||||
if (replacement.second == i)
|
||||
replacement.second = new_state_index;
|
||||
}
|
||||
|
||||
for (auto &state : table->states)
|
||||
state.each_referenced_state([&new_replacements](int64_t *state_index) {
|
||||
auto new_replacement = new_replacements.find(*state_index);
|
||||
if (new_replacement != new_replacements.end())
|
||||
*state_index = new_replacement->second;
|
||||
});
|
||||
|
||||
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i)
|
||||
table->states.erase(table->states.begin() + i->first);
|
||||
}
|
||||
|
||||
return replacements;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_
|
||||
|
|
@ -26,8 +26,6 @@ using std::vector;
|
|||
using util::escape_char;
|
||||
using rules::Symbol;
|
||||
|
||||
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
|
||||
|
||||
static const map<char, string> REPLACEMENTS({
|
||||
{ '~', "TILDE" },
|
||||
{ '`', "BQUOTE" },
|
||||
|
|
@ -561,7 +559,7 @@ class CCodeGenerator {
|
|||
return { variable.name, variable.type };
|
||||
}
|
||||
case Symbol::Terminal: {
|
||||
const Variable &variable = lexical_grammar.variables[symbol.index];
|
||||
const LexicalVariable &variable = lexical_grammar.variables[symbol.index];
|
||||
return { variable.name, variable.type };
|
||||
}
|
||||
case Symbol::External:
|
||||
|
|
|
|||
|
|
@ -7,8 +7,8 @@ namespace tree_sitter {
|
|||
|
||||
struct LexicalGrammar;
|
||||
struct SyntaxGrammar;
|
||||
class LexTable;
|
||||
class ParseTable;
|
||||
struct LexTable;
|
||||
struct ParseTable;
|
||||
|
||||
namespace generate_code {
|
||||
|
||||
|
|
|
|||
|
|
@ -44,35 +44,10 @@ bool AcceptTokenAction::operator==(const AcceptTokenAction &other) const {
|
|||
|
||||
LexState::LexState() : is_token_start(false) {}
|
||||
|
||||
set<CharacterSet> LexState::expected_inputs() const {
|
||||
set<CharacterSet> result;
|
||||
for (auto &pair : advance_actions)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
bool LexState::operator==(const LexState &other) const {
|
||||
return advance_actions == other.advance_actions &&
|
||||
accept_action == other.accept_action &&
|
||||
is_token_start == other.is_token_start;
|
||||
}
|
||||
|
||||
void LexState::each_referenced_state(function<void(LexStateId *)> fn) {
|
||||
for (auto &entry : advance_actions)
|
||||
fn(&entry.second.state_index);
|
||||
}
|
||||
|
||||
LexStateId LexTable::add_state() {
|
||||
states.push_back(LexState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
LexState &LexTable::state(LexStateId id) {
|
||||
return states[id];
|
||||
}
|
||||
|
||||
bool LexTable::merge_state(size_t i, size_t j) {
|
||||
return states[i] == states[j];
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -13,17 +13,9 @@ namespace tree_sitter {
|
|||
|
||||
typedef int64_t LexStateId;
|
||||
|
||||
typedef enum {
|
||||
LexActionTypeError,
|
||||
LexActionTypeAccept,
|
||||
LexActionTypeAcceptFragile,
|
||||
LexActionTypeAdvance
|
||||
} LexActionType;
|
||||
|
||||
struct AdvanceAction {
|
||||
AdvanceAction();
|
||||
AdvanceAction(size_t, PrecedenceRange, bool);
|
||||
|
||||
bool operator==(const AdvanceAction &other) const;
|
||||
|
||||
LexStateId state_index;
|
||||
|
|
@ -34,7 +26,6 @@ struct AdvanceAction {
|
|||
struct AcceptTokenAction {
|
||||
AcceptTokenAction();
|
||||
AcceptTokenAction(rules::Symbol, int, bool);
|
||||
|
||||
bool is_present() const;
|
||||
bool operator==(const AcceptTokenAction &action) const;
|
||||
|
||||
|
|
@ -43,31 +34,17 @@ struct AcceptTokenAction {
|
|||
bool is_string;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
namespace std {} // namespace std
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
class LexState {
|
||||
public:
|
||||
struct LexState {
|
||||
LexState();
|
||||
std::set<rules::CharacterSet> expected_inputs() const;
|
||||
bool operator==(const LexState &) const;
|
||||
void each_referenced_state(std::function<void(LexStateId *)>);
|
||||
|
||||
std::map<rules::CharacterSet, AdvanceAction> advance_actions;
|
||||
AcceptTokenAction accept_action;
|
||||
bool is_token_start;
|
||||
};
|
||||
|
||||
class LexTable {
|
||||
public:
|
||||
LexStateId add_state();
|
||||
LexState &state(LexStateId state_id);
|
||||
struct LexTable {
|
||||
std::vector<LexState> states;
|
||||
|
||||
bool merge_state(size_t i, size_t j);
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -9,8 +9,15 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalVariable {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
bool is_string;
|
||||
};
|
||||
|
||||
struct LexicalGrammar {
|
||||
std::vector<Variable> variables;
|
||||
std::vector<LexicalVariable> variables;
|
||||
std::vector<rule_ptr> separators;
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -148,13 +148,6 @@ bool ParseState::has_shift_action() const {
|
|||
return (!nonterminal_entries.empty());
|
||||
}
|
||||
|
||||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &entry : terminal_entries)
|
||||
result.insert(entry.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
void ParseState::each_referenced_state(function<void(ParseStateId *)> fn) {
|
||||
for (auto &entry : terminal_entries)
|
||||
for (ParseAction &action : entry.second.actions)
|
||||
|
|
@ -169,18 +162,6 @@ bool ParseState::operator==(const ParseState &other) const {
|
|||
nonterminal_entries == other.nonterminal_entries;
|
||||
}
|
||||
|
||||
set<Symbol> ParseTable::all_symbols() const {
|
||||
set<Symbol> result;
|
||||
for (auto &pair : symbols)
|
||||
result.insert(pair.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseStateId ParseTable::add_state() {
|
||||
states.push_back(ParseState());
|
||||
return states.size() - 1;
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
|
||||
Symbol lookahead,
|
||||
ParseAction action) {
|
||||
|
|
@ -201,58 +182,4 @@ void ParseTable::set_nonterminal_action(ParseStateId state_id,
|
|||
states[state_id].nonterminal_entries[lookahead] = next_state_id;
|
||||
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
bool ParseTable::merge_state(size_t i, size_t j) {
|
||||
ParseState &state = states[i];
|
||||
ParseState &other = states[j];
|
||||
|
||||
if (state.nonterminal_entries != other.nonterminal_entries)
|
||||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(other, entry.second))
|
||||
return false;
|
||||
} else if (entry.second != other_entry->second) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -23,13 +23,11 @@ enum ParseActionType {
|
|||
ParseActionTypeRecover,
|
||||
};
|
||||
|
||||
class ParseAction {
|
||||
struct ParseAction {
|
||||
ParseAction();
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
const Production *);
|
||||
|
||||
public:
|
||||
ParseAction();
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(ParseStateId state_index);
|
||||
|
|
@ -39,7 +37,6 @@ class ParseAction {
|
|||
static ParseAction ShiftExtra();
|
||||
bool operator==(const ParseAction &) const;
|
||||
bool operator<(const ParseAction &) const;
|
||||
|
||||
rules::Associativity associativity() const;
|
||||
int precedence() const;
|
||||
|
||||
|
|
@ -47,30 +44,26 @@ class ParseAction {
|
|||
bool extra;
|
||||
bool fragile;
|
||||
ParseStateId state_index;
|
||||
|
||||
rules::Symbol symbol;
|
||||
size_t consumed_symbol_count;
|
||||
const Production *production;
|
||||
};
|
||||
|
||||
struct ParseTableEntry {
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
|
||||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
|
||||
inline bool operator!=(const ParseTableEntry &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
};
|
||||
|
||||
class ParseState {
|
||||
public:
|
||||
struct ParseState {
|
||||
ParseState();
|
||||
std::set<rules::Symbol> expected_inputs() const;
|
||||
bool operator==(const ParseState &) const;
|
||||
bool merge(const ParseState &);
|
||||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
|
|
@ -87,18 +80,12 @@ struct ParseTableSymbolMetadata {
|
|||
bool structural;
|
||||
};
|
||||
|
||||
class ParseTable {
|
||||
public:
|
||||
std::set<rules::Symbol> all_symbols() const;
|
||||
ParseStateId add_state();
|
||||
struct ParseTable {
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
||||
std::set<rules::Symbol> mergeable_symbols;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -41,10 +41,17 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back(
|
||||
Variable(helper_rule_name, VariableTypeAuxiliary,
|
||||
Choice::build({ Seq::build({ repeat_symbol.copy(), inner_rule }),
|
||||
inner_rule })));
|
||||
aux_rules.push_back(Variable{
|
||||
helper_rule_name,
|
||||
VariableTypeAuxiliary,
|
||||
Choice::build({
|
||||
Seq::build({
|
||||
repeat_symbol.copy(),
|
||||
inner_rule,
|
||||
}),
|
||||
inner_rule,
|
||||
})
|
||||
});
|
||||
return repeat_symbol.copy();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -67,11 +67,11 @@ pair<LexicalGrammar, CompileError> expand_tokens(const LexicalGrammar &grammar)
|
|||
LexicalGrammar result;
|
||||
ExpandTokens expander;
|
||||
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
for (const LexicalVariable &variable : grammar.variables) {
|
||||
auto rule = expander.apply(variable.rule);
|
||||
if (expander.error.type)
|
||||
return { result, expander.error };
|
||||
result.variables.push_back(Variable(variable.name, variable.type, rule));
|
||||
result.variables.push_back({variable.name, variable.type, rule, variable.is_string});
|
||||
}
|
||||
|
||||
for (auto &sep : grammar.separators) {
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
|
|||
class TokenExtractor : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to_token(const Rule *input, VariableType entry_type) {
|
||||
rule_ptr apply_to_token(const Rule *input, VariableType entry_type, bool is_string) {
|
||||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].rule->operator==(*input)) {
|
||||
token_usage_counts[i]++;
|
||||
|
|
@ -65,29 +65,30 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
|
||||
rule_ptr rule = input->copy();
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back(Variable(token_description(rule), entry_type, rule));
|
||||
tokens.push_back({token_description(rule), entry_type, rule, is_string});
|
||||
token_usage_counts.push_back(1);
|
||||
return make_shared<Symbol>(index, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::String *rule) {
|
||||
return apply_to_token(rule, VariableTypeAnonymous);
|
||||
return apply_to_token(rule, VariableTypeAnonymous, true);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Pattern *rule) {
|
||||
return apply_to_token(rule, VariableTypeAuxiliary);
|
||||
return apply_to_token(rule, VariableTypeAuxiliary, false);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Metadata *rule) {
|
||||
if (rule->params.is_token)
|
||||
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary);
|
||||
else
|
||||
if (rule->params.is_token) {
|
||||
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary, false);
|
||||
} else {
|
||||
return rules::IdentityRuleFn::apply_to(rule);
|
||||
}
|
||||
}
|
||||
|
||||
public:
|
||||
vector<size_t> token_usage_counts;
|
||||
vector<Variable> tokens;
|
||||
vector<LexicalVariable> tokens;
|
||||
};
|
||||
|
||||
static CompileError extra_token_error(const string &message) {
|
||||
|
|
@ -106,8 +107,11 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
*/
|
||||
vector<Variable> processed_variables;
|
||||
for (const Variable &variable : grammar.variables)
|
||||
processed_variables.push_back(
|
||||
Variable(variable.name, variable.type, extractor.apply(variable.rule)));
|
||||
processed_variables.push_back(Variable{
|
||||
variable.name,
|
||||
variable.type,
|
||||
extractor.apply(variable.rule)
|
||||
});
|
||||
lexical_grammar.variables = extractor.tokens;
|
||||
|
||||
/*
|
||||
|
|
@ -139,8 +143,9 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
|
||||
for (const ConflictSet &conflict_set : grammar.expected_conflicts) {
|
||||
ConflictSet new_conflict_set;
|
||||
for (const Symbol &symbol : conflict_set)
|
||||
for (const Symbol &symbol : conflict_set) {
|
||||
new_conflict_set.insert(symbol_replacer.replace_symbol(symbol));
|
||||
}
|
||||
syntax_grammar.expected_conflicts.insert(new_conflict_set);
|
||||
}
|
||||
|
||||
|
|
@ -154,7 +159,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
for (const rule_ptr &rule : grammar.extra_tokens) {
|
||||
int i = 0;
|
||||
bool used_elsewhere_in_grammar = false;
|
||||
for (const Variable &variable : lexical_grammar.variables) {
|
||||
for (const LexicalVariable &variable : lexical_grammar.variables) {
|
||||
if (variable.rule->operator==(*rule)) {
|
||||
syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
|
||||
used_elsewhere_in_grammar = true;
|
||||
|
|
@ -171,9 +176,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
}
|
||||
|
||||
auto symbol = rule->as<Symbol>();
|
||||
if (!symbol)
|
||||
if (!symbol) {
|
||||
return make_tuple(syntax_grammar, lexical_grammar,
|
||||
extra_token_error(rule->to_string()));
|
||||
}
|
||||
|
||||
Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
|
||||
if (new_symbol.is_non_terminal()) {
|
||||
|
|
|
|||
|
|
@ -25,8 +25,11 @@ class FlattenRule : public rules::RuleFn<void> {
|
|||
Production production;
|
||||
|
||||
void apply_to(const rules::Symbol *sym) {
|
||||
production.push_back(ProductionStep(*sym, precedence_stack.back(),
|
||||
associativity_stack.back()));
|
||||
production.push_back(ProductionStep{
|
||||
*sym,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
});
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *metadata) {
|
||||
|
|
@ -85,7 +88,7 @@ SyntaxVariable flatten_rule(const Variable &variable) {
|
|||
}
|
||||
}
|
||||
|
||||
return SyntaxVariable(variable.name, variable.type, productions);
|
||||
return SyntaxVariable{variable.name, variable.type, productions};
|
||||
}
|
||||
|
||||
pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &grammar) {
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ namespace prepare_grammar {
|
|||
LexicalGrammar normalize_rules(const LexicalGrammar &input_grammar) {
|
||||
LexicalGrammar result(input_grammar);
|
||||
|
||||
for (Variable &variable : result.variables) {
|
||||
for (LexicalVariable &variable : result.variables) {
|
||||
variable.rule = rules::Choice::build(extract_choices(variable.rule));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,20 +7,6 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
|
||||
const vector<Production> &productions)
|
||||
: name(name), productions(productions), type(type) {}
|
||||
|
||||
ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
|
||||
rules::Associativity associativity)
|
||||
: symbol(symbol), precedence(precedence), associativity(associativity) {}
|
||||
|
||||
bool ExternalToken::operator==(const ExternalToken &other) const {
|
||||
return name == other.name && type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
|
|
|
|||
|
|
@ -11,15 +11,14 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct ExternalToken {
|
||||
bool operator==(const ExternalToken &) const;
|
||||
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
|
||||
bool operator==(const ExternalToken &) const;
|
||||
};
|
||||
|
||||
struct ProductionStep {
|
||||
ProductionStep(const rules::Symbol &, int, rules::Associativity);
|
||||
bool operator==(const ProductionStep &) const;
|
||||
|
||||
rules::Symbol symbol;
|
||||
|
|
@ -30,12 +29,9 @@ struct ProductionStep {
|
|||
typedef std::vector<ProductionStep> Production;
|
||||
|
||||
struct SyntaxVariable {
|
||||
SyntaxVariable(const std::string &, VariableType,
|
||||
const std::vector<Production> &);
|
||||
|
||||
std::string name;
|
||||
std::vector<Production> productions;
|
||||
VariableType type;
|
||||
std::vector<Production> productions;
|
||||
};
|
||||
|
||||
typedef std::set<rules::Symbol> ConflictSet;
|
||||
|
|
|
|||
|
|
@ -1,11 +0,0 @@
|
|||
#include "compiler/variable.h"
|
||||
#include <string>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
|
||||
Variable::Variable(const string &name, VariableType type, const rule_ptr &rule)
|
||||
: name(name), rule(rule), type(type) {}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -15,11 +15,9 @@ enum VariableType {
|
|||
};
|
||||
|
||||
struct Variable {
|
||||
Variable(const std::string &, VariableType, const rule_ptr &);
|
||||
|
||||
std::string name;
|
||||
rule_ptr rule;
|
||||
VariableType type;
|
||||
rule_ptr rule;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue