Add PREC_DYNAMIC rule for resolving runtime ambiguities

This commit is contained in:
Max Brunsfeld 2017-07-06 15:20:11 -07:00
parent cb652239f6
commit d8e9d04fe7
24 changed files with 316 additions and 83 deletions

View file

@ -42,6 +42,7 @@ typedef struct {
union {
TSStateId to_state;
struct {
short dynamic_precedence;
TSSymbol symbol;
unsigned short child_count;
};
@ -145,21 +146,30 @@ typedef struct TSLanguage {
{ .type = TSParseActionTypeShift, .extra = true } \
}
#define REDUCE(symbol_val, child_count_val) \
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = {.symbol = symbol_val, .child_count = child_count_val } \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val) \
{ \
{ \
.type = TSParseActionTypeReduce, .fragile = true, \
.params = {.symbol = symbol_val, .child_count = child_count_val } \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.fragile = true, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define ACCEPT_INPUT() \
{ \

View file

@ -55,7 +55,8 @@ class ParseTableBuilder {
Symbol::non_terminal(0);
Production start_production{
ProductionStep{start_symbol, 0, rules::AssociativityNone},
{ProductionStep{start_symbol, 0, rules::AssociativityNone}},
0
};
// Placeholder for error state
@ -281,9 +282,10 @@ class ParseTableBuilder {
for (ParseAction &action : actions) {
if (action.type == ParseActionTypeReduce) {
if (has_fragile_production(action.production))
if (has_fragile_production(action.production)) {
action.fragile = true;
action.production = NULL;
}
action.production = nullptr;
}
}
@ -586,7 +588,7 @@ class ParseTableBuilder {
}
description += " (" + symbol_name(action.symbol);
for (const ProductionStep &step : *action.production) {
for (const ProductionStep &step : action.production->steps) {
description += " " + symbol_name(step.symbol);
}
description += ")";

View file

@ -60,6 +60,10 @@ int ParseItem::precedence() const {
}
}
int ParseItem::dynamic_precedence() const {
return production->dynamic_precedence;
}
rules::Associativity ParseItem::associativity() const {
if (is_done()) {
if (production->empty()) {

View file

@ -26,6 +26,7 @@ struct ParseItem {
rules::Symbol lhs() const;
rules::Symbol next_symbol() const;
int precedence() const;
int dynamic_precedence() const;
rules::Associativity associativity() const;
bool is_done() const;

View file

@ -490,12 +490,17 @@ class CCodeGenerator {
break;
case ParseActionTypeReduce:
if (action.fragile) {
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
add("REDUCE_FRAGILE");
} else {
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
add("REDUCE");
}
add("(");
add(symbol_id(action.symbol));
add(", ");
add(to_string(action.consumed_symbol_count));
add(", " + to_string(action.dynamic_precedence));
add(")");
break;
case ParseActionTypeRecover:
add("RECOVER(" + to_string(action.state_index) + ")");

View file

@ -184,6 +184,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
}
if (type == "PREC_DYNAMIC") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
return "Precedence value must be an integer";
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
}
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
}
return "Unknown rule type: " + type;
}

View file

@ -13,25 +13,14 @@ using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
const Production *production)
: type(type),
extra(false),
fragile(false),
state_index(state_index),
symbol(symbol),
consumed_symbol_count(consumed_symbol_count),
production(production) {}
// Default-constructs an error action with every member set to a defined value.
//
// The factory functions that build on this constructor (Shift, Recover) only
// overwrite `type` and `state_index`, so any member left out here would stay
// indeterminate on those actions. `dynamic_precedence` is therefore zeroed
// explicitly; only Reduce assigns it a production-specific value.
//
// Initializers follow the member declaration order of ParseAction.
ParseAction::ParseAction()
    : production(nullptr),
      consumed_symbol_count(0),
      symbol(rules::NONE()),
      dynamic_precedence(0),
      type(ParseActionTypeError),
      extra(false),
      fragile(false),
      state_index(-1) {}
ParseAction ParseAction::Error() {
return ParseAction();
@ -44,12 +33,17 @@ ParseAction ParseAction::Accept() {
}
ParseAction ParseAction::Shift(ParseStateId state_index) {
return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
ParseAction result;
result.type = ParseActionTypeShift;
result.state_index = state_index;
return result;
}
ParseAction ParseAction::Recover(ParseStateId state_index) {
return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
nullptr);
ParseAction result;
result.type = ParseActionTypeRecover;
result.state_index = state_index;
return result;
}
ParseAction ParseAction::ShiftExtra() {
@ -61,8 +55,13 @@ ParseAction ParseAction::ShiftExtra() {
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
const Production &production) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
&production);
ParseAction result;
result.type = ParseActionTypeReduce;
result.symbol = symbol;
result.consumed_symbol_count = consumed_symbol_count;
result.production = &production;
result.dynamic_precedence = production.dynamic_precedence;
return result;
}
int ParseAction::precedence() const {

View file

@ -24,9 +24,6 @@ enum ParseActionType {
struct ParseAction {
ParseAction();
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
const Production *);
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index);
@ -39,13 +36,14 @@ struct ParseAction {
rules::Associativity associativity() const;
int precedence() const;
const Production *production;
size_t consumed_symbol_count;
rules::Symbol symbol;
int dynamic_precedence;
ParseActionType type;
bool extra;
bool fragile;
ParseStateId state_index;
rules::Symbol symbol;
size_t consumed_symbol_count;
const Production *production;
};
struct ParseTableEntry {

View file

@ -26,7 +26,7 @@ class FlattenRule {
void apply(const Rule &rule) {
rule.match(
[&](const rules::Symbol &symbol) {
production.push_back(ProductionStep{
production.steps.push_back(ProductionStep{
symbol,
precedence_stack.back(),
associativity_stack.back()
@ -42,6 +42,10 @@ class FlattenRule {
associativity_stack.push_back(metadata.params.associativity);
}
if (metadata.params.dynamic_precedence > production.dynamic_precedence) {
production.dynamic_precedence = metadata.params.dynamic_precedence;
}
apply(*metadata.rule);
if (metadata.params.has_precedence) {

View file

@ -51,6 +51,12 @@ Metadata Metadata::prec_right(int precedence, const Rule &rule) {
return Metadata{rule, params};
}
// Wraps `rule` in metadata whose only non-default parameter is the given
// dynamic precedence.
Metadata Metadata::prec_dynamic(int dynamic_precedence, const Rule &rule) {
  MetadataParams dynamic_params;
  dynamic_params.dynamic_precedence = dynamic_precedence;
  return Metadata{rule, dynamic_params};
}
Metadata Metadata::separator(const Rule &rule) {
MetadataParams params;
params.has_precedence = true;

View file

@ -14,6 +14,7 @@ enum Associativity {
struct MetadataParams {
int precedence;
int dynamic_precedence;
Associativity associativity;
bool has_precedence;
bool has_associativity;
@ -23,8 +24,8 @@ struct MetadataParams {
bool is_main_token;
inline MetadataParams() :
precedence{0}, associativity{AssociativityNone}, has_precedence{false},
has_associativity{false}, is_token{false}, is_string{false},
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
has_precedence{false}, has_associativity{false}, is_token{false}, is_string{false},
is_active{false}, is_main_token{false} {}
inline bool operator==(const MetadataParams &other) const {
@ -33,6 +34,7 @@ struct MetadataParams {
associativity == other.associativity &&
has_precedence == other.has_precedence &&
has_associativity == other.has_associativity &&
dynamic_precedence == other.dynamic_precedence &&
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
@ -54,6 +56,7 @@ struct Metadata {
static Metadata prec(int precedence, const Rule &rule);
static Metadata prec_left(int precedence, const Rule &rule);
static Metadata prec_right(int precedence, const Rule &rule);
static Metadata prec_dynamic(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
@ -63,4 +66,4 @@ struct Metadata {
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_METADATA_H_
#endif // COMPILER_RULES_METADATA_H_

View file

@ -11,8 +11,9 @@ namespace tree_sitter {
struct ProductionStep {
inline bool operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
associativity == other.associativity;
return symbol == other.symbol &&
precedence == other.precedence &&
associativity == other.associativity;
}
rules::Symbol symbol;
@ -20,7 +21,21 @@ struct ProductionStep {
rules::Associativity associativity;
};
typedef std::vector<ProductionStep> Production;
// A sequence of production steps together with the dynamic precedence that
// applies to the production as a whole (0 unless set otherwise).
//
// The data members stay first and in this order so that brace initialization
// of the form `Production{{...steps...}, precedence}` keeps working.
struct Production {
  std::vector<ProductionStep> steps;
  int dynamic_precedence = 0;

  // Thin forwarding accessors over `steps`.
  bool empty() const { return steps.empty(); }
  size_t size() const { return steps.size(); }

  ProductionStep &back() { return steps.back(); }
  const ProductionStep &back() const { return steps.back(); }

  // Both accessors index `steps` directly; neither performs a bounds check.
  const ProductionStep &operator[](int i) const { return steps[i]; }
  const ProductionStep &at(int i) const { return steps[i]; }

  // Two productions are equal when both their precedence and steps match.
  bool operator==(const Production &other) const {
    return dynamic_precedence == other.dynamic_precedence &&
           steps == other.steps;
  }
};
struct SyntaxVariable {
std::string name;

View file

@ -437,22 +437,36 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
}
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
if (!left)
return true;
if (!right)
return false;
if (!left) return true;
if (!right) return false;
if (right->error_cost < left->error_cost) {
LOG("select_smaller_error symbol:%s, over_symbol:%s",
SYM_NAME(right->symbol), SYM_NAME(left->symbol));
return true;
}
if (left->error_cost < right->error_cost) {
LOG("select_smaller_error symbol:%s, over_symbol:%s",
SYM_NAME(left->symbol), SYM_NAME(right->symbol));
return false;
}
if (left->error_cost > 0) return -1;
if (right->dynamic_precedence > left->dynamic_precedence) {
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
SYM_NAME(right->symbol), right->dynamic_precedence, SYM_NAME(left->symbol),
left->dynamic_precedence);
return true;
}
if (left->dynamic_precedence > right->dynamic_precedence) {
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
SYM_NAME(left->symbol), left->dynamic_precedence, SYM_NAME(right->symbol),
right->dynamic_precedence);
return false;
}
if (left->error_cost > 0) return true;
int comparison = ts_tree_compare(left, right);
switch (comparison) {
@ -544,7 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree,
static StackPopResult parser__reduce(Parser *self, StackVersion version,
TSSymbol symbol, unsigned count,
bool fragile, bool allow_skipping) {
bool fragile, int dynamic_precedence,
bool allow_skipping) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
@ -587,6 +602,8 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
}
}
parent->dynamic_precedence += dynamic_precedence;
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(language, state, symbol);
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
@ -929,6 +946,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence
});
default:
break;
@ -939,8 +957,10 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
bool did_reduce = false;
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
ReduceAction action = self->reduce_actions.contents[i];
StackPopResult reduction =
parser__reduce(self, version, action.symbol, action.count, true, false);
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.count, true,
action.dynamic_precedence, false
);
if (reduction.stopped_at_error) {
ts_tree_array_delete(&reduction.slices.contents[0].trees);
ts_stack_remove_version(self->stack, reduction.slices.contents[0].version);
@ -1180,12 +1200,13 @@ static void parser__advance(Parser *self, StackVersion version,
unsigned child_count = action.params.child_count;
TSSymbol symbol = action.params.symbol;
unsigned dynamic_precedence = action.params.dynamic_precedence;
bool fragile = action.fragile;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
StackPopResult reduction =
parser__reduce(self, version, symbol, child_count, fragile, true);
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
StackSlice slice = *array_front(&reduction.slices);
if (reduction.stopped_at_error) {
reduction_stopped_at_error = true;

View file

@ -11,6 +11,7 @@ extern "C" {
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -150,6 +150,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->visible_child_count = 0;
self->error_cost = 0;
self->has_external_tokens = false;
self->dynamic_precedence = 0;
for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];
@ -165,6 +166,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
}
self->error_cost += child->error_cost;
self->dynamic_precedence += child->dynamic_precedence;
if (child->visible) {
self->visible_child_count++;

View file

@ -46,6 +46,7 @@ typedef struct Tree {
} first_leaf;
uint32_t ref_count;
int dynamic_precedence;
bool visible : 1;
bool named : 1;
bool extra : 1;

View file

@ -25,25 +25,25 @@ describe("ParseItemSetBuilder", []() {
it("adds items at the beginnings of referenced rules", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({
}, 0},
Production{{
{Symbol::non_terminal(2), 0, AssociativityNone},
})
}, 0}
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(14), 0, AssociativityNone},
{Symbol::terminal(15), 0, AssociativityNone},
})
}, 0}
}},
}, {}, {}, {}};
@ -84,17 +84,17 @@ describe("ParseItemSetBuilder", []() {
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({})
}, 0},
Production{{}, 0}
}},
}, {}, {}, {}};

View file

@ -34,21 +34,63 @@ describe("flatten_grammar", []() {
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(3), 102, AssociativityRight},
{Symbol::non_terminal(4), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}),
Production({
}, 0},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(5), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 0}
})));
});
it("stores the maximum dynamic precedence specified in each production", [&]() {
SyntaxVariable result = flatten_rule({
"test",
VariableTypeNamed,
Rule::seq({
Symbol::non_terminal(1),
Metadata::prec_dynamic(101, Rule::seq({
Symbol::non_terminal(2),
Rule::choice({
Metadata::prec_dynamic(102, Rule::seq({
Symbol::non_terminal(3),
Symbol::non_terminal(4)
})),
Symbol::non_terminal(5),
}),
Symbol::non_terminal(6),
})),
Symbol::non_terminal(7),
})
});
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(3), 0, AssociativityNone},
{Symbol::non_terminal(4), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 102},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(5), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 101}
})));
});
@ -63,10 +105,10 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
{Symbol::non_terminal(2), 101, AssociativityLeft},
})
{Symbol::non_terminal(2), 101, AssociativityLeft},
}, 0}
})));
result = flatten_rule({
@ -78,9 +120,9 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
})
}, 0}
})));
});
});

View file

@ -0,0 +1,25 @@
===============================
Declarations
===============================
int * x
---
(program (declaration
(type (identifier))
(declarator (identifier))))
===============================
Expressions
===============================
int * x * y
---
(program (expression
(expression
(expression (identifier))
(expression (identifier)))
(expression (identifier))))

View file

@ -0,0 +1,73 @@
{
"name": "dynamic_precedence",
"conflicts": [
["expression", "type"]
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"program": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "declaration"},
{"type": "SYMBOL", "name": "expression"}
]
},
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SYMBOL",
"name": "identifier"
}
]
}
},
"declaration": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "type"},
{"type": "SYMBOL", "name": "declarator"}
]
},
"declarator": {
"type": "PREC_DYNAMIC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "identifier"}
]
}
},
"type": {
"type": "SYMBOL",
"name": "identifier"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar contains a conflict that is resolved at runtime. The PREC_DYNAMIC rule is used to indicate that the `declarator` rule should be preferred to the `expression` rule at runtime.

View file

@ -136,9 +136,14 @@ ostream &operator<<(ostream &stream, const Variable &variable) {
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
}
// Prints a production as "(Production <steps> <dynamic_precedence>)".
ostream &operator<<(ostream &stream, const Production &production) {
  stream << "(Production " << production.steps << " ";
  stream << to_string(production.dynamic_precedence) << ")";
  return stream;
}
ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
return stream << "(Variable " << variable.name << " " << variable.productions <<
" " << to_string(variable.type) << "}";
" " << to_string(variable.type) << ")";
}
ostream &operator<<(ostream &stream, const LexicalVariable &variable) {

View file

@ -110,6 +110,7 @@ ostream &operator<<(ostream &, const InputGrammar &);
ostream &operator<<(ostream &, const CompileError &);
ostream &operator<<(ostream &, const ExternalToken &);
ostream &operator<<(ostream &, const ProductionStep &);
ostream &operator<<(ostream &, const Production &);
ostream &operator<<(ostream &, const PrecedenceRange &);
ostream &operator<<(ostream &, const Variable &);
ostream &operator<<(ostream &, const LexicalVariable &);

View file

@ -13,7 +13,7 @@ vector<string> test_languages = list_directory(grammars_dir_path);
for (auto &language_name : test_languages) {
if (language_name == "readme.md") continue;
describe(("test language: " + language_name).c_str(), [&]() {
describe(("test grammar: " + language_name).c_str(), [&]() {
string directory_path = grammars_dir_path + "/" + language_name;
string grammar_path = directory_path + "/grammar.json";
string grammar_json = read_file(grammar_path);