Merge pull request #4 from maxbrunsfeld/mb-precedence-handling-within-tokens

Make precedence more useful within tokens
This commit is contained in:
Max Brunsfeld 2015-11-01 21:50:53 -08:00
commit 4ab4fd7346
30 changed files with 1661 additions and 1492 deletions

View file

@ -14,8 +14,6 @@
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/does_match_any_line.cc',
'src/compiler/build_tables/get_completion_status.cc',
'src/compiler/build_tables/get_metadata.cc',
'src/compiler/build_tables/item_set_closure.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_item_transitions.cc',

View file

@ -1,107 +0,0 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/rules/metadata.h"
#include "compiler/build_tables/get_metadata.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Specs for get_metadata(rule, key): the MetadataRange of values that `key`
// takes on in `rule`, as exercised by the cases below.
describe("get_metadata(rule, key)", []() {
  // Two arbitrary, distinct keys; the specs only need to tell them apart.
  MetadataKey key1 = MetadataKey(100);
  MetadataKey key2 = MetadataKey(101);
  rule_ptr rule;

  describe("with a rule without the metadata key", [&]() {
    it("returns the zero range", [&]() {
      rule = sym("x");
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(0, 0)));

      rule = seq({ sym("x"), sym("y") });
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(0, 0)));

      // Metadata for a different key must not leak into the result.
      rule = metadata(seq({ sym("x"), sym("y") }), {{key2, 5}});
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(0, 0)));
    });
  });

  describe("when given a metadata rule", [&]() {
    before_each([&]() {
      rule = make_shared<Metadata>(sym("x"), map<MetadataKey, int>({
        { key1, 1 },
        { key2, 2 },
      }));
    });

    it("returns the value for the given key", [&]() {
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(1, 1)));
      AssertThat(get_metadata(rule, key2), Equals(MetadataRange(2, 2)));
    });

    it("returns 0 if the rule does not have the key", [&]() {
      AssertThat(get_metadata(rule, MetadataKey(0)), Equals(MetadataRange(0, 0)));
    });

    describe("when the rule contains another metadata rule", [&]() {
      it("also gets metadata from the inner metadata rule", [&]() {
        // The outer metadata rule has no entries; the value comes from the
        // inner one.
        rule = make_shared<Metadata>(make_shared<Metadata>(sym("x"), map<MetadataKey, int>({
          { key1, 1 }
        })), map<MetadataKey, int>());
        AssertThat(get_metadata(rule, key1), Equals(MetadataRange(1, 1)));
      });
    });
  });

  describe("with a sequence starting with a metadata rule", [&]() {
    it("returns the metadata rule's value for the key", [&]() {
      rule = seq({
        metadata(sym("x"), {{key1, 5}}),
        sym("y")
      });
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(5, 5)));
    });
  });

  describe("with a sequence whose starting value can be blank", [&]() {
    it("includes later elements of the sequence in the returned range", [&]() {
      // The expected range stops at 7: the element carrying 9 is not included.
      rule = seq({
        repeat(metadata(sym("x"), {{key1, 3}})),
        choice({ metadata(sym("x"), {{key1, 5}}), blank() }),
        metadata(sym("x"), {{key1, 7}}),
        metadata(sym("x"), {{key1, 9}}),
      });
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(3, 7)));
    });
  });

  describe("with a choice rule", [&]() {
    it("merges the ranges for the choices elements", [&]() {
      // The element without metadata ("c") does not pull the minimum to 0.
      rule = choice({
        metadata(sym("a"), {{key1, 5}}),
        metadata(sym("b"), {{key1, 3}}),
        sym("c"),
        metadata(sym("d"), {{key1, 1}}),
      });
      AssertThat(get_metadata(rule, key1), Equals(MetadataRange(1, 5)));
    });
  });
});
END_TEST

View file

@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST
describe("LexConflictManager", []() {
describe("LexConflictManager::resolve(new_action, old_action)", []() {
LexicalGrammar lexical_grammar{{
Variable("other_token", VariableTypeNamed, pattern("[a-b]")),
Variable("lookahead_token", VariableTypeNamed, pattern("[c-d]"))
@ -29,36 +29,58 @@ describe("LexConflictManager", []() {
AssertThat(update, IsFalse());
});
describe("accept-token/advance conflicts", [&]() {
it("prefers the advance", [&]() {
update = conflict_manager.resolve(LexAction::Advance(1, { 0, 0 }), LexAction::Accept(sym3, 3));
AssertThat(update, IsTrue());
update = conflict_manager.resolve(LexAction::Accept(sym3, 3), LexAction::Advance(1, { 0, 0 }));
AssertThat(update, IsFalse());
});
});
describe("accept-token/accept-token conflicts", [&]() {
describe("when one token has a higher precedence than the other", [&]() {
it("prefers the token with the higher precedence", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
describe("when one tokens' precedence values differ", [&]() {
it("favors the token with higher precedence", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 1, false), LexAction::Accept(sym1, 2, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
update = conflict_manager.resolve(LexAction::Accept(sym1, 2, false), LexAction::Accept(sym2, 1, false));
AssertThat(update, IsTrue());
});
});
describe("when both tokens have the same precedence", [&]() {
it("prefers the token listed earlier in the grammar", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
describe("when one token is string-based and the other is regexp-based", [&]() {
it("favors the string-based token", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, true));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
update = conflict_manager.resolve(LexAction::Accept(sym2, 0, true), LexAction::Accept(sym1, 0, false));
AssertThat(update, IsTrue());
});
});
describe("when the tokens have equal precedence", [&]() {
it("favors the token listed earlier in the grammar", [&]() {
update = conflict_manager.resolve(LexAction::Accept(sym2, 0, false), LexAction::Accept(sym1, 0, false));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym1, 0, false), LexAction::Accept(sym2, 0, false));
AssertThat(update, IsTrue());
});
});
});
describe("advance/accept-token conflicts", [&]() {
describe("when the token to accept has higher precedence", [&]() {
it("prefers the accept-token action", [&]() {
update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 3, true));
AssertThat(update, IsFalse());
update = conflict_manager.resolve(LexAction::Accept(sym3, 3, true), LexAction::Advance(1, { 1, 2 }));
AssertThat(update, IsTrue());
});
});
describe("when the token to accept does not have a higher precedence", [&]() {
it("favors the advance action", [&]() {
update = conflict_manager.resolve(LexAction::Advance(1, { 1, 2 }), LexAction::Accept(sym3, 2, true));
AssertThat(update, IsTrue());
update = conflict_manager.resolve(LexAction::Accept(sym3, 2, true), LexAction::Advance(1, { 1, 2 }));
AssertThat(update, IsFalse());
});
});
});
});

View file

@ -38,6 +38,29 @@ describe("LexItem", []() {
});
});
});
describe("completion_status()", [&]() {
it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
LexItem item1(Symbol(0, true), character({ 'a', 'b', 'c' }));
AssertThat(item1.completion_status().is_done, IsFalse());
AssertThat(item1.completion_status().precedence, Equals(0));
AssertThat(item1.completion_status().is_string, IsFalse());
LexItem item2(Symbol(0, true), choice({
metadata(blank(), { {PRECEDENCE, 3}, {IS_STRING, 1} }),
character({ 'a', 'b', 'c' })
}));
AssertThat(item2.completion_status().is_done, IsTrue());
AssertThat(item2.completion_status().precedence, Equals(3));
AssertThat(item2.completion_status().is_string, IsTrue());
LexItem item3(Symbol(0, true), repeat(character({ ' ', '\t' })));
AssertThat(item3.completion_status().is_done, IsTrue());
AssertThat(item3.completion_status().precedence, Equals(0));
AssertThat(item3.completion_status().is_string, IsFalse());
});
});
});
describe("LexItemSet::transitions()", [&]() {
@ -48,12 +71,15 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
}),
PrecedenceRange()
}
}
})));
});
@ -70,16 +96,77 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('w'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
})
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange()
}
}
})));
});
it("handles sequences with nested precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), seq({
prec(3, seq({
prec(4, seq({
character({ 'w' }),
character({ 'x' }) })),
character({ 'y' }) })),
character({ 'z' }),
})),
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('w'),
{
LexItemSet({
LexItem(Symbol(1), seq({
prec(3, seq({
prec(4, character({ 'x' })),
character({ 'y' }) })),
character({ 'z' }),
})),
}),
PrecedenceRange(4)
}
}
})));
LexItemSet item_set2({
LexItem(Symbol(1), seq({
prec(3, seq({
prec(4, character({ 'x' })),
character({ 'y' }) })),
character({ 'z' }),
})),
});
AssertThat(
item_set2.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
{
LexItemSet({
LexItem(Symbol(1), seq({
prec(3, character({ 'y' })),
character({ 'z' }),
})),
}),
PrecedenceRange(3)
}
}
})));
});
@ -98,21 +185,27 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('x'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'y' }),
character({ 'z' }),
})),
})
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'y' }),
character({ 'z' }),
})),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('y'),
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
}),
PrecedenceRange()
}
}
})));
});
@ -136,26 +229,53 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a'),
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'b' }),
repeat1(seq({
character({ 'a' }),
{
LexItemSet({
LexItem(Symbol(1), seq({
character({ 'b' }),
}))
})),
LexItem(Symbol(1), character({ 'b' })),
})
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
})),
LexItem(Symbol(1), character({ 'b' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('c'),
LexItemSet({
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
}
})));
});
it("handles repeats with precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), prec(-1, repeat1(character({ 'a' }))))
});
AssertThat(
item_set.transitions(),
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a'),
{
LexItemSet({
LexItem(Symbol(1), prec(-1, repeat1(character({ 'a' })))),
LexItem(Symbol(1), prec(-1, blank())),
}),
PrecedenceRange(-1)
}
}
})));
});
@ -163,38 +283,47 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between overlapping character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), choice({
seq({
prec(2, seq({
character({ 'a', 'b', 'c', 'd' }),
character({ 'x' }),
}),
seq({
})),
prec(3, seq({
character({ 'c', 'd', 'e', 'f' }),
character({ 'y' }),
}),
})),
}))
});
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'b'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
})
{
LexItemSet({
LexItem(Symbol(1), prec(2, character({ 'x' }))),
}),
PrecedenceRange(2)
}
},
{
CharacterSet().include('c', 'd'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), prec(2, character({ 'x' }))),
LexItem(Symbol(1), prec(3, character({ 'y' }))),
}),
PrecedenceRange(2, 3)
}
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), prec(3, character({ 'y' }))),
}),
PrecedenceRange(3)
}
},
})));
});
@ -215,19 +344,25 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a').include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('b', 'd'),
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
}),
PrecedenceRange()
}
},
})));
});
@ -248,19 +383,25 @@ describe("LexItemSet::transitions()", [&]() {
AssertThat(
item_set.transitions(),
Equals(map<CharacterSet, LexItemSet>({
Equals(LexItemSet::TransitionMap({
{
CharacterSet().include_all().exclude('/').exclude('\\'),
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('\\'),
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
})
{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
}),
PrecedenceRange()
}
},
})));
});
@ -271,25 +412,34 @@ describe("LexItemSet::transitions()", [&]() {
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
});
AssertThat(set1.transitions(), Equals(map<CharacterSet, LexItemSet>({
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
{
CharacterSet().include('a', 'd'),
LexItemSet({
LexItem(Symbol(1), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('e', 'f'),
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
},
{
CharacterSet().include('g', 'i'),
LexItemSet({
LexItem(Symbol(2), blank()),
})
{
LexItemSet({
LexItem(Symbol(2), blank()),
}),
PrecedenceRange()
}
},
})));
});

View file

@ -8,69 +8,147 @@ using namespace build_tables;
START_TEST
describe("parse_item_set_transitions(ParseItemSet, SyntaxGrammar)", [&]() {
it("computes the closure of the new item sets", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(1), 0, AssociativityNone, 103},
{Symbol(13, true), 0, AssociativityNone, 104},
})
describe("ParseItem::completion_status()", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13), 0, AssociativityNone, 103},
{Symbol(14, true), 4, AssociativityLeft, 104},
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(2), 0, AssociativityNone, 105},
{Symbol(14, true), 0, AssociativityNone, 106},
})
Production({
{Symbol(15, true), 0, AssociativityNone, 101},
{Symbol(16, true), 0, AssociativityNone, 102},
{Symbol(17, true), 5, AssociativityRight, 104},
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(15, true), 0, AssociativityNone, 105},
})
Production({}),
}),
}, {}, {}};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
it("indicates whether the parse item is done, and its associativity and precedence", [&]() {
ParseItem item(Symbol(0), production(0, 0), 3);
AssertThat(item.completion_status().is_done, IsFalse());
AssertThat(item.completion_status().precedence, Equals(0));
AssertThat(item.completion_status().associativity, Equals(AssociativityNone));
item = ParseItem(Symbol(0), production(0, 0), 4);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(4));
AssertThat(item.completion_status().associativity, Equals(AssociativityLeft));
item = ParseItem(Symbol(0), production(0, 1), 3);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(5));
AssertThat(item.completion_status().associativity, Equals(AssociativityRight));
item = ParseItem(Symbol(0), production(0, 2), 0);
AssertThat(item.completion_status().is_done, IsTrue());
AssertThat(item.completion_status().precedence, Equals(0));
AssertThat(item.completion_status().associativity, Equals(AssociativityNone));
});
});
describe("ParseItemSet::transitions())", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable("rule_0", VariableTypeNamed, {
Production({
{Symbol(11, true), 0, AssociativityNone, 101},
{Symbol(12, true), 0, AssociativityNone, 102},
{Symbol(13), 5, AssociativityNone, 103},
{Symbol(14, true), 0, AssociativityNone, 104},
}),
Production({
{Symbol(11, true), 0, AssociativityNone, 105},
{Symbol(12, true), 0, AssociativityNone, 106},
{Symbol(15), 6, AssociativityNone, 107},
})
}, {}, {}};
}),
SyntaxVariable("rule_1", VariableTypeNamed, {
Production({
{Symbol(15), 7, AssociativityNone, 109},
{Symbol(16, true), 0, AssociativityNone, 110},
})
}),
SyntaxVariable("rule_2", VariableTypeNamed, {
Production({
{Symbol(18, true), 0, AssociativityNone, 111},
})
})
}, {}, {}};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet set1({
it("computes the ParseItemSet that would occur after consuming each lookahead symbol, along with its precedence", [&]() {
ParseItemSet item_set({
// Two symbols into the first production for rule_0
{
ParseItem(Symbol(0), production(0, 0), 2),
LookaheadSet({ Symbol(16, true) })
LookaheadSet({ Symbol(21, true) })
},
// Two symbols into the second production for rule_0
{
ParseItem(Symbol(0), production(0, 1), 2),
LookaheadSet({ Symbol(21, true) })
},
// At the beginning of the first production for rule_1
{
ParseItem(Symbol(1), production(1, 0), 0),
LookaheadSet({ Symbol(17, true) })
LookaheadSet({ Symbol(22, true) })
},
// At the end of the first production for rule_2
{
ParseItem(Symbol(2), production(2, 0), 1),
LookaheadSet({ Symbol(17, true) })
LookaheadSet({ Symbol(22, true) })
}
});
AssertThat(set1.transitions(), Equals(map<Symbol, ParseItemSet>({
AssertThat(item_set.transitions(), Equals(ParseItemSet::TransitionMap({
// For the first item, symbol 13 is next, with precedence 5.
{
Symbol(1),
ParseItemSet({
{
ParseItem(Symbol(0), production(0, 0), 3),
LookaheadSet({ Symbol(16, true) })
}
})
Symbol(13),
{
ParseItemSet({
{
ParseItem(Symbol(0), production(0, 0), 3),
LookaheadSet({ Symbol(21, true) })
}
}),
PrecedenceRange(5, 5)
}
},
// For the second and third item, symbol 15 is next, with two different
// precedence values.
{
Symbol(2),
ParseItemSet({
{
ParseItem(Symbol(1), production(1, 0), 1),
LookaheadSet({ Symbol(17, true) })
},
})
Symbol(15),
{
ParseItemSet({
{
ParseItem(Symbol(0), production(0, 1), 3),
LookaheadSet({ Symbol(21, true) })
},
{
ParseItem(Symbol(1), production(1, 0), 1),
LookaheadSet({ Symbol(22, true) })
},
}),
PrecedenceRange(6, 7)
}
},
// The third item is at the end of its production: no transitions.
})));
});
});

View file

@ -5,7 +5,6 @@
#include "compiler/syntax_grammar.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/get_metadata.h"
namespace tree_sitter {
@ -110,7 +109,10 @@ ostream &operator<<(ostream &stream, const ProductionStep &step) {
}
ostream &operator<<(ostream &stream, const PrecedenceRange &range) {
return stream << string("{") << to_string(range.min) << string(", ") << to_string(range.max) << string("}");
if (range.empty)
return stream << string("{empty}");
else
return stream << string("{") << to_string(range.min) << string(", ") << to_string(range.max) << string("}");
}
namespace build_tables {
@ -126,7 +128,7 @@ ostream &operator<<(ostream &stream, const LexItemSet &item_set) {
ostream &operator<<(ostream &stream, const ParseItem &item) {
return stream << string("(item variable:") << to_string(item.variable_index)
<< string(" production:") << to_string((size_t)&item.production % 1000)
<< string(" production:") << to_string((size_t)item.production % 1000)
<< string(" step:") << to_string(item.step_index)
<< string(")");
}
@ -135,11 +137,6 @@ std::ostream &operator<<(std::ostream &stream, const ParseItemSet &item_set) {
return stream << item_set.entries;
}
// Writes a MetadataRange to `stream` in the form "{min, max}".
std::ostream &operator<<(std::ostream &stream, const MetadataRange &range) {
  stream << string("{");
  stream << to_string(range.min);
  stream << string(", ");
  stream << to_string(range.max);
  stream << string("}");
  return stream;
}
// Writes a LookaheadSet by streaming the object its `entries` member points to.
std::ostream &operator<<(std::ostream &stream, const LookaheadSet &set) {
  auto &pointed_entries = *set.entries;
  return stream << pointed_entries;
}

View file

@ -111,14 +111,12 @@ ostream &operator<<(ostream &, const PrecedenceRange &);
namespace build_tables {
struct MetadataRange;
class LexItem;
class LexItemSet;
class ParseItem;
class ParseItemSet;
class LookaheadSet;
ostream &operator<<(ostream &, const MetadataRange &);
ostream &operator<<(ostream &, const LexItem &);
ostream &operator<<(ostream &, const LexItemSet &);
ostream &operator<<(ostream &, const ParseItem &);

View file

@ -29,7 +29,7 @@ describe("expand_tokens", []() {
character({ 'y' }),
character({ 'z' }),
}), {
{PRECEDENCE, 1},
{IS_STRING, 1},
{IS_TOKEN, 1},
}),
i_sym(11),
@ -50,7 +50,7 @@ describe("expand_tokens", []() {
character({ ' ' }),
character({ 946 }),
}), {
{PRECEDENCE, 1},
{IS_STRING, 1},
{IS_TOKEN, 1},
})),
})));

View file

@ -14,7 +14,8 @@ extern const Grammar c = Grammar({
{ "preproc_define", seq({
str("#define"),
sym("identifier"),
token(repeat(choice({ str("\\\n"), pattern(".") }))) }) },
optional(token(prec(-1, repeat1(choice({ str("\\\n"), pattern(".") }))))),
str("\n") }) },
{ "function_definition", seq({
optional(sym("declaration_specifiers")),

File diff suppressed because it is too large Load diff

View file

@ -594,7 +594,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) {
(lookahead == '\n') ||
(lookahead == 'g')))
ADVANCE(29);
ACCEPT_TOKEN(sym_comment);
ACCEPT_TOKEN(sym_regex);
case 28:
if (!((lookahead == 0) ||
(lookahead == '\n')))

View file

@ -6,8 +6,6 @@
#include <utility>
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
@ -51,7 +49,8 @@ class LexTableBuilder {
LexTable build() {
for (ParseState &parse_state : parse_table->states) {
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs(), false);
LexItemSet item_set =
build_lex_item_set(parse_state.expected_inputs(), false);
parse_state.lex_state_id = add_lex_state(item_set);
}
@ -87,11 +86,11 @@ class LexTableBuilder {
for (const rule_ptr &separator_rule : separator_rules)
result.entries.insert(LexItem(
symbol, rules::Seq::build({
rules::Metadata::build(
separator_rule,
{
{ rules::START_TOKEN, 1 }, { rules::PRECEDENCE, -1 },
}),
rules::Metadata::build(separator_rule,
{
{ rules::START_TOKEN, 1 },
{ rules::PRECEDENCE, -99999 },
}),
rule,
})));
}
@ -120,25 +119,26 @@ class LexTableBuilder {
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const auto &transition : item_set.transitions()) {
const CharacterSet &rule = transition.first;
const LexItemSet &new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
auto action = LexAction::Advance(
new_state_id, precedence_range_for_item_set(new_item_set));
if (conflict_manager.resolve(action,
lex_table.state(state_id).default_action))
const LexItemSet &new_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
auto current_action = lex_table.state(state_id).default_action;
auto action = LexAction::Advance(-1, precedence);
if (conflict_manager.resolve(action, current_action)) {
action.state_index = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = action;
}
}
}
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
for (const LexItem &item : item_set.entries) {
CompletionStatus completion_status = get_completion_status(item.rule);
LexItem::CompletionStatus completion_status = item.completion_status();
if (completion_status.is_done) {
auto current_action = lex_table.state(state_id).default_action;
auto new_action =
LexAction::Accept(item.lhs, completion_status.precedence);
if (conflict_manager.resolve(new_action, current_action))
lex_table.state(state_id).default_action = new_action;
auto action = LexAction::Accept(item.lhs, completion_status.precedence,
completion_status.is_string);
if (conflict_manager.resolve(action, current_action))
lex_table.state(state_id).default_action = action;
}
}
}
@ -148,16 +148,6 @@ class LexTableBuilder {
if (item.is_token_start())
lex_table.state(state_id).is_token_start = true;
}
// Collects, over every item in `item_set`, the minimum and maximum PRECEDENCE
// metadata of the item's rule into a single PrecedenceRange.
PrecedenceRange precedence_range_for_item_set(const LexItemSet &item_set) const {
  PrecedenceRange combined;
  for (auto it = item_set.entries.begin(); it != item_set.entries.end(); ++it) {
    const auto item_range = get_metadata(it->rule, rules::PRECEDENCE);
    combined.add(item_range.min);
    combined.add(item_range.max);
  }
  return combined;
}
};
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {

View file

@ -8,8 +8,6 @@
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_conflict_manager.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/lexical_grammar.h"
#include "compiler/syntax_grammar.h"
@ -99,50 +97,28 @@ class ParseTableBuilder {
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &transition : item_set.transitions()) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
const ParseItemSet &next_item_set = transition.second.first;
const PrecedenceRange &precedence = transition.second.second;
ParseAction *new_action = add_action(
state_id, symbol,
ParseAction::Shift(0, precedence_values_for_item_set(next_item_set)),
item_set);
state_id, symbol, ParseAction::Shift(0, precedence), item_set);
if (new_action)
new_action->state_index = add_parse_state(next_item_set);
}
}
// Describes whether a ParseItem has reached the end of its production and,
// if so, the precedence and associativity recorded on the production's final
// step. Filled in by get_completion_status(const ParseItem &) below, which
// initializes it positionally, so the field order matters.
struct CompletionStatus {
// True when the item's step_index equals the size of its production.
bool is_done;
// Precedence of the production's last step; stays 0 when the item is not
// done or the production has no steps.
int precedence;
// Associativity of the production's last step; stays AssociativityNone when
// the item is not done or the production has no steps.
rules::Associativity associativity;
};
// Reports whether `item` has consumed every step of its production. When it
// has, the precedence and associativity of the last step (if any) are copied
// into the returned status.
CompletionStatus get_completion_status(const ParseItem &item) {
  // Steps remain: not done, with neutral precedence and associativity.
  if (!(item.step_index == item.production->size()))
    return { false, 0, rules::AssociativityNone };

  CompletionStatus status = { true, 0, rules::AssociativityNone };
  // An empty production has no last step to read from.
  if (item.step_index > 0) {
    const ProductionStep &final_step = item.production->at(item.step_index - 1);
    status.precedence = final_step.precedence;
    status.associativity = final_step.associativity;
  }
  return status;
}
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
const auto &lookahead_symbols = pair.second;
CompletionStatus completion_status = get_completion_status(item);
if (completion_status.is_done) {
ParseItem::CompletionStatus status = item.completion_status();
if (status.is_done) {
ParseAction action =
(item.lhs() == rules::START())
? ParseAction::Accept()
: ParseAction::Reduce(Symbol(item.variable_index), item.step_index,
completion_status.precedence,
completion_status.associativity,
status.precedence, status.associativity,
*item.production);
for (const auto &lookahead_sym : *lookahead_symbols.entries)
@ -319,16 +295,6 @@ class ParseTableBuilder {
return result;
}
// Gathers into one PrecedenceRange the precedence of the most recently
// consumed step of every item in `item_set`. Items that have not consumed any
// step yet contribute nothing.
PrecedenceRange precedence_values_for_item_set(const ParseItemSet &item_set) {
  PrecedenceRange range;
  for (const auto &entry : item_set.entries) {
    const ParseItem &parse_item = entry.first;
    if (!(parse_item.step_index > 0))
      continue;
    const auto &previous_step = parse_item.production->at(parse_item.step_index - 1);
    range.add(previous_step.precedence);
  }
  return range;
}
string symbol_name(const rules::Symbol &symbol) const {
if (symbol.is_built_in()) {
if (symbol == rules::ERROR())

View file

@ -1,54 +0,0 @@
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/repeat.h"
namespace tree_sitter {
namespace build_tables {
// Rule visitor that works out whether a rule is "done", and which precedence
// and associativity to report for it.
class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
 protected:
  // A choice takes the status of its first alternative that is done; if none
  // is, the choice is not done.
  CompletionStatus apply_to(const rules::Choice *rule) {
    CompletionStatus outcome = { false, 0, rules::AssociativityNone };
    for (auto it = rule->elements.begin(); it != rule->elements.end(); ++it) {
      CompletionStatus candidate = apply(*it);
      if (candidate.is_done) {
        outcome = candidate;
        break;
      }
    }
    return outcome;
  }

  // A metadata rule supplies its own PRECEDENCE and ASSOCIATIVITY values only
  // when the wrapped rule is done and has not already reported a non-zero
  // associativity; otherwise the wrapped rule's status is passed through.
  CompletionStatus apply_to(const rules::Metadata *rule) {
    CompletionStatus inner = apply(rule->rule);
    if (!inner.is_done || inner.associativity)
      return inner;

    inner.precedence = rule->value_for(rules::PRECEDENCE);
    inner.associativity =
      static_cast<rules::Associativity>(rule->value_for(rules::ASSOCIATIVITY));
    return inner;
  }

  // A repeat has the status of its content.
  CompletionStatus apply_to(const rules::Repeat *rule) {
    return apply(rule->content);
  }

  // A blank rule is always done.
  CompletionStatus apply_to(const rules::Blank *rule) {
    CompletionStatus done = { true, 0, rules::AssociativityNone };
    return done;
  }

  // A sequence is only done when its left side is; in that case the right
  // side determines the status.
  CompletionStatus apply_to(const rules::Seq *rule) {
    if (!apply(rule->left).is_done)
      return { false, 0, rules::AssociativityNone };
    return apply(rule->right);
  }
};
// Runs the GetCompletionStatus visitor over `rule` and returns its result.
CompletionStatus get_completion_status(const rule_ptr &rule) {
  GetCompletionStatus visitor;
  return visitor.apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,21 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_GET_COMPLETION_STATUS_H_
#define COMPILER_BUILD_TABLES_GET_COMPLETION_STATUS_H_
#include "tree_sitter/compiler.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// Value returned by get_completion_status() for a rule.
struct CompletionStatus {
// Whether the rule is considered complete.
bool is_done;
// Precedence reported for the rule. NOTE(review): presumably only meaningful
// when is_done is set; confirm against the implementation.
int precedence;
// Associativity reported alongside the precedence.
rules::Associativity associativity;
};
CompletionStatus get_completion_status(const rule_ptr &);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_GET_COMPLETION_STATUS_H_

View file

@ -1,67 +0,0 @@
#include "compiler/build_tables/get_metadata.h"
#include <utility>
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/choice.h"
#include "compiler/build_tables/rule_can_be_blank.h"
namespace tree_sitter {
namespace build_tables {
using std::pair;
// Returns the range of values that metadata key `key` takes on in `rule`.
// Metadata rules contribute their value for the key, choices merge the ranges
// of their elements, and a sequence only looks at its right side when its
// left side can be blank. A range of {0, 0} is returned when no non-zero
// value is found.
MetadataRange get_metadata(const rule_ptr &rule, rules::MetadataKey key) {
  // Every visit yields a range plus a flag telling whether a non-zero value
  // for the key was actually found.
  typedef pair<MetadataRange, bool> RangeResult;

  class GetMetadata : public rules::RuleFn<RangeResult> {
    rules::MetadataKey metadata_key;

   public:
    explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}

   protected:
    // A value found inside the wrapped rule wins over this rule's own value.
    RangeResult apply_to(const rules::Metadata *rule) {
      RangeResult inner = apply(rule->rule);
      if (!inner.second) {
        int own_value = rule->value_for(metadata_key);
        return RangeResult(MetadataRange(own_value, own_value), own_value != 0);
      }
      return inner;
    }

    RangeResult apply_to(const rules::Choice *rule) {
      RangeResult merged(MetadataRange(0, 0), false);
      for (auto it = rule->elements.begin(); it != rule->elements.end(); ++it)
        merge_result(&merged, apply(*it));
      return merged;
    }

    RangeResult apply_to(const rules::Seq *rule) {
      RangeResult merged = apply(rule->left);
      // The right side can only come first if the left side may be blank.
      if (rule_can_be_blank(rule->left))
        merge_result(&merged, apply(rule->right));
      return merged;
    }

    RangeResult apply_to(const rules::Repeat *rule) {
      return apply(rule->content);
    }

   private:
    // Widens `left` to cover `right`. A `right` without a found value is
    // ignored; a `left` without a found value is simply overwritten.
    void merge_result(RangeResult *left, const RangeResult &right) {
      if (!right.second)
        return;

      if (!left->second) {
        left->first.min = right.first.min;
        left->first.max = right.first.max;
      } else {
        if (right.first.min < left->first.min)
          left->first.min = right.first.min;
        if (right.first.max > left->first.max)
          left->first.max = right.first.max;
      }
      left->second = true;
    }
  };

  GetMetadata visitor(key);
  return visitor.apply(rule).first;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,27 +0,0 @@
#ifndef COMPILER_BUILD_TABLES_GET_METADATA_H_
#define COMPILER_BUILD_TABLES_GET_METADATA_H_
#include "compiler/rules/metadata.h"
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
// A pair of bounds (min, max) describing the values found for one metadata
// key. A default-constructed range is (0, 0).
struct MetadataRange {
  int min;
  int max;

  MetadataRange() : min(0), max(0) {}
  MetadataRange(int min, int max) : min(min), max(max) {}

  // Two ranges are equal when both bounds match.
  bool operator==(const MetadataRange &other) const {
    if (min != other.min)
      return false;
    return max == other.max;
  }
};
MetadataRange get_metadata(const rule_ptr &, rules::MetadataKey);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_GET_METADATA_H_

View file

@ -28,12 +28,19 @@ bool LexConflictManager::resolve(const LexAction &new_action,
return true;
else if (new_precedence < old_precedence)
return false;
else if (new_action.is_string && !old_action.is_string)
return true;
else if (old_action.is_string && !new_action.is_string)
return false;
else
return new_action.symbol.index < old_action.symbol.index;
}
case LexActionTypeAdvance:
return true;
if (old_precedence > new_action.precedence_range.max)
return false;
else
return true;
default:
return false;

View file

@ -1,8 +1,13 @@
#include "compiler/build_tables/lex_item.h"
#include <unordered_set>
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item_transitions.h"
#include "compiler/build_tables/rule_can_be_blank.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/metadata.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
namespace tree_sitter {
namespace build_tables {
@ -22,7 +27,68 @@ bool LexItem::operator==(const LexItem &other) const {
}
bool LexItem::is_token_start() const {
return get_metadata(rule, rules::START_TOKEN).max > 0;
// Visitor that answers whether a rule begins with content marked by a
// positive START_TOKEN metadata value.
class IsTokenStart : public rules::RuleFn<bool> {
  // The left side is checked first; the right side is only consulted when
  // the left side did not match and can be blank.
  bool apply_to(const rules::Seq *rule) {
    if (apply(rule->left))
      return true;
    return rule_can_be_blank(rule->left) && apply(rule->right);
  }

  // Either this rule carries the marker itself or the wrapped rule does.
  bool apply_to(const rules::Metadata *rule) {
    if (rule->value_for(rules::START_TOKEN) > 0)
      return true;
    return apply(rule->rule);
  }

  // A choice qualifies as soon as any one alternative does.
  bool apply_to(const rules::Choice *rule) {
    for (const rule_ptr &alternative : rule->elements) {
      if (apply(alternative))
        return true;
    }
    return false;
  }
};
return IsTokenStart().apply(rule);
}
// Reports whether this item's rule is done and, if so, the precedence and
// is_string flag collected from the Metadata rules wrapping the done path.
LexItem::CompletionStatus LexItem::completion_status() const {
  class GetCompletionStatus : public rules::RuleFn<CompletionStatus> {
   protected:
    // The first alternative that is done determines the result.
    CompletionStatus apply_to(const rules::Choice *rule) {
      for (const rule_ptr &alternative : rule->elements) {
        const CompletionStatus candidate = apply(alternative);
        if (candidate.is_done)
          return candidate;
      }
      return { false, 0, false };
    }

    // Decorates a done inner status with this rule's metadata. A non-zero
    // precedence found further in is kept; the outer value only fills a 0.
    CompletionStatus apply_to(const rules::Metadata *rule) {
      CompletionStatus status = apply(rule->rule);
      if (!status.is_done)
        return status;

      // Copying a 0 over a 0 is a no-op, so the outer value needs no
      // separate non-zero check here.
      if (status.precedence == 0)
        status.precedence = rule->value_for(rules::PRECEDENCE);

      if (rule->value_for(rules::IS_STRING) != 0)
        status.is_string = true;

      return status;
    }

    // A repeat is done exactly when its content is done.
    CompletionStatus apply_to(const rules::Repeat *rule) {
      return apply(rule->content);
    }

    // A blank rule is always done, with no precedence and not a string.
    CompletionStatus apply_to(const rules::Blank *rule) {
      return { true, 0, false };
    }

    // A sequence is done only when the left side is done, and then takes
    // the status of the right side.
    CompletionStatus apply_to(const rules::Seq *rule) {
      if (!apply(rule->left).is_done)
        return { false, 0, false };
      return apply(rule->right);
    }
  };

  const CompletionStatus status = GetCompletionStatus().apply(rule);
  return status;
}
size_t LexItem::Hash::operator()(const LexItem &item) const {
@ -45,8 +111,8 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
return entries == other.entries;
}
map<CharacterSet, LexItemSet> LexItemSet::transitions() const {
map<CharacterSet, LexItemSet> result;
LexItemSet::TransitionMap LexItemSet::transitions() const {
TransitionMap result;
for (const LexItem &item : entries)
lex_item_transitions(&result, item);
return result;

View file

@ -3,25 +3,35 @@
#include <unordered_set>
#include <map>
#include <utility>
#include <string>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/precedence_range.h"
namespace tree_sitter {
namespace build_tables {
class LexItem {
public:
LexItem(const rules::Symbol &lhs, rule_ptr rule);
bool operator==(const LexItem &other) const;
bool is_token_start() const;
LexItem(const rules::Symbol &, rule_ptr);
rules::Symbol lhs;
rule_ptr rule;
struct CompletionStatus {
bool is_done;
int precedence;
bool is_string;
};
struct Hash {
size_t operator()(const LexItem &) const;
};
bool operator==(const LexItem &other) const;
bool is_token_start() const;
CompletionStatus completion_status() const;
rules::Symbol lhs;
rule_ptr rule;
};
class LexItemSet {
@ -29,14 +39,17 @@ class LexItemSet {
LexItemSet();
explicit LexItemSet(const std::unordered_set<LexItem, LexItem::Hash> &);
bool operator==(const LexItemSet &) const;
std::map<rules::CharacterSet, LexItemSet> transitions() const;
std::unordered_set<LexItem, LexItem::Hash> entries;
typedef std::map<rules::CharacterSet, std::pair<LexItemSet, PrecedenceRange>>
TransitionMap;
struct Hash {
size_t operator()(const LexItemSet &) const;
};
bool operator==(const LexItemSet &) const;
TransitionMap transitions() const;
std::unordered_set<LexItem, LexItem::Hash> entries;
};
} // namespace build_tables

View file

@ -25,8 +25,9 @@ using std::vector;
using rules::CharacterSet;
class LexItemTransitions : public rules::RuleFn<void> {
map<CharacterSet, LexItemSet> *transitions;
LexItemSet::TransitionMap *transitions;
const rules::Symbol &item_lhs;
vector<int> *precedence_stack;
LexItemSet transform_item_set(const LexItemSet &item_set,
function<rule_ptr(rule_ptr)> callback) {
@ -36,23 +37,29 @@ class LexItemTransitions : public rules::RuleFn<void> {
return new_set;
}
void merge_transition(map<CharacterSet, LexItemSet> *transitions,
CharacterSet new_char_set, LexItemSet new_item_set) {
vector<pair<CharacterSet, LexItemSet>> new_entries;
void merge_transition(LexItemSet::TransitionMap *transitions,
CharacterSet new_char_set, LexItemSet new_item_set,
PrecedenceRange new_precedence_range) {
vector<pair<CharacterSet, pair<LexItemSet, PrecedenceRange>>> new_entries;
auto iter = transitions->begin();
while (iter != transitions->end()) {
CharacterSet existing_char_set = iter->first;
LexItemSet &existing_item_set = iter->second;
LexItemSet &existing_item_set = iter->second.first;
PrecedenceRange &existing_precedence_range = iter->second.second;
CharacterSet intersection = existing_char_set.remove_set(new_char_set);
if (!intersection.is_empty()) {
new_char_set.remove_set(intersection);
if (!existing_char_set.is_empty())
new_entries.push_back({ existing_char_set, existing_item_set });
new_entries.push_back(
{ existing_char_set,
{ existing_item_set, existing_precedence_range } });
existing_item_set.entries.insert(new_item_set.entries.begin(),
new_item_set.entries.end());
new_entries.push_back({ intersection, existing_item_set });
existing_precedence_range.add(new_precedence_range);
new_entries.push_back(
{ intersection, { existing_item_set, existing_precedence_range } });
transitions->erase(iter++);
} else {
iter++;
@ -62,14 +69,22 @@ class LexItemTransitions : public rules::RuleFn<void> {
transitions->insert(new_entries.begin(), new_entries.end());
if (!new_char_set.is_empty())
transitions->insert({ new_char_set, new_item_set });
transitions->insert(
{ new_char_set, { new_item_set, new_precedence_range } });
}
// Returns `precedence` unchanged unless it is empty; an empty range picks up
// the value on top of the precedence stack, if the stack has one.
PrecedenceRange merge_precedence(PrecedenceRange precedence) {
  const bool needs_fallback = precedence.empty && !precedence_stack->empty();
  if (needs_fallback)
    precedence.add(precedence_stack->back());
  return precedence;
}
void apply_to(const CharacterSet *rule) {
merge_transition(transitions, *rule,
LexItemSet({
LexItem(item_lhs, rules::Blank::build()),
}));
}),
PrecedenceRange());
}
void apply_to(const rules::Choice *rule) {
@ -78,52 +93,68 @@ class LexItemTransitions : public rules::RuleFn<void> {
}
void apply_to(const rules::Seq *rule) {
map<CharacterSet, LexItemSet> left_transitions;
LexItemTransitions(&left_transitions, item_lhs).apply(rule->left);
for (const auto &pair : left_transitions)
LexItemSet::TransitionMap left_transitions;
LexItemTransitions(&left_transitions, this).apply(rule->left);
for (const auto &pair : left_transitions) {
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->right });
}));
}), merge_precedence(pair.second.second));
}
if (rule_can_be_blank(rule->left))
apply(rule->right);
}
void apply_to(const rules::Repeat *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->content);
LexItemSet::TransitionMap content_transitions;
LexItemTransitions(&content_transitions, this).apply(rule->content);
for (const auto &pair : content_transitions) {
merge_transition(transitions, pair.first, pair.second);
merge_transition(transitions, pair.first, pair.second.first,
merge_precedence(pair.second.second));
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Seq::build({ item_rule, rule->copy() });
}));
}), merge_precedence(pair.second.second));
}
}
void apply_to(const rules::Metadata *rule) {
map<CharacterSet, LexItemSet> content_transitions;
LexItemTransitions(&content_transitions, item_lhs).apply(rule->rule);
LexItemSet::TransitionMap content_transitions;
precedence_stack->push_back(rule->value_for(rules::PRECEDENCE));
LexItemTransitions(&content_transitions, this).apply(rule->rule);
for (const auto &pair : content_transitions)
merge_transition(
transitions, pair.first,
transform_item_set(pair.second, [&rule](rule_ptr item_rule) {
transform_item_set(pair.second.first, [&rule](rule_ptr item_rule) {
return rules::Metadata::build(item_rule, rule->value);
}));
}), pair.second.second);
precedence_stack->pop_back();
}
public:
LexItemTransitions(map<CharacterSet, LexItemSet> *transitions,
const rules::Symbol &item_lhs)
: transitions(transitions), item_lhs(item_lhs) {}
LexItemTransitions(LexItemSet::TransitionMap *transitions,
const rules::Symbol &item_lhs,
vector<int> *precedence_stack)
: transitions(transitions),
item_lhs(item_lhs),
precedence_stack(precedence_stack) {}
LexItemTransitions(LexItemSet::TransitionMap *transitions,
LexItemTransitions *other)
: transitions(transitions),
item_lhs(other->item_lhs),
precedence_stack(other->precedence_stack) {}
};
void lex_item_transitions(map<CharacterSet, LexItemSet> *transitions,
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &item) {
LexItemTransitions(transitions, item.lhs).apply(item.rule);
vector<int> precedence_stack;
LexItemTransitions(transitions, item.lhs, &precedence_stack).apply(item.rule);
}
} // namespace build_tables

View file

@ -1,8 +1,6 @@
#ifndef COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#define COMPILER_BUILD_TABLES_LEX_ITEM_TRANSITIONS_H_
#include <map>
#include <set>
#include "compiler/rules/character_set.h"
#include "compiler/rules/symbol.h"
#include "compiler/build_tables/lex_item.h"
@ -10,7 +8,7 @@
namespace tree_sitter {
namespace build_tables {
void lex_item_transitions(std::map<rules::CharacterSet, LexItemSet> *transitions,
void lex_item_transitions(LexItemSet::TransitionMap *transitions,
const LexItem &);
} // namespace build_tables

View file

@ -41,14 +41,23 @@ Symbol ParseItem::lhs() const {
return Symbol(variable_index);
}
bool ParseItem::is_done() const {
return step_index == production->size();
// Reports whether this item has consumed every step of its production. For a
// done item with at least one step, the precedence and associativity of the
// last step are included; otherwise they are 0 and AssociativityNone.
ParseItem::CompletionStatus ParseItem::completion_status() const {
  if (step_index != production->size())
    return { false, 0, rules::AssociativityNone };

  CompletionStatus status = { true, 0, rules::AssociativityNone };
  if (step_index > 0) {
    const ProductionStep &final_step = production->at(step_index - 1);
    status.precedence = final_step.precedence;
    status.associativity = final_step.associativity;
  }
  return status;
}
int ParseItem::precedence() const {
if (production->empty())
return 0;
else if (is_done())
else if (completion_status().is_done)
return production->back().precedence;
else
return production->at(step_index).precedence;
@ -57,7 +66,7 @@ int ParseItem::precedence() const {
rules::Associativity ParseItem::associativity() const {
if (production->empty())
return rules::AssociativityNone;
else if (is_done())
else if (completion_status().is_done)
return production->back().associativity;
else
return production->at(step_index).associativity;
@ -66,7 +75,7 @@ rules::Associativity ParseItem::associativity() const {
pair<int, int> ParseItem::remaining_rule_id() const {
if (production->empty())
return { -2, -1 };
else if (is_done())
else if (completion_status().is_done)
return { production->back().associativity, production->back().precedence };
else
return { -1, production->at(step_index).rule_id };
@ -104,8 +113,8 @@ size_t ParseItemSet::Hash::operator()(const ParseItemSet &item_set) const {
return result;
}
map<Symbol, ParseItemSet> ParseItemSet::transitions() const {
map<Symbol, ParseItemSet> result;
ParseItemSet::TransitionMap ParseItemSet::transitions() const {
ParseItemSet::TransitionMap result;
for (const auto &pair : entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
@ -114,9 +123,11 @@ map<Symbol, ParseItemSet> ParseItemSet::transitions() const {
size_t step = item.step_index + 1;
Symbol symbol = item.production->at(item.step_index).symbol;
int precedence = item.production->at(item.step_index).precedence;
ParseItem new_item(item.lhs(), *item.production, step);
result[symbol].entries[new_item] = lookahead_symbols;
result[symbol].first.entries[new_item] = lookahead_symbols;
result[symbol].second.add(precedence);
}
return result;

View file

@ -7,6 +7,7 @@
#include "compiler/rules/symbol.h"
#include "compiler/rules/metadata.h"
#include "compiler/syntax_grammar.h"
#include "compiler/precedence_range.h"
namespace tree_sitter {
namespace build_tables {
@ -15,21 +16,27 @@ class ParseItem {
public:
ParseItem(const rules::Symbol &, const Production &, unsigned int);
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
std::pair<int, int> remaining_rule_id() const;
bool is_done() const;
int precedence() const;
rules::Associativity associativity() const;
int variable_index;
const Production *production;
unsigned int step_index;
struct CompletionStatus {
bool is_done;
int precedence;
rules::Associativity associativity;
};
struct Hash {
size_t operator()(const ParseItem &) const;
};
bool operator==(const ParseItem &other) const;
bool operator<(const ParseItem &other) const;
rules::Symbol lhs() const;
std::pair<int, int> remaining_rule_id() const;
int precedence() const;
rules::Associativity associativity() const;
CompletionStatus completion_status() const;
int variable_index;
const Production *production;
unsigned int step_index;
};
class ParseItemSet {
@ -37,14 +44,17 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
std::map<rules::Symbol, ParseItemSet> transitions() const;
bool operator==(const ParseItemSet &) const;
std::map<ParseItem, LookaheadSet> entries;
typedef std::map<rules::Symbol, std::pair<ParseItemSet, PrecedenceRange>>
TransitionMap;
struct Hash {
size_t operator()(const ParseItemSet &) const;
};
TransitionMap transitions() const;
bool operator==(const ParseItemSet &) const;
std::map<ParseItem, LookaheadSet> entries;
};
} // namespace build_tables

View file

@ -17,24 +17,26 @@ LexAction::LexAction()
precedence_range({ 0, 0 }) {}
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol,
PrecedenceRange precedence_range)
PrecedenceRange precedence_range, bool is_string)
: type(type),
symbol(symbol),
state_index(state_index),
precedence_range(precedence_range) {}
precedence_range(precedence_range),
is_string(is_string) {}
LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 });
return LexAction(LexActionTypeError, -1, Symbol(-1), { 0, 0 }, false);
}
LexAction LexAction::Advance(size_t state_index,
PrecedenceRange precedence_range) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1),
precedence_range);
precedence_range, false);
}
LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence });
LexAction LexAction::Accept(Symbol symbol, int precedence, bool is_string) {
return LexAction(LexActionTypeAccept, -1, symbol, { precedence, precedence },
is_string);
}
bool LexAction::operator==(const LexAction &other) const {

View file

@ -19,11 +19,11 @@ typedef enum {
class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol,
PrecedenceRange precedence_range);
PrecedenceRange precedence_range, bool is_string);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Accept(rules::Symbol symbol, int precedence, bool is_string);
static LexAction Error();
static LexAction Advance(size_t state_index, PrecedenceRange precedence_range);
bool operator==(const LexAction &action) const;
@ -32,6 +32,7 @@ class LexAction {
rules::Symbol symbol;
size_t state_index;
PrecedenceRange precedence_range;
bool is_string;
};
} // namespace tree_sitter

View file

@ -7,6 +7,9 @@ PrecedenceRange::PrecedenceRange() : min(0), max(0), empty(true) {}
// Builds a range with the given bounds and marks it as non-empty. The bounds
// are stored as passed; no ordering check is performed here.
PrecedenceRange::PrecedenceRange(int min, int max)
: min(min), max(max), empty(false) {}
// Builds a non-empty range holding a single precedence value by delegating
// to the (min, max) constructor with both bounds equal to `value`.
PrecedenceRange::PrecedenceRange(int value) : PrecedenceRange(value, value) {}
void PrecedenceRange::add(int new_value) {
if (empty) {
min = new_value;
@ -20,6 +23,13 @@ void PrecedenceRange::add(int new_value) {
}
}
// Extends this range to cover `other`. An empty `other` contributes nothing;
// otherwise both of its bounds are added through add(int).
void PrecedenceRange::add(const PrecedenceRange &other) {
  if (other.empty)
    return;

  add(other.min);
  add(other.max);
}
bool PrecedenceRange::operator<(const PrecedenceRange &other) const {
if (empty)
return !other.empty;

View file

@ -5,9 +5,11 @@ namespace tree_sitter {
struct PrecedenceRange {
PrecedenceRange();
explicit PrecedenceRange(int value);
PrecedenceRange(int min, int max);
void add(int value);
void add(const PrecedenceRange &);
bool operator==(const PrecedenceRange &other) const;
bool operator<(const PrecedenceRange &other) const;

View file

@ -47,7 +47,7 @@ class ExpandTokens : public rules::IdentityRuleFn {
return make_shared<rules::Metadata>(
rules::Seq::build(elements),
std::map<rules::MetadataKey, int>({
{ rules::IS_TOKEN, 1 }, { rules::PRECEDENCE, 1 },
{ rules::IS_TOKEN, 1 }, { rules::IS_STRING, 1 },
}));
}

View file

@ -18,8 +18,9 @@ enum Associativity {
enum MetadataKey {
START_TOKEN,
PRECEDENCE,
IS_TOKEN,
ASSOCIATIVITY,
IS_TOKEN,
IS_STRING,
};
class Metadata : public Rule {