Start work on external tokens
This commit is contained in:
parent
46854cc274
commit
c966af0412
47 changed files with 723 additions and 417 deletions
|
|
@ -48,6 +48,11 @@ typedef struct {
|
|||
bool fragile : 1;
|
||||
} TSParseAction;
|
||||
|
||||
/*
 * Lexing configuration record. The generated language exposes an array of
 * these through TSLanguage.lex_modes.
 *
 * lex_state       - 16-bit lex state value.
 * external_tokens - 16-bit value; presumably selects an entry in
 *                   TSLanguage.external_token_lists (TODO confirm against
 *                   the runtime that consumes it).
 */
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_tokens;
|
||||
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
|
|
@ -64,8 +69,15 @@ typedef struct TSLanguage {
|
|||
const TSSymbolMetadata *symbol_metadata;
|
||||
const unsigned short *parse_table;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const TSStateId *lex_states;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
const TSSymbol *external_token_symbol_map;
|
||||
const bool *external_token_lists;
|
||||
/*
 * Callbacks for an externally supplied scanner.
 *
 * create  - takes no declared arguments and returns an opaque pointer.
 * scan    - receives the lexer and a bool array named symbol_whitelist,
 *           and returns a bool.
 * destroy - receives a void pointer; presumably the one returned by
 *           create (TODO confirm).
 *
 * NOTE(review): scan is not handed any void pointer, so it cannot reach
 * whatever create returned - confirm whether that is intentional.
 */
struct {
|
||||
void * (*create)();
|
||||
bool (*scan)(TSLexer *, const bool *symbol_whitelist);
|
||||
void (*destroy)(void *);
|
||||
} external_scanner;
|
||||
} TSLanguage;
|
||||
|
||||
/*
|
||||
|
|
@ -146,21 +158,22 @@ typedef struct TSLanguage {
|
|||
{ .type = TSParseActionTypeAccept } \
|
||||
}
|
||||
|
||||
#define EXPORT_LANGUAGE(language_name) \
|
||||
static TSLanguage language = { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
.lex_states = ts_lex_states, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
}; \
|
||||
\
|
||||
const TSLanguage *language_name() { \
|
||||
return &language; \
|
||||
}
|
||||
|
||||
/*
 * GET_LANGUAGE(...) expands to the definition of a static TSLanguage named
 * `language`, followed by `return &language`. Because the expansion contains
 * a return statement it has to be used inside a function body, and because
 * the expansion does not end with a semicolon the use site must supply one.
 *
 * The initializer refers to SYMBOL_COUNT, TOKEN_COUNT, ts_symbol_metadata,
 * ts_parse_table, ts_parse_actions, ts_lex_modes, ts_symbol_names, ts_lex,
 * ts_external_token_lists and ts_external_token_symbol_map, so all of these
 * must be visible where the macro is expanded.
 *
 * The variadic arguments are placed verbatim inside the braces that
 * initialize the `external_scanner` member.
 */
#define GET_LANGUAGE(...) \
|
||||
static TSLanguage language = { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
.lex_modes = ts_lex_modes, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
.external_token_lists = (const bool *)ts_external_token_lists, \
|
||||
.external_token_symbol_map = ts_external_token_symbol_map, \
|
||||
.external_scanner = {__VA_ARGS__} \
|
||||
}; \
|
||||
return &language \
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@
|
|||
'src/compiler/rules/character_range.cc',
|
||||
'src/compiler/rules/character_set.cc',
|
||||
'src/compiler/rules/choice.cc',
|
||||
'src/compiler/rules/external_token.cc',
|
||||
'src/compiler/rules/metadata.cc',
|
||||
'src/compiler/rules/named_symbol.cc',
|
||||
'src/compiler/rules/pattern.cc',
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ describe("recovery_tokens(rule)", []() {
|
|||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol::Index>>({ 1 }));
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@ START_TEST
|
|||
describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
||||
LexConflictManager conflict_manager;
|
||||
bool update;
|
||||
Symbol sym1(0, true);
|
||||
Symbol sym2(1, true);
|
||||
Symbol sym3(2, true);
|
||||
Symbol sym4(3, true);
|
||||
Symbol sym1(0, Symbol::Terminal);
|
||||
Symbol sym2(1, Symbol::Terminal);
|
||||
Symbol sym3(2, Symbol::Terminal);
|
||||
Symbol sym4(3, Symbol::Terminal);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ START_TEST
|
|||
describe("LexItem", []() {
|
||||
describe("completion_status()", [&]() {
|
||||
it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
|
||||
LexItem item1(Symbol(0, true), character({ 'a', 'b', 'c' }));
|
||||
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
|
||||
AssertThat(item1.completion_status().is_done, IsFalse());
|
||||
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item1.completion_status().is_string, IsFalse());
|
||||
|
|
@ -23,7 +23,7 @@ describe("LexItem", []() {
|
|||
params.precedence = 3;
|
||||
params.has_precedence = true;
|
||||
params.is_string = 1;
|
||||
LexItem item2(Symbol(0, true), choice({
|
||||
LexItem item2(Symbol(0, Symbol::Terminal), choice({
|
||||
metadata(blank(), params),
|
||||
character({ 'a', 'b', 'c' })
|
||||
}));
|
||||
|
|
@ -32,7 +32,7 @@ describe("LexItem", []() {
|
|||
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
|
||||
AssertThat(item2.completion_status().is_string, IsTrue());
|
||||
|
||||
LexItem item3(Symbol(0, true), repeat(character({ ' ', '\t' })));
|
||||
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
|
||||
AssertThat(item3.completion_status().is_done, IsTrue());
|
||||
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item3.completion_status().is_string, IsFalse());
|
||||
|
|
@ -43,7 +43,7 @@ describe("LexItem", []() {
|
|||
describe("LexItemSet::transitions()", [&]() {
|
||||
it("handles single characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -53,7 +53,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -67,7 +67,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
params.is_main_token = true;
|
||||
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), metadata(character({ 'x' }), params)),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -77,7 +77,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), metadata(blank(), params)),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
true
|
||||
|
|
@ -88,7 +88,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
|
|
@ -103,7 +103,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('w'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
|
|
@ -118,7 +118,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences with nested precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
prec(3, seq({
|
||||
character({ 'v' }),
|
||||
prec(4, seq({
|
||||
|
|
@ -140,7 +140,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
// The outer precedence is now 'active', because we are within its
|
||||
// contained rule.
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
prec(4, seq({
|
||||
character({ 'w' }),
|
||||
|
|
@ -168,7 +168,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
Transition{
|
||||
// The inner precedence is now 'active'
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
active_prec(4, character({ 'x' })),
|
||||
character({ 'y' }) })),
|
||||
|
|
@ -193,7 +193,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, character({ 'y' })),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
|
|
@ -216,7 +216,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'z' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -227,7 +227,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences where the left hand side can be blank", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ 'x' }),
|
||||
blank(),
|
||||
|
|
@ -244,7 +244,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
|
|
@ -257,7 +257,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'z' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -268,7 +268,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles blanks", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
});
|
||||
|
||||
AssertThat(item_set.transitions(), IsEmpty());
|
||||
|
|
@ -276,11 +276,11 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), repeat1(seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))),
|
||||
LexItem(Symbol(2), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -290,14 +290,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'b' }),
|
||||
repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))
|
||||
})),
|
||||
LexItem(Symbol(1), character({ 'b' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -307,8 +307,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -319,7 +319,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats with precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' }))))
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -329,8 +329,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' })))),
|
||||
LexItem(Symbol(1), active_prec(-1, blank())),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
|
||||
}),
|
||||
PrecedenceRange(-1),
|
||||
false
|
||||
|
|
@ -341,7 +341,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between overlapping character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
active_prec(2, seq({
|
||||
character({ 'a', 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
|
|
@ -360,7 +360,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'b'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
}),
|
||||
PrecedenceRange(2),
|
||||
false
|
||||
|
|
@ -370,8 +370,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
}),
|
||||
PrecedenceRange(2, 3),
|
||||
false
|
||||
|
|
@ -381,7 +381,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -392,7 +392,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between a subset and a superset of characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
seq({
|
||||
character({ 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
|
|
@ -411,7 +411,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a').include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -421,8 +421,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('b', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -433,7 +433,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between whitelisted and blacklisted character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ '/' }, false),
|
||||
seq({
|
||||
|
|
@ -452,7 +452,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include_all().exclude('/').exclude('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -462,8 +462,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -474,8 +474,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles different items with overlapping character sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
});
|
||||
|
||||
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
|
||||
|
|
@ -483,7 +483,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -493,8 +493,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -504,7 +504,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('g', 'i'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
|
|||
|
|
@ -27,23 +27,23 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(11, true), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, true), 0, AssociativityNone},
|
||||
{Symbol(13, true), 0, AssociativityNone},
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(2), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, true), 0, AssociativityNone},
|
||||
{Symbol(15, true), 0, AssociativityNone},
|
||||
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
}, {}, {}};
|
||||
|
|
@ -54,8 +54,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 }),
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -64,20 +64,20 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 })
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -86,14 +86,14 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(11, true), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, true), 0, AssociativityNone},
|
||||
{Symbol(13, true), 0, AssociativityNone},
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}),
|
||||
|
|
@ -105,8 +105,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 }),
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -115,16 +115,16 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 })
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -133,13 +133,13 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
|
||||
}, { str(" ") }, { { Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) } }});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0), Symbol(1) },
|
||||
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -171,7 +171,7 @@ describe("extract_tokens", []() {
|
|||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
|
||||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
|
|
@ -186,7 +186,7 @@ describe("extract_tokens", []() {
|
|||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
{ Symbol(3, Symbol::Terminal) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
|
|
|
|||
|
|
@ -36,19 +36,19 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(3), 102, AssociativityRight},
|
||||
{Symbol(4), 101, AssociativityLeft},
|
||||
{Symbol(6), 0, AssociativityNone},
|
||||
{Symbol(7), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
|
||||
{Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(5), 101, AssociativityLeft},
|
||||
{Symbol(6), 0, AssociativityNone},
|
||||
{Symbol(7), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
|
@ -65,8 +65,8 @@ describe("flatten_grammar", []() {
|
|||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 101, AssociativityLeft},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ describe("flatten_grammar", []() {
|
|||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 101, AssociativityLeft},
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ START_TEST
|
|||
describe("Repeat", []() {
|
||||
describe("constructing repeats", [&]() {
|
||||
it("doesn't create redundant repeats", [&]() {
|
||||
auto sym = make_shared<Symbol>(1);
|
||||
auto sym = make_shared<Symbol>(1, Symbol::NonTerminal);
|
||||
auto repeat = Repeat::build(sym);
|
||||
auto outer_repeat = Repeat::build(repeat);
|
||||
|
||||
|
|
|
|||
13
spec/fixtures/external_scanners/external_scan.c
vendored
Normal file
13
spec/fixtures/external_scanners/external_scan.c
vendored
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
#include <stdbool.h>
#include <stdio.h>

/*
 * Stub external scanner used as a test fixture (the compiler spec loads this
 * file by path for the "external_scanner_example" grammar). The three
 * functions below only prove that the generated language can link against
 * and call out to externally supplied scanner code; none of them performs
 * real scanning.
 */

/*
 * Prints a marker line to stdout so a test run shows the scanner was
 * created, then returns a null pointer because this fixture keeps no state.
 * <stdio.h> is included above so that puts() is properly declared.
 */
void *ts_language_external_scanner_example_external_scanner_create() {
  puts("HELLO FROM EXTERNAL SCANNER");
  return NULL;
}

/*
 * Placeholder scan routine: always returns true and consumes no input.
 */
bool ts_language_external_scanner_example_external_scanner_scan() {
  return true;
}

/*
 * Nothing to release: the create function above never allocates.
 */
void ts_language_external_scanner_example_external_scanner_destroy() {
}
|
||||
|
|
@ -67,7 +67,8 @@ static int get_modified_time(const string &path) {
|
|||
|
||||
const TSLanguage *load_language(const string &source_filename,
|
||||
const string &lib_filename,
|
||||
const string &language_name) {
|
||||
const string &language_name,
|
||||
string external_scanner_path = "") {
|
||||
string language_function_name = "ts_language_" + language_name;
|
||||
string header_dir = getenv("PWD") + string("/include");
|
||||
int source_mtime = get_modified_time(source_filename);
|
||||
|
|
@ -119,7 +120,9 @@ const TSLanguage *load_language(const string &source_filename,
|
|||
return language_fn();
|
||||
}
|
||||
|
||||
const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) {
|
||||
const TSLanguage *load_compile_result(const string &name,
|
||||
const TSCompileResult &compile_result,
|
||||
string external_scanner_path) {
|
||||
if (compile_result.error_type != TSCompileErrorTypeNone) {
|
||||
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
|
||||
return nullptr;
|
||||
|
|
@ -135,7 +138,7 @@ const TSLanguage *load_compile_result(const string &name, const TSCompileResult
|
|||
source_file << compile_result.code;
|
||||
source_file.close();
|
||||
|
||||
const TSLanguage *language = load_language(source_filename, lib_filename, name);
|
||||
auto language = load_language(source_filename, lib_filename, name, external_scanner_path);
|
||||
free(compile_result.code);
|
||||
return language;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@
|
|||
#include "tree_sitter/runtime.h"
|
||||
#include <string>
|
||||
|
||||
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &);
|
||||
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
|
||||
std::string external_scanner_path = "");
|
||||
const TSLanguage *get_test_language(const std::string &language_name);
|
||||
|
||||
#endif // HELPERS_LOAD_LANGUAGE_H_
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace tree_sitter {
|
|||
using std::ostream;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using rules::Symbol;
|
||||
|
||||
rule_ptr character(const set<uint32_t> &ranges) {
|
||||
return character(ranges, true);
|
||||
|
|
@ -28,11 +29,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr i_sym(size_t index) {
|
||||
return make_shared<rules::Symbol>(index);
|
||||
return make_shared<Symbol>(index, Symbol::NonTerminal);
|
||||
}
|
||||
|
||||
rule_ptr i_token(size_t index) {
|
||||
return make_shared<rules::Symbol>(index, true);
|
||||
return make_shared<Symbol>(index, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) {
|
||||
|
|
|
|||
|
|
@ -10,16 +10,7 @@ namespace tree_sitter {
|
|||
|
||||
ostream &operator<<(ostream &stream, const Grammar &grammar) {
|
||||
stream << string("#<grammar");
|
||||
stream << string(" rules: {");
|
||||
bool started = false;
|
||||
for (auto pair : grammar.rules) {
|
||||
if (started)
|
||||
stream << string(", ");
|
||||
stream << pair.first;
|
||||
stream << string(" => ");
|
||||
stream << pair.second;
|
||||
started = true;
|
||||
}
|
||||
stream << " rules: " << grammar.rules;
|
||||
return stream << string("}>");
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -507,6 +507,71 @@ describe("compile_grammar", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("external scanners", [&]() {
|
||||
it("can call out to arbitrary scanner functions during parsing", [&]() {
|
||||
string grammar = R"JSON({
|
||||
"name": "external_scanner_example",
|
||||
|
||||
"externals": [
|
||||
"percent_string",
|
||||
"percent_string_start",
|
||||
"percent_string_end"
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"string": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "EXTERNAL_TOKEN",
|
||||
"name": "percent_string"
|
||||
},
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "EXTERNAL_TOKEN",
|
||||
"name": "percent_string_start"
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
{
|
||||
"type": "EXTERNAL_TOKEN",
|
||||
"name": "percent_string_end"
|
||||
}
|
||||
]
|
||||
},
|
||||
]
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "\\a+"
|
||||
}
|
||||
}
|
||||
})JSON";
|
||||
|
||||
TSCompileResult result = ts_compile_grammar(grammar.c_str());
|
||||
AssertThat(result.error_message, IsNull());
|
||||
|
||||
ts_document_set_language(document, load_compile_result(
|
||||
"external_scanner_example",
|
||||
result,
|
||||
"spec/fixtures/external_scanners/external_scan.c"
|
||||
));
|
||||
|
||||
ts_document_set_input_string(document, "%|hi|");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(string)");
|
||||
|
||||
ts_document_set_input_string(document, "%(1 #{two} three)");
|
||||
ts_document_parse(document);
|
||||
assert_root_node("(string (identifier))");
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the grammar's start symbol is a token", [&]() {
|
||||
it("parses the token", [&]() {
|
||||
TSCompileResult result = ts_compile_grammar(R"JSON(
|
||||
|
|
|
|||
|
|
@ -80,10 +80,10 @@ START_TEST
|
|||
|
||||
describe("The Corpus", []() {
|
||||
vector<string> test_languages({
|
||||
"javascript",
|
||||
// "javascript",
|
||||
"json",
|
||||
"c",
|
||||
"cpp",
|
||||
// "c",
|
||||
// "cpp",
|
||||
});
|
||||
|
||||
for (auto &language_name : test_languages) {
|
||||
|
|
|
|||
|
|
@ -64,7 +64,7 @@ class LexTableBuilder {
|
|||
private:
|
||||
// Stores on `parse_state` the id that add_lex_state returns for the lex item
// set built from that state's terminal entries.
void add_lex_state_for_parse_state(ParseState *parse_state) {
  const LexItemSet terminal_items =
    item_set_for_terminals(parse_state->terminal_entries);
  parse_state->lex_state_id = add_lex_state(terminal_items);
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
|
|
@ -112,24 +112,27 @@ class LexTableBuilder {
|
|||
void mark_fragile_tokens() {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(homonym)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
Symbol symbol = entry.first;
|
||||
if (symbol.is_token()) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
|
||||
auto extensions = conflict_manager.possible_extensions.find(entry.first);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(extension)) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -150,24 +153,27 @@ class LexTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the lex item set for the given terminal entries. Only keys for which
// is_token() holds contribute; for each such symbol, one item is added per
// (rule of the symbol, separator rule) pair, where the item's rule is the
// separator followed by the rule marked as the main token.
LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
  LexItemSet result;

  for (const auto &terminal : terminals) {
    Symbol symbol = terminal.first;
    if (!symbol.is_token())
      continue;

    for (const rule_ptr &rule : rules_for_symbol(symbol)) {
      for (const rule_ptr &separator_rule : separator_rules) {
        auto separated_token = Metadata::separator(
          Seq::build({ separator_rule, Metadata::main_token(rule) }));
        result.entries.insert(LexItem(symbol, separated_token));
      }
    }
  }

  return result;
}
|
||||
|
||||
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
|
||||
if (!symbol.is_token)
|
||||
return {};
|
||||
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return { CharacterSet().include(0).copy() };
|
||||
|
||||
|
|
|
|||
|
|
@ -52,7 +52,10 @@ class ParseTableBuilder {
|
|||
allow_any_conflict(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
Symbol start_symbol = Symbol(0, grammar.variables.empty());
|
||||
Symbol start_symbol = grammar.variables.empty() ?
|
||||
Symbol(0, Symbol::Terminal) :
|
||||
Symbol(0, Symbol::NonTerminal);
|
||||
|
||||
Production start_production({
|
||||
ProductionStep(start_symbol, 0, rules::AssociativityNone),
|
||||
});
|
||||
|
|
@ -63,7 +66,7 @@ class ParseTableBuilder {
|
|||
add_parse_state(ParseItemSet({
|
||||
{
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({ END_OF_INPUT().index }),
|
||||
LookaheadSet({ END_OF_INPUT() }),
|
||||
},
|
||||
}));
|
||||
|
||||
|
|
@ -107,21 +110,21 @@ class ParseTableBuilder {
|
|||
void build_error_parse_state() {
|
||||
ParseState error_state;
|
||||
|
||||
for (const Symbol::Index index : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(index, true));
|
||||
for (const Symbol symbol : parse_table.mergeable_symbols) {
|
||||
add_out_of_context_parse_state(&error_state, symbol);
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.extra_tokens) {
|
||||
if (!error_state.terminal_entries.count(symbol.index)) {
|
||||
error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra());
|
||||
if (!error_state.terminal_entries.count(symbol)) {
|
||||
error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra());
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, false));
|
||||
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal));
|
||||
}
|
||||
|
||||
error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0));
|
||||
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
|
||||
parse_table.states[0] = error_state;
|
||||
}
|
||||
|
||||
|
|
@ -130,10 +133,10 @@ class ParseTableBuilder {
|
|||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
if (symbol.is_token) {
|
||||
error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) });
|
||||
} else {
|
||||
if (symbol.is_non_terminal()) {
|
||||
error_state->nonterminal_entries[symbol.index] = state;
|
||||
} else {
|
||||
error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) });
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -152,9 +155,9 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
map<Symbol::Index, ParseItemSet> terminal_successors;
|
||||
map<Symbol, ParseItemSet> terminal_successors;
|
||||
map<Symbol::Index, ParseItemSet> nonterminal_successors;
|
||||
set<Symbol::Index> lookaheads_with_conflicts;
|
||||
set<Symbol> lookaheads_with_conflicts;
|
||||
|
||||
for (const auto &pair : item_set.entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
|
|
@ -168,7 +171,7 @@ class ParseTableBuilder {
|
|||
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
|
||||
|
||||
int precedence = item.precedence();
|
||||
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
|
||||
for (Symbol lookahead : *lookahead_symbols.entries) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
|
||||
// Only add the highest-precedence Reduce actions to the parse table.
|
||||
|
|
@ -203,10 +206,10 @@ class ParseTableBuilder {
|
|||
Symbol symbol = item.production->at(item.step_index).symbol;
|
||||
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
|
||||
|
||||
if (symbol.is_token) {
|
||||
terminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
|
||||
} else {
|
||||
if (symbol.is_non_terminal()) {
|
||||
nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
|
||||
} else {
|
||||
terminal_successors[symbol].entries[new_item] = lookahead_symbols;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -214,7 +217,7 @@ class ParseTableBuilder {
|
|||
// Add a Shift action for each possible successor state. Shift actions for
|
||||
// terminal lookaheads can conflict with Reduce actions added previously.
|
||||
for (auto &pair : terminal_successors) {
|
||||
Symbol::Index lookahead = pair.first;
|
||||
Symbol lookahead = pair.first;
|
||||
ParseItemSet &next_item_set = pair.second;
|
||||
ParseStateId next_state_id = add_parse_state(next_item_set);
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
|
|
@ -223,7 +226,7 @@ class ParseTableBuilder {
|
|||
if (!allow_any_conflict) {
|
||||
if (had_existing_action)
|
||||
lookaheads_with_conflicts.insert(lookahead);
|
||||
recovery_states[Symbol(lookahead, true)].add(next_item_set);
|
||||
recovery_states[lookahead].add(next_item_set);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -234,10 +237,10 @@ class ParseTableBuilder {
|
|||
ParseStateId next_state = add_parse_state(next_item_set);
|
||||
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
|
||||
if (!allow_any_conflict)
|
||||
recovery_states[Symbol(lookahead, false)].add(next_item_set);
|
||||
recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set);
|
||||
}
|
||||
|
||||
for (Symbol::Index lookahead : lookaheads_with_conflicts) {
|
||||
for (Symbol lookahead : lookaheads_with_conflicts) {
|
||||
string conflict = handle_conflict(item_set, state_id, lookahead);
|
||||
if (!conflict.empty()) return conflict;
|
||||
}
|
||||
|
|
@ -245,9 +248,9 @@ class ParseTableBuilder {
|
|||
ParseAction shift_extra = ParseAction::ShiftExtra();
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens) {
|
||||
if (!state.terminal_entries.count(extra_symbol.index) ||
|
||||
if (!state.terminal_entries.count(extra_symbol) ||
|
||||
state.has_shift_action() || allow_any_conflict) {
|
||||
parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra);
|
||||
parse_table.add_terminal_action(state_id, extra_symbol, shift_extra);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -257,7 +260,6 @@ class ParseTableBuilder {
|
|||
void mark_fragile_actions() {
|
||||
for (ParseState &state : parse_table.states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
const Symbol symbol(entry.first, true);
|
||||
auto &actions = entry.second.actions;
|
||||
|
||||
for (ParseAction &action : actions) {
|
||||
|
|
@ -359,7 +361,7 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
|
||||
Symbol::Index lookahead) {
|
||||
Symbol lookahead) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
int reduction_precedence = entry.actions.front().precedence();
|
||||
set<ParseItem> shift_items;
|
||||
|
|
@ -468,7 +470,7 @@ class ParseTableBuilder {
|
|||
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
|
||||
}
|
||||
|
||||
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
|
||||
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
|
||||
description += "\n\n";
|
||||
|
||||
description += "Possible interpretations:\n\n";
|
||||
|
|
@ -487,7 +489,7 @@ class ParseTableBuilder {
|
|||
description += " " + symbol_name(step.symbol);
|
||||
}
|
||||
description += ")";
|
||||
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
|
||||
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
|
||||
description += "\n";
|
||||
}
|
||||
}
|
||||
|
|
@ -564,14 +566,22 @@ class ParseTableBuilder {
|
|||
return "END_OF_INPUT";
|
||||
else
|
||||
return "";
|
||||
} else if (symbol.is_token) {
|
||||
const Variable &variable = lexical_grammar.variables[symbol.index];
|
||||
if (variable.type == VariableTypeNamed)
|
||||
return variable.name;
|
||||
else
|
||||
return "'" + variable.name + "'";
|
||||
} else {
|
||||
return grammar.variables[symbol.index].name;
|
||||
}
|
||||
|
||||
switch (symbol.type) {
|
||||
case Symbol::Terminal: {
|
||||
const Variable &variable = lexical_grammar.variables[symbol.index];
|
||||
if (variable.type == VariableTypeNamed)
|
||||
return variable.name;
|
||||
else
|
||||
return "'" + variable.name + "'";
|
||||
}
|
||||
case Symbol::NonTerminal: {
|
||||
return grammar.variables[symbol.index].name;
|
||||
}
|
||||
case Symbol::External: {
|
||||
return grammar.external_tokens[symbol.index];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,8 +12,8 @@ using rules::Symbol;
|
|||
|
||||
// Default construction leaves `entries` null; no underlying set is allocated.
LookaheadSet::LookaheadSet() : entries(nullptr) {}

// Constructs a lookahead set holding a freshly allocated copy of `symbols`.
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
  : entries(make_shared<set<Symbol>>(symbols)) {}
|
||||
|
||||
bool LookaheadSet::empty() const {
|
||||
return !entries.get() || entries->empty();
|
||||
|
|
@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
|
|||
return *entries == *other.entries;
|
||||
}
|
||||
|
||||
bool LookaheadSet::contains(const Symbol::Index &symbol) const {
|
||||
bool LookaheadSet::contains(const Symbol &symbol) const {
|
||||
return entries->find(symbol) != entries->end();
|
||||
}
|
||||
|
||||
|
|
@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) {
|
|||
if (!other.entries.get())
|
||||
return false;
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
entries = make_shared<set<Symbol>>();
|
||||
size_t previous_size = entries->size();
|
||||
entries->insert(other.entries->begin(), other.entries->end());
|
||||
return entries->size() > previous_size;
|
||||
}
|
||||
|
||||
bool LookaheadSet::insert(const Symbol::Index &symbol) {
|
||||
bool LookaheadSet::insert(const Symbol &symbol) {
|
||||
if (!entries.get())
|
||||
entries = make_shared<set<Symbol::Index>>();
|
||||
entries = make_shared<set<Symbol>>();
|
||||
return entries->insert(symbol).second;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -11,15 +11,15 @@ namespace build_tables {
|
|||
class LookaheadSet {
|
||||
public:
|
||||
LookaheadSet();
|
||||
explicit LookaheadSet(const std::set<rules::Symbol::Index> &);
|
||||
explicit LookaheadSet(const std::set<rules::Symbol> &);
|
||||
|
||||
bool empty() const;
|
||||
bool operator==(const LookaheadSet &) const;
|
||||
bool contains(const rules::Symbol::Index &) const;
|
||||
bool contains(const rules::Symbol &) const;
|
||||
bool insert_all(const LookaheadSet &);
|
||||
bool insert(const rules::Symbol::Index &);
|
||||
bool insert(const rules::Symbol &);
|
||||
|
||||
std::shared_ptr<std::set<rules::Symbol::Index>> entries;
|
||||
std::shared_ptr<std::set<rules::Symbol>> entries;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
|
|||
}
|
||||
|
||||
// Returns the item's left-hand side: the non-terminal symbol whose index is
// `variable_index`.
Symbol ParseItem::lhs() const {
  Symbol left_hand_side(variable_index, Symbol::NonTerminal);
  return left_hand_side;
}
|
||||
|
||||
bool ParseItem::is_done() const {
|
||||
|
|
@ -105,38 +105,6 @@ size_t ParseItemSet::unfinished_item_signature() const {
|
|||
return result;
|
||||
}
|
||||
|
||||
ParseItemSet::ActionMap ParseItemSet::actions() const {
|
||||
ParseItemSet::ActionMap result;
|
||||
|
||||
for (const auto &pair : entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
const LookaheadSet &lookahead_symbols = pair.second;
|
||||
|
||||
if (item.step_index == item.production->size()) {
|
||||
int precedence = item.precedence();
|
||||
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
|
||||
Action &action = result.terminal_actions[lookahead];
|
||||
if (precedence > action.completion_precedence) {
|
||||
action.completions.assign({ &item });
|
||||
} else if (precedence == action.completion_precedence) {
|
||||
action.completions.push_back({ &item });
|
||||
}
|
||||
}
|
||||
} else {
|
||||
Symbol symbol = item.production->at(item.step_index).symbol;
|
||||
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
|
||||
|
||||
if (symbol.is_token) {
|
||||
result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols;
|
||||
} else {
|
||||
result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
void ParseItemSet::add(const ParseItemSet &other) {
|
||||
for (const auto &pair : other.entries)
|
||||
entries[pair.first].insert_all(pair.second);
|
||||
|
|
|
|||
|
|
@ -41,16 +41,6 @@ class ParseItemSet {
|
|||
ParseItemSet();
|
||||
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
|
||||
|
||||
struct Completion;
|
||||
struct Action;
|
||||
|
||||
struct ActionMap {
|
||||
std::map<rules::Symbol::Index, Action> terminal_actions;
|
||||
std::map<rules::Symbol::Index, ParseItemSet> nonterminal_continuations;
|
||||
};
|
||||
|
||||
ActionMap actions() const;
|
||||
|
||||
bool operator==(const ParseItemSet &) const;
|
||||
void add(const ParseItemSet &);
|
||||
size_t unfinished_item_signature() const;
|
||||
|
|
@ -58,22 +48,6 @@ class ParseItemSet {
|
|||
std::map<ParseItem, LookaheadSet> entries;
|
||||
};
|
||||
|
||||
struct ParseItemSet::Completion {
|
||||
const ParseItem *item;
|
||||
int precedence;
|
||||
rules::Associativity associativity;
|
||||
|
||||
bool operator<(const ParseItemSet::Completion &other) {
|
||||
return precedence < other.precedence;
|
||||
}
|
||||
};
|
||||
|
||||
struct ParseItemSet::Action {
|
||||
ParseItemSet continuation;
|
||||
std::vector<const ParseItem *> completions;
|
||||
int completion_precedence;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -27,12 +27,12 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
set<Symbol::Index> processed_non_terminals;
|
||||
|
||||
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i, true);
|
||||
first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
|
||||
Symbol symbol(i, Symbol::Terminal);
|
||||
first_sets.insert({symbol, LookaheadSet({ symbol })});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i);
|
||||
Symbol symbol(i, Symbol::NonTerminal);
|
||||
LookaheadSet first_set;
|
||||
|
||||
processed_non_terminals.clear();
|
||||
|
|
@ -42,10 +42,10 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
Symbol current_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
||||
if (current_symbol.is_token) {
|
||||
first_set.insert(current_symbol.index);
|
||||
if (!current_symbol.is_non_terminal()) {
|
||||
first_set.insert(current_symbol);
|
||||
} else if (processed_non_terminals.insert(current_symbol.index).second) {
|
||||
for (const Production &production : grammar.productions(current_symbol)) {
|
||||
for (const Production &production : grammar.variables[current_symbol.index].productions) {
|
||||
if (!production.empty()) {
|
||||
symbols_to_process.push_back(production[0].symbol);
|
||||
}
|
||||
|
|
@ -59,11 +59,11 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
vector<ParseItemSetComponent> components_to_process;
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol(i);
|
||||
Symbol symbol(i, Symbol::NonTerminal);
|
||||
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
|
||||
|
||||
components_to_process.clear();
|
||||
for (const Production &production : grammar.productions(symbol)) {
|
||||
for (const Production &production : grammar.variables[i].productions) {
|
||||
components_to_process.push_back(ParseItemSetComponent{
|
||||
ParseItem(symbol, production, 0),
|
||||
LookaheadSet(),
|
||||
|
|
@ -87,7 +87,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
|
||||
if (component_is_new) {
|
||||
Symbol next_symbol = item.next_symbol();
|
||||
if (next_symbol.is_built_in() || next_symbol.is_token)
|
||||
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
|
||||
continue;
|
||||
|
||||
LookaheadSet next_lookaheads;
|
||||
|
|
@ -102,7 +102,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
propagates_lookaheads = false;
|
||||
}
|
||||
|
||||
for (const Production &production : grammar.productions(next_symbol)) {
|
||||
for (const Production &production : grammar.variables[next_symbol.index].productions) {
|
||||
components_to_process.push_back(ParseItemSetComponent{
|
||||
ParseItem(next_symbol, production, 0),
|
||||
next_lookaheads,
|
||||
|
|
@ -130,7 +130,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
|
|||
const LookaheadSet &lookaheads = pair.second;
|
||||
|
||||
const Symbol &next_symbol = item.next_symbol();
|
||||
if (!next_symbol.is_token && !next_symbol.is_built_in()) {
|
||||
if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
|
||||
LookaheadSet next_lookaheads;
|
||||
size_t next_step = item.step_index + 1;
|
||||
if (next_step == item.production->size()) {
|
||||
|
|
|
|||
|
|
@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator<true, false> {};
|
|||
class LastCharacters : public CharacterAggregator<false, true> {};
|
||||
class AllCharacters : public CharacterAggregator<true, true> {};
|
||||
|
||||
set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol::Index> result;
|
||||
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
|
||||
set<Symbol> result;
|
||||
|
||||
AllCharacters all_separator_characters;
|
||||
for (const rule_ptr &separator : grammar.separators)
|
||||
|
|
@ -79,7 +79,7 @@ set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
|
|||
!all_characters.result.intersects(all_separator_characters.result);
|
||||
|
||||
if ((has_distinct_start && has_distinct_end) || has_no_separators)
|
||||
result.insert(i);
|
||||
result.insert(Symbol(i, Symbol::Terminal));
|
||||
}
|
||||
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ struct LexicalGrammar;
|
|||
|
||||
namespace build_tables {
|
||||
|
||||
std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
|
||||
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace generate_code {
|
||||
|
||||
using std::function;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
|
|
@ -22,6 +23,7 @@ using std::string;
|
|||
using std::to_string;
|
||||
using std::vector;
|
||||
using util::escape_char;
|
||||
using rules::Symbol;
|
||||
|
||||
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
|
||||
|
||||
|
|
@ -73,9 +75,8 @@ class CCodeGenerator {
|
|||
const LexicalGrammar lexical_grammar;
|
||||
map<string, string> sanitized_names;
|
||||
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
|
||||
vector<pair<size_t, set<rules::Symbol>>> in_progress_symbols;
|
||||
vector<set<Symbol::Index>> external_token_id_sets;
|
||||
size_t next_parse_action_list_index;
|
||||
size_t next_in_progress_symbol_list_index;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name, const ParseTable &parse_table,
|
||||
|
|
@ -87,19 +88,25 @@ class CCodeGenerator {
|
|||
lex_table(lex_table),
|
||||
syntax_grammar(syntax_grammar),
|
||||
lexical_grammar(lexical_grammar),
|
||||
next_parse_action_list_index(0),
|
||||
next_in_progress_symbol_list_index(0) {}
|
||||
next_parse_action_list_index(0) {}
|
||||
|
||||
string code() {
|
||||
buffer = "";
|
||||
|
||||
add_includes();
|
||||
add_state_and_symbol_counts();
|
||||
add_warning_pragma();
|
||||
add_stats();
|
||||
add_symbol_enum();
|
||||
add_symbol_names_list();
|
||||
add_symbol_node_types_list();
|
||||
add_symbol_metadata_list();
|
||||
add_lex_function();
|
||||
add_lex_states_list();
|
||||
add_lex_modes_list();
|
||||
|
||||
if (!syntax_grammar.external_tokens.empty())
|
||||
add_external_token_enum();
|
||||
|
||||
add_external_token_symbol_map();
|
||||
add_external_scan_modes_list();
|
||||
add_parse_table();
|
||||
add_parser_export();
|
||||
|
||||
|
|
@ -112,10 +119,17 @@ class CCodeGenerator {
|
|||
line();
|
||||
}
|
||||
|
||||
void add_state_and_symbol_counts() {
|
||||
// Emits the GCC diagnostic pragmas that silence
// -Wmissing-field-initializers in the generated file, followed by a blank line.
void add_warning_pragma() {
  static const char *const pragma_lines[] = {
    "#pragma GCC diagnostic push",
    "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"",
  };

  for (const char *pragma_line : pragma_lines)
    line(pragma_line);
  line();
}
|
||||
|
||||
// Emits the #define lines giving the sizes of the generated tables: parse
// states, symbols, tokens (lexical variables plus one) and external tokens.
void add_stats() {
  const size_t state_count = parse_table.states.size();
  const size_t symbol_count = parse_table.symbols.size();
  const size_t token_count = lexical_grammar.variables.size() + 1;
  const size_t external_token_count = syntax_grammar.external_tokens.size();

  line("#define STATE_COUNT " + to_string(state_count));
  line("#define SYMBOL_COUNT " + to_string(symbol_count));
  line("#define TOKEN_COUNT " + to_string(token_count));
  line("#define EXTERNAL_TOKEN_COUNT " + to_string(external_token_count));
  line();
}
|
||||
|
||||
|
|
@ -124,7 +138,7 @@ class CCodeGenerator {
|
|||
indent([&]() {
|
||||
size_t i = 1;
|
||||
for (const auto &entry : parse_table.symbols) {
|
||||
const rules::Symbol &symbol = entry.first;
|
||||
const Symbol &symbol = entry.first;
|
||||
if (!symbol.is_built_in()) {
|
||||
line(symbol_id(symbol) + " = " + to_string(i) + ",");
|
||||
i++;
|
||||
|
|
@ -146,11 +160,11 @@ class CCodeGenerator {
|
|||
line();
|
||||
}
|
||||
|
||||
void add_symbol_node_types_list() {
|
||||
void add_symbol_metadata_list() {
|
||||
line("static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : parse_table.symbols) {
|
||||
const rules::Symbol &symbol = entry.first;
|
||||
const Symbol &symbol = entry.first;
|
||||
line("[" + symbol_id(symbol) + "] = {");
|
||||
indent([&]() {
|
||||
switch (symbol_type(symbol)) {
|
||||
|
|
@ -198,13 +212,80 @@ class CCodeGenerator {
|
|||
line();
|
||||
}
|
||||
|
||||
void add_lex_states_list() {
|
||||
line("static TSStateId ts_lex_states[STATE_COUNT] = {");
|
||||
void add_lex_modes_list() {
|
||||
add_external_tokens_id({});
|
||||
|
||||
line("static TSLexMode ts_lex_modes[STATE_COUNT] = {");
|
||||
indent([&]() {
|
||||
size_t state_id = 0;
|
||||
for (const auto &state : parse_table.states)
|
||||
line("[" + to_string(state_id++) + "] = " +
|
||||
to_string(state.lex_state_id) + ",");
|
||||
|
||||
for (const auto &state : parse_table.states) {
|
||||
line("[" + to_string(state_id++) + "] = {.lex_state = ");
|
||||
add(to_string(state.lex_state_id));
|
||||
|
||||
set<Symbol::Index> external_token_indices;
|
||||
for (const auto &pair : state.terminal_entries) {
|
||||
Symbol symbol = pair.first;
|
||||
if (symbol.is_external())
|
||||
external_token_indices.insert(symbol.index);
|
||||
}
|
||||
|
||||
if (!external_token_indices.empty())
|
||||
add(", .external_tokens = " + add_external_tokens_id(external_token_indices));
|
||||
add("},");
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
// Returns, as a decimal string, the position of `external_token_ids` within
// `external_token_id_sets`, appending the set first if no equal set is
// already stored.
string add_external_tokens_id(set<Symbol::Index> external_token_ids) {
  size_t position = 0;
  for (const auto &known_ids : external_token_id_sets) {
    if (known_ids == external_token_ids)
      return to_string(position);
    position++;
  }

  // Not found: `position` now equals the index the new set will occupy.
  external_token_id_sets.push_back(external_token_ids);
  return to_string(position);
}
|
||||
|
||||
void add_external_token_enum() {
|
||||
line("enum {");
|
||||
indent([&]() {
|
||||
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++)
|
||||
line(external_token_id(i) + ",");
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void add_external_token_symbol_map() {
|
||||
line("TSSymbol ts_external_token_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
|
||||
indent([&]() {
|
||||
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
|
||||
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void add_external_scan_modes_list() {
|
||||
line("static bool ts_external_token_lists[");
|
||||
add(to_string(external_token_id_sets.size()));
|
||||
add("][EXTERNAL_TOKEN_COUNT] = {");
|
||||
indent([&]() {
|
||||
size_t i = 0;
|
||||
for (const auto &external_token_ids : external_token_id_sets) {
|
||||
if (!external_token_ids.empty()) {
|
||||
line("[" + to_string(i) + "] = {");
|
||||
indent([&]() {
|
||||
for (Symbol::Index id : external_token_ids) {
|
||||
line("[" + external_token_id(id) + "] = true,");
|
||||
}
|
||||
});
|
||||
line("},");
|
||||
}
|
||||
i++;
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
|
|
@ -214,9 +295,6 @@ class CCodeGenerator {
|
|||
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
|
||||
|
||||
size_t state_id = 0;
|
||||
line("#pragma GCC diagnostic push");
|
||||
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
|
||||
line();
|
||||
line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
|
||||
|
||||
indent([&]() {
|
||||
|
|
@ -224,12 +302,12 @@ class CCodeGenerator {
|
|||
line("[" + to_string(state_id++) + "] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : state.nonterminal_entries) {
|
||||
line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
|
||||
line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
|
||||
add(to_string(entry.second));
|
||||
add("),");
|
||||
}
|
||||
for (const auto &entry : state.terminal_entries) {
|
||||
line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
|
||||
line("[" + symbol_id(entry.first) + "] = ACTIONS(");
|
||||
add(to_string(add_parse_action_list_id(entry.second)));
|
||||
add("),");
|
||||
}
|
||||
|
|
@ -242,12 +320,37 @@ class CCodeGenerator {
|
|||
line();
|
||||
add_parse_action_list();
|
||||
line();
|
||||
line("#pragma GCC diagnostic pop");
|
||||
line();
|
||||
}
|
||||
|
||||
void add_parser_export() {
|
||||
line("EXPORT_LANGUAGE(ts_language_" + name + ");");
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
string external_scanner_name = "ts_language_" + name + "_external_scanner";
|
||||
|
||||
line("void *" + external_scanner_name + "_create();");
|
||||
line("bool " + external_scanner_name + "_scan();");
|
||||
line("void " + external_scanner_name + "_destroy();");
|
||||
line();
|
||||
|
||||
line("const TSLanguage *ts_language_" + name + "() {");
|
||||
indent([&]() {
|
||||
if (!syntax_grammar.external_tokens.empty()) {
|
||||
line("GET_LANGUAGE(");
|
||||
indent([&]() {
|
||||
line(external_scanner_name + "_create,");
|
||||
line(external_scanner_name + "_scan,");
|
||||
line(external_scanner_name + "_destroy,");
|
||||
});
|
||||
line(");");
|
||||
}
|
||||
});
|
||||
line("}");
|
||||
} else {
|
||||
line("const TSLanguage *ts_language_" + name + "() {");
|
||||
indent([&]() {
|
||||
line("GET_LANGUAGE();");
|
||||
});
|
||||
line("}");
|
||||
}
|
||||
line();
|
||||
}
|
||||
|
||||
|
|
@ -379,22 +482,13 @@ class CCodeGenerator {
|
|||
return result;
|
||||
}
|
||||
|
||||
size_t add_in_progress_symbol_list_id(const set<rules::Symbol> &symbols) {
|
||||
for (const auto &pair : in_progress_symbols) {
|
||||
if (pair.second == symbols) {
|
||||
return pair.first;
|
||||
}
|
||||
}
|
||||
|
||||
size_t result = next_in_progress_symbol_list_index;
|
||||
in_progress_symbols.push_back({ result, symbols });
|
||||
next_in_progress_symbol_list_index += 1 + symbols.size();
|
||||
return result;
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
string symbol_id(const rules::Symbol &symbol) {
|
||||
// Builds the identifier used for the external token at position `index` in
// the syntax grammar's external token list: "ts_external_token_" followed by
// the token's name.
string external_token_id(Symbol::Index index) {
  const string &token_name = syntax_grammar.external_tokens[index];
  return "ts_external_token_" + token_name;
}
|
||||
|
||||
string symbol_id(const Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return "ts_builtin_sym_end";
|
||||
|
||||
|
|
@ -411,25 +505,31 @@ class CCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
// Returns the name for `symbol`: "END" for the built-in end-of-input symbol,
// otherwise the name component reported by entry_for_symbol().
string symbol_name(const rules::Symbol &symbol) {
  if (symbol == rules::END_OF_INPUT())
    return "END";
  return entry_for_symbol(symbol).first;
}
|
||||
|
||||
// Returns the variable type for `symbol`: VariableTypeHidden for the built-in
// end-of-input symbol, otherwise the type component reported by
// entry_for_symbol().
VariableType symbol_type(const rules::Symbol &symbol) {
  if (symbol == rules::END_OF_INPUT())
    return VariableTypeHidden;
  return entry_for_symbol(symbol).second;
}
|
||||
|
||||
// Looks up the (name, variable type) pair for `symbol`.
//
// - NonTerminal: read from syntax_grammar.variables[symbol.index].
// - Terminal:    read from lexical_grammar.variables[symbol.index].
// - External:    name comes from syntax_grammar.external_tokens[symbol.index];
//                the type is always VariableTypeAnonymous.
//
// `symbol.index` is used directly as a vector index with no bounds check, so
// callers must not pass built-in (negative-index) symbols.
pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
  switch (symbol.type) {
    case rules::Symbol::NonTerminal: {
      const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
      return { variable.name, variable.type };
    }
    case rules::Symbol::Terminal: {
      const Variable &variable = lexical_grammar.variables[symbol.index];
      return { variable.name, variable.type };
    }
    case rules::Symbol::External: {
      return { syntax_grammar.external_tokens[symbol.index], VariableTypeAnonymous };
    }
  }

  // Every enumerator is handled above. This return only guarantees that
  // control never flows off the end of a non-void function if `type` ever
  // holds a value outside the enum.
  return { string(), VariableTypeHidden };
}
|
||||
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ struct Grammar {
|
|||
std::vector<std::pair<std::string, rule_ptr>> rules;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::vector<std::vector<std::string>> expected_conflicts;
|
||||
std::vector<std::string> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -119,6 +119,16 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
}
|
||||
}
|
||||
|
||||
if (type == "EXTERNAL_TOKEN") {
|
||||
json_value token_name_json = rule_json->operator[]("name");
|
||||
if (token_name_json.type != json_string) {
|
||||
error_message = "External token name must be a string";
|
||||
goto error;
|
||||
}
|
||||
|
||||
return { external_token(token_name_json.u.string.ptr), "" };
|
||||
}
|
||||
|
||||
if (type == "PATTERN") {
|
||||
json_value value_json = rule_json->operator[]("value");
|
||||
if (value_json.type == json_string) {
|
||||
|
|
@ -210,7 +220,7 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
string error_message;
|
||||
string name;
|
||||
Grammar grammar;
|
||||
json_value name_json, rules_json, extras_json, conflicts_json;
|
||||
json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
|
||||
|
||||
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
|
||||
char parse_error[json_error_max];
|
||||
|
|
@ -302,6 +312,25 @@ ParseGrammarResult parse_grammar(const string &input) {
|
|||
}
|
||||
}
|
||||
|
||||
external_tokens_json = grammar_json->operator[]("externals");
|
||||
if (external_tokens_json.type != json_none) {
|
||||
if (external_tokens_json.type != json_array) {
|
||||
error_message = "External tokens must be an array";
|
||||
goto error;
|
||||
}
|
||||
|
||||
for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
|
||||
json_value *token_name_json = external_tokens_json.u.array.values[i];
|
||||
if (token_name_json->type != json_string) {
|
||||
error_message = "External token values must be strings";
|
||||
goto error;
|
||||
}
|
||||
|
||||
string token_name = token_name_json->u.string.ptr;
|
||||
grammar.external_tokens.push_back(token_name);
|
||||
}
|
||||
}
|
||||
|
||||
json_value_free(grammar_json);
|
||||
return { name, grammar, "" };
|
||||
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/parse_table.h"
|
||||
#include <string>
|
||||
#include "compiler/precedence_range.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
|
|
@ -28,7 +29,7 @@ ParseAction::ParseAction()
|
|||
extra(false),
|
||||
fragile(false),
|
||||
state_index(-1),
|
||||
symbol(Symbol(-1)),
|
||||
symbol(rules::NONE()),
|
||||
consumed_symbol_count(0),
|
||||
production(nullptr) {}
|
||||
|
||||
|
|
@ -43,11 +44,11 @@ ParseAction ParseAction::Accept() {
|
|||
}
|
||||
|
||||
ParseAction ParseAction::Shift(ParseStateId state_index) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr);
|
||||
return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Recover(ParseStateId state_index) {
|
||||
return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0,
|
||||
return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
|
||||
nullptr);
|
||||
}
|
||||
|
||||
|
|
@ -150,9 +151,7 @@ bool ParseState::has_shift_action() const {
|
|||
set<Symbol> ParseState::expected_inputs() const {
|
||||
set<Symbol> result;
|
||||
for (auto &entry : terminal_entries)
|
||||
result.insert(Symbol(entry.first, true));
|
||||
for (auto &entry : nonterminal_entries)
|
||||
result.insert(Symbol(entry.first, false));
|
||||
result.insert(entry.first);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -182,33 +181,24 @@ ParseStateId ParseTable::add_state() {
|
|||
return states.size() - 1;
|
||||
}
|
||||
|
||||
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
|
||||
Symbol::Index index,
|
||||
ParseAction action) {
|
||||
states[state_id].terminal_entries[index].actions.clear();
|
||||
return add_terminal_action(state_id, index, action);
|
||||
}
|
||||
|
||||
// Appends `action` to the actions recorded for `lookahead` in state
// `state_id` and returns a reference to the stored copy.
//
// Also updates the table-wide metadata for `lookahead`: the symbol is marked
// `extra` when the action is an extra shift, and `structural` otherwise.
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
                                             Symbol lookahead,
                                             ParseAction action) {
  if (action.type == ParseActionTypeShift && action.extra)
    symbols[lookahead].extra = true;
  else
    symbols[lookahead].structural = true;

  // operator[] creates the entry on first use for this lookahead.
  ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
  entry.actions.push_back(action);
  return entry.actions.back();
}
|
||||
|
||||
// Records that, in state `state_id`, the non-terminal with index `lookahead`
// leads to `next_state_id`, replacing any previous target. The corresponding
// non-terminal symbol is marked `structural` in the table's symbol metadata.
void ParseTable::set_nonterminal_action(ParseStateId state_id,
                                        Symbol::Index lookahead,
                                        ParseStateId next_state_id) {
  symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
  states[state_id].nonterminal_entries[lookahead] = next_state_id;
}
|
||||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
|
|
@ -226,12 +216,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
return false;
|
||||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol::Index index = entry.first;
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(index);
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
|
|
@ -242,25 +232,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
|
|||
}
|
||||
}
|
||||
|
||||
set<Symbol::Index> symbols_to_merge;
|
||||
set<Symbol> symbols_to_merge;
|
||||
|
||||
for (auto &entry : other.terminal_entries) {
|
||||
Symbol::Index index = entry.first;
|
||||
Symbol lookahead = entry.first;
|
||||
const vector<ParseAction> &actions = entry.second.actions;
|
||||
|
||||
if (!state.terminal_entries.count(index)) {
|
||||
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
|
||||
return false;
|
||||
if (actions.back().type != ParseActionTypeReduce)
|
||||
return false;
|
||||
if (!has_entry(state, entry.second))
|
||||
return false;
|
||||
symbols_to_merge.insert(index);
|
||||
symbols_to_merge.insert(lookahead);
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol::Index &index : symbols_to_merge)
|
||||
state.terminal_entries[index] = other.terminal_entries.find(index)->second;
|
||||
for (const Symbol &lookahead : symbols_to_merge)
|
||||
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -76,7 +76,7 @@ class ParseState {
|
|||
void each_referenced_state(std::function<void(ParseStateId *)>);
|
||||
bool has_shift_action() const;
|
||||
|
||||
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
|
||||
LexStateId lex_state_id;
|
||||
size_t shift_actions_signature;
|
||||
|
|
@ -91,15 +91,14 @@ class ParseTable {
|
|||
public:
|
||||
std::set<rules::Symbol> all_symbols() const;
|
||||
ParseStateId add_state();
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
|
||||
ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
|
||||
bool merge_state(size_t i, size_t j);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
|
||||
std::set<rules::Symbol::Index> mergeable_symbols;
|
||||
std::set<rules::Symbol> mergeable_symbols;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
|
|||
rule_ptr inner_rule = apply(rule->content);
|
||||
size_t index = aux_rules.size();
|
||||
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
|
||||
Symbol repeat_symbol(offset + index);
|
||||
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
|
||||
existing_repeats.push_back({ rule->copy(), repeat_symbol });
|
||||
aux_rules.push_back(
|
||||
Variable(helper_rule_name, VariableTypeAuxiliary,
|
||||
|
|
@ -65,6 +65,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
|
|||
result.variables = grammar.variables;
|
||||
result.extra_tokens = grammar.extra_tokens;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
result.external_tokens = grammar.external_tokens;
|
||||
|
||||
ExpandRepeats expander(result.variables.size());
|
||||
for (auto &variable : result.variables)
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/string.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
#include "compiler/rules/external_token.h"
|
||||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/prepare_grammar/token_description.h"
|
||||
#include "compiler/prepare_grammar/is_token.h"
|
||||
|
|
@ -38,7 +39,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
|
|||
map<Symbol, Symbol> replacements;
|
||||
|
||||
Symbol replace_symbol(const Symbol &symbol) {
|
||||
if (symbol.is_built_in() || symbol.is_token)
|
||||
if (!symbol.is_non_terminal())
|
||||
return symbol;
|
||||
|
||||
auto replacement_pair = replacements.find(symbol);
|
||||
|
|
@ -49,7 +50,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
|
|||
for (const auto &pair : replacements)
|
||||
if (pair.first.index < symbol.index)
|
||||
new_index--;
|
||||
return Symbol(new_index);
|
||||
return Symbol(new_index, Symbol::NonTerminal);
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -60,14 +61,14 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
for (size_t i = 0; i < tokens.size(); i++)
|
||||
if (tokens[i].rule->operator==(*input)) {
|
||||
token_usage_counts[i]++;
|
||||
return make_shared<Symbol>(i, true);
|
||||
return make_shared<Symbol>(i, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr rule = input->copy();
|
||||
size_t index = tokens.size();
|
||||
tokens.push_back(Variable(token_description(rule), entry_type, rule));
|
||||
token_usage_counts.push_back(1);
|
||||
return make_shared<Symbol>(index, true);
|
||||
return make_shared<Symbol>(index, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::String *rule) {
|
||||
|
|
@ -78,6 +79,10 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
return apply_to_token(rule, VariableTypeAuxiliary);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::ExternalToken *rule) {
|
||||
return apply_to_token(rule, VariableTypeAuxiliary);
|
||||
}
|
||||
|
||||
rule_ptr apply_to(const rules::Metadata *rule) {
|
||||
if (rule->params.is_token)
|
||||
return apply_to_token(rule->rule.get(), VariableTypeAuxiliary);
|
||||
|
|
@ -90,7 +95,7 @@ class TokenExtractor : public rules::IdentityRuleFn {
|
|||
vector<Variable> tokens;
|
||||
};
|
||||
|
||||
static CompileError ubiq_token_err(const string &message) {
|
||||
static CompileError extra_token_error(const string &message) {
|
||||
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: " + message);
|
||||
}
|
||||
|
|
@ -122,11 +127,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
size_t i = 0;
|
||||
for (const Variable &variable : processed_variables) {
|
||||
auto symbol = variable.rule->as<Symbol>();
|
||||
if (symbol && symbol->is_token && !symbol->is_built_in() &&
|
||||
extractor.token_usage_counts[symbol->index] == 1) {
|
||||
if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
|
||||
lexical_grammar.variables[symbol->index].type = variable.type;
|
||||
lexical_grammar.variables[symbol->index].name = variable.name;
|
||||
symbol_replacer.replacements.insert({ Symbol(i), *symbol });
|
||||
symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
|
||||
} else {
|
||||
syntax_grammar.variables.push_back(variable);
|
||||
}
|
||||
|
|
@ -158,7 +162,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
bool used_elsewhere_in_grammar = false;
|
||||
for (const Variable &variable : lexical_grammar.variables) {
|
||||
if (variable.rule->operator==(*rule)) {
|
||||
syntax_grammar.extra_tokens.insert(Symbol(i, true));
|
||||
syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
|
||||
used_elsewhere_in_grammar = true;
|
||||
}
|
||||
i++;
|
||||
|
|
@ -175,17 +179,20 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
|
|||
auto symbol = rule->as<Symbol>();
|
||||
if (!symbol)
|
||||
return make_tuple(syntax_grammar, lexical_grammar,
|
||||
ubiq_token_err(rule->to_string()));
|
||||
extra_token_error(rule->to_string()));
|
||||
|
||||
Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
|
||||
if (!new_symbol.is_token)
|
||||
if (!new_symbol.is_token()) {
|
||||
return make_tuple(
|
||||
syntax_grammar, lexical_grammar,
|
||||
ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
|
||||
extra_token_error(syntax_grammar.variables[new_symbol.index].name));
|
||||
}
|
||||
|
||||
syntax_grammar.extra_tokens.insert(new_symbol);
|
||||
}
|
||||
|
||||
syntax_grammar.external_tokens = grammar.external_tokens;
|
||||
|
||||
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -92,6 +92,7 @@ pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &gr
|
|||
SyntaxGrammar result;
|
||||
result.expected_conflicts = grammar.expected_conflicts;
|
||||
result.extra_tokens = grammar.extra_tokens;
|
||||
result.external_tokens = grammar.external_tokens;
|
||||
|
||||
bool is_start = true;
|
||||
for (const Variable &variable : grammar.variables) {
|
||||
|
|
|
|||
|
|
@ -1,13 +1,12 @@
|
|||
#ifndef COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
|
||||
#define COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
|
||||
|
||||
#include <vector>
|
||||
#include <string>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include "tree_sitter/compiler.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/variable.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/variable.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace prepare_grammar {
|
||||
|
|
@ -16,6 +15,7 @@ struct InitialSyntaxGrammar {
|
|||
std::vector<Variable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::vector<std::string> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@
|
|||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/blank.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/external_token.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -17,6 +18,7 @@ using std::vector;
|
|||
using std::set;
|
||||
using std::pair;
|
||||
using std::make_shared;
|
||||
using rules::Symbol;
|
||||
|
||||
class InternSymbols : public rules::IdentityRuleFn {
|
||||
using rules::IdentityRuleFn::apply_to;
|
||||
|
|
@ -30,17 +32,34 @@ class InternSymbols : public rules::IdentityRuleFn {
|
|||
return result;
|
||||
}
|
||||
|
||||
// Replaces an ExternalToken rule with the symbol found for its name. When no
// symbol is found, the name is recorded in `missing_external_token_name` and
// a Blank rule is returned in its place.
rule_ptr apply_to(const rules::ExternalToken *rule) {
  auto interned = symbol_for_external_token(rule->name);
  if (interned.get())
    return interned;

  missing_external_token_name = rule->name;
  return rules::Blank::build();
}
|
||||
|
||||
public:
|
||||
// Returns a NonTerminal symbol whose index is the position of the first
// grammar rule named `rule_name`, or nullptr when no rule has that name.
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
  for (size_t i = 0; i < grammar.rules.size(); i++)
    if (grammar.rules[i].first == rule_name)
      return make_shared<rules::Symbol>(i, rules::Symbol::NonTerminal);
  return nullptr;
}
|
||||
|
||||
// Returns an External symbol whose index is the position of the first entry
// of grammar.external_tokens equal to `name`, or nullptr when there is none.
std::shared_ptr<rules::Symbol> symbol_for_external_token(string name) {
  size_t position = 0;
  for (const auto &candidate : grammar.external_tokens) {
    if (candidate == name)
      return make_shared<rules::Symbol>(position, rules::Symbol::External);
    position++;
  }
  return nullptr;
}
|
||||
|
||||
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
|
||||
const Grammar grammar;
|
||||
string missing_rule_name;
|
||||
string missing_external_token_name;
|
||||
};
|
||||
|
||||
CompileError missing_rule_error(string rule_name) {
|
||||
|
|
@ -48,14 +67,22 @@ CompileError missing_rule_error(string rule_name) {
|
|||
"Undefined rule '" + rule_name + "'");
|
||||
}
|
||||
|
||||
// Builds the undefined-symbol compile error reported when a rule references
// an external token name that is not found.
CompileError missing_external_token_error(string token_name) {
  string message = "Undefined external token '" + token_name + "'";
  return CompileError(TSCompileErrorTypeUndefinedSymbol, message);
}
|
||||
|
||||
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
|
||||
InternedGrammar result;
|
||||
result.external_tokens = grammar.external_tokens;
|
||||
InternSymbols interner(grammar);
|
||||
|
||||
for (auto &pair : grammar.rules) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
if (!interner.missing_external_token_name.empty())
|
||||
return { result, missing_external_token_error(interner.missing_external_token_name) };
|
||||
|
||||
result.variables.push_back(Variable(
|
||||
pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
|
||||
|
|
@ -66,6 +93,8 @@ pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
|
|||
auto new_rule = interner.apply(rule);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return { result, missing_rule_error(interner.missing_rule_name) };
|
||||
if (!interner.missing_external_token_name.empty())
|
||||
return { result, missing_external_token_error(interner.missing_external_token_name) };
|
||||
result.extra_tokens.push_back(new_rule);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,7 @@ struct InternedGrammar {
|
|||
std::vector<Variable> variables;
|
||||
std::vector<rule_ptr> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::vector<std::string> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace prepare_grammar
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ rule_ptr prec_left(int precedence, const rule_ptr &);
|
|||
rule_ptr prec_right(const rule_ptr &);
|
||||
rule_ptr prec_right(int precedence, const rule_ptr &);
|
||||
rule_ptr token(const rule_ptr &rule);
|
||||
rule_ptr external_token(const std::string &);
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
|
|
|
|||
|
|
@ -4,15 +4,15 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
|
||||
Symbol END_OF_INPUT() {
|
||||
return Symbol(-1, true);
|
||||
return Symbol(-1, Symbol::Terminal);
|
||||
}
|
||||
|
||||
Symbol START() {
|
||||
return Symbol(-2);
|
||||
return Symbol(-2, Symbol::NonTerminal);
|
||||
}
|
||||
|
||||
Symbol NONE() {
|
||||
return Symbol(-3);
|
||||
return Symbol(-3, Symbol::NonTerminal);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
39
src/compiler/rules/external_token.cc
Normal file
39
src/compiler/rules/external_token.cc
Normal file
|
|
@ -0,0 +1,39 @@
|
|||
#include "compiler/rules/external_token.h"
|
||||
#include <string>
|
||||
#include "compiler/rules/visitor.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::string;
|
||||
using std::hash;
|
||||
|
||||
ExternalToken::ExternalToken(const string &name) : name(name) {}
|
||||
|
||||
rule_ptr ExternalToken::build(const string &name) {
|
||||
return std::make_shared<ExternalToken>(name);
|
||||
}
|
||||
|
||||
// Two rules are equal when the other rule is also an ExternalToken and both
// carry the same name.
bool ExternalToken::operator==(const Rule &rule) const {
  auto that = rule.as<ExternalToken>();
  if (!that)
    return false;
  return that->name == name;
}
|
||||
|
||||
size_t ExternalToken::hash_code() const {
|
||||
return hash<string>()(name);
|
||||
}
|
||||
|
||||
rule_ptr ExternalToken::copy() const {
|
||||
return std::make_shared<ExternalToken>(*this);
|
||||
}
|
||||
|
||||
// Renders the rule as "(sym '<name>')".
string ExternalToken::to_string() const {
  string description("(sym '");
  description += name;
  description += "')";
  return description;
}
|
||||
|
||||
void ExternalToken::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
27
src/compiler/rules/external_token.h
Normal file
27
src/compiler/rules/external_token.h
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef COMPILER_RULES_EXTERNAL_TOKEN_H_
|
||||
#define COMPILER_RULES_EXTERNAL_TOKEN_H_
|
||||
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
// Rule node that refers to an external token by name. The only state it
// carries is the `name` string.
class ExternalToken : public Rule {
|
||||
public:
|
||||
// Stores the given string as `name`.
explicit ExternalToken(const std::string &);
|
||||
// Factory: returns a rule_ptr to a newly allocated ExternalToken with the
// given name.
static rule_ptr build(const std::string &);
|
||||
|
||||
// True when `other` is also an ExternalToken with the same name.
bool operator==(const Rule &other) const;
|
||||
// Hash of `name`.
size_t hash_code() const;
|
||||
// Returns a rule_ptr to a copy of this rule.
rule_ptr copy() const;
|
||||
// Returns "(sym '<name>')".
std::string to_string() const;
|
||||
// Dispatches to visitor->visit(this).
void accept(Visitor *visitor) const;
|
||||
|
||||
// Name of the external token this rule refers to.
std::string name;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_EXTERNAL_TOKEN_H_
|
||||
|
|
@ -13,6 +13,7 @@
|
|||
#include "compiler/rules/pattern.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/external_token.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -105,4 +106,8 @@ rule_ptr token(const rule_ptr &rule) {
|
|||
return metadata(rule, params);
|
||||
}
|
||||
|
||||
rule_ptr external_token(const string &name) {
|
||||
return rules::ExternalToken::build(name);
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -11,12 +11,10 @@ using std::string;
|
|||
using std::to_string;
|
||||
using util::hash_combine;
|
||||
|
||||
Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {}
|
||||
|
||||
Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {}
|
||||
Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {}
|
||||
|
||||
bool Symbol::operator==(const Symbol &other) const {
|
||||
return (other.index == index) && (other.is_token == is_token);
|
||||
return (other.index == index) && (other.type == type);
|
||||
}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
|
|
@ -27,7 +25,7 @@ bool Symbol::operator==(const Rule &rule) const {
|
|||
size_t Symbol::hash_code() const {
|
||||
size_t result = 0;
|
||||
hash_combine(&result, index);
|
||||
hash_combine(&result, is_token);
|
||||
hash_combine<int>(&result, type);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -36,14 +34,20 @@ rule_ptr Symbol::copy() const {
|
|||
}
|
||||
|
||||
// Renders the symbol as "(<kind> <index>)", where <kind> is "terminal",
// "non-terminal" or "external" according to `type`.
string Symbol::to_string() const {
  switch (type) {
    case Symbol::Terminal:
      return "(terminal " + std::to_string(index) + ")";
    case Symbol::NonTerminal:
      return "(non-terminal " + std::to_string(index) + ")";
    case Symbol::External:
      return "(external " + std::to_string(index) + ")";
  }

  // Every enumerator is handled above. This return only guarantees that
  // control never flows off the end of a non-void function if `type` ever
  // holds a value outside the enum.
  return "(unknown " + std::to_string(index) + ")";
}
|
||||
|
||||
// Strict ordering used when symbols are keys in ordered containers: symbols
// are ordered first by `type` (in enum declaration order) and then by
// `index` within the same type.
bool Symbol::operator<(const Symbol &other) const {
  if (type != other.type)
    return type < other.type;
  return index < other.index;
}
|
||||
|
|
@ -56,6 +60,18 @@ bool Symbol::is_built_in() const {
|
|||
return is_built_in(index);
|
||||
}
|
||||
|
||||
bool Symbol::is_token() const {
|
||||
return type == Symbol::Terminal;
|
||||
}
|
||||
|
||||
bool Symbol::is_external() const {
|
||||
return type == Symbol::External;
|
||||
}
|
||||
|
||||
bool Symbol::is_non_terminal() const {
|
||||
return type == Symbol::NonTerminal;
|
||||
}
|
||||
|
||||
void Symbol::accept(Visitor *visitor) const {
|
||||
visitor->visit(this);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,9 +11,13 @@ class Symbol : public Rule {
|
|||
public:
|
||||
typedef int Index;
|
||||
|
||||
typedef enum {
|
||||
Terminal,
|
||||
NonTerminal,
|
||||
External,
|
||||
} Type;
|
||||
|
||||
explicit Symbol(Index index);
|
||||
Symbol(Index index, bool is_token);
|
||||
Symbol(Index index, Type type);
|
||||
|
||||
bool operator==(const Symbol &other) const;
|
||||
bool operator==(const Rule &other) const;
|
||||
|
|
@ -26,9 +30,12 @@ class Symbol : public Rule {
|
|||
bool operator<(const Symbol &other) const;
|
||||
static bool is_built_in(Index);
|
||||
bool is_built_in() const;
|
||||
bool is_token() const;
|
||||
bool is_external() const;
|
||||
bool is_non_terminal() const;
|
||||
|
||||
Index index;
|
||||
bool is_token;
|
||||
Type type;
|
||||
};
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -16,6 +16,7 @@ class String;
|
|||
class Symbol;
|
||||
class Pattern;
|
||||
class Metadata;
|
||||
class ExternalToken;
|
||||
|
||||
class Visitor {
|
||||
public:
|
||||
|
|
@ -29,6 +30,7 @@ class Visitor {
|
|||
virtual void visit(const String *rule) = 0;
|
||||
virtual void visit(const NamedSymbol *rule) = 0;
|
||||
virtual void visit(const Symbol *rule) = 0;
|
||||
virtual void visit(const ExternalToken *rule) = 0;
|
||||
virtual ~Visitor();
|
||||
};
|
||||
|
||||
|
|
@ -86,6 +88,10 @@ class RuleFn : private Visitor {
|
|||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
virtual T apply_to(const ExternalToken *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
|
@ -126,6 +132,10 @@ class RuleFn : private Visitor {
|
|||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
void visit(const ExternalToken *rule) {
|
||||
value_ = apply_to(rule);
|
||||
}
|
||||
|
||||
private:
|
||||
T value_;
|
||||
};
|
||||
|
|
@ -170,6 +180,9 @@ class RuleFn<void> : private Visitor {
|
|||
virtual void apply_to(const Symbol *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
virtual void apply_to(const ExternalToken *rule) {
|
||||
return default_apply((const Rule *)rule);
|
||||
}
|
||||
|
||||
void visit(const Blank *rule) {
|
||||
apply_to(rule);
|
||||
|
|
@ -201,6 +214,9 @@ class RuleFn<void> : private Visitor {
|
|||
void visit(const Symbol *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
void visit(const ExternalToken *rule) {
|
||||
apply_to(rule);
|
||||
}
|
||||
};
|
||||
|
||||
class IdentityRuleFn : public RuleFn<rule_ptr> {
|
||||
|
|
|
|||
|
|
@ -13,8 +13,6 @@ using std::pair;
|
|||
using std::vector;
|
||||
using std::set;
|
||||
|
||||
static const vector<Production> NO_PRODUCTIONS;
|
||||
|
||||
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
|
||||
const vector<Production> &productions)
|
||||
: name(name), productions(productions), type(type) {}
|
||||
|
|
@ -28,13 +26,4 @@ bool ProductionStep::operator==(const ProductionStep &other) const {
|
|||
associativity == other.associativity;
|
||||
}
|
||||
|
||||
const vector<Production> &SyntaxGrammar::productions(
|
||||
const rules::Symbol &symbol) const {
|
||||
if (symbol.is_built_in() || symbol.is_token) {
|
||||
return NO_PRODUCTIONS;
|
||||
} else {
|
||||
return variables[symbol.index].productions;
|
||||
}
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -33,11 +33,10 @@ struct SyntaxVariable {
|
|||
typedef std::set<rules::Symbol> ConflictSet;
|
||||
|
||||
struct SyntaxGrammar {
|
||||
const std::vector<Production> &productions(const rules::Symbol &) const;
|
||||
|
||||
std::vector<SyntaxVariable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::vector<std::string> external_tokens;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -161,7 +161,7 @@ static void parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
|
|||
|
||||
static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
|
||||
TableEntry *table_entry) {
|
||||
if (tree->first_leaf.lex_state == self->language->lex_states[state])
|
||||
if (tree->first_leaf.lex_state == self->language->lex_modes[state].lex_state)
|
||||
return true;
|
||||
if (!table_entry->is_reusable)
|
||||
return false;
|
||||
|
|
@ -209,7 +209,7 @@ static bool parser__condense_stack(Parser *self) {
|
|||
}
|
||||
|
||||
static Tree *parser__lex(Parser *self, TSStateId parse_state) {
|
||||
TSStateId start_state = self->language->lex_states[parse_state];
|
||||
TSStateId start_state = self->language->lex_modes[parse_state].lex_state;
|
||||
TSStateId current_state = start_state;
|
||||
Length start_position = self->lexer.current_position;
|
||||
LOG("lex state:%d", start_state);
|
||||
|
|
@ -729,6 +729,9 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
|
|||
LOG("new_parse");
|
||||
}
|
||||
|
||||
if (self->language->external_scanner.create)
|
||||
self->language->external_scanner.create();
|
||||
|
||||
ts_lexer_set_input(&self->lexer, input);
|
||||
ts_stack_clear(self->stack);
|
||||
self->reusable_node = (ReusableNode){ previous_tree, 0 };
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue