Merge pull request #54 from tree-sitter/external-scanners

External scanners
Max Brunsfeld 2017-01-31 11:46:49 -08:00 committed by GitHub
commit 3edb5dbdd9
75 changed files with 2162 additions and 1123 deletions
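For orientation before the diff: this PR lets a grammar delegate certain tokens to a hand-written "external scanner". A grammar lists the token names under a new "externals" key, and supplies C functions that the generated parser calls through the new TSLanguage.external_scanner struct. A minimal sketch of those entry points, for a hypothetical language named "mylang" (the shape follows the fixture scanners added later in this diff):

#include <stddef.h>
#include <tree_sitter/parser.h>

// Token ids follow the order of the grammar's "externals" array.
enum { MY_TOKEN };

void *tree_sitter_mylang_external_scanner_create() {
  return NULL;  // no per-scanner state needed
}

void tree_sitter_mylang_external_scanner_destroy(void *payload) {}

void tree_sitter_mylang_external_scanner_reset(void *payload) {}

bool tree_sitter_mylang_external_scanner_serialize(void *payload, TSExternalTokenState state) {
  return true;  // nothing to persist across re-parses
}

void tree_sitter_mylang_external_scanner_deserialize(void *payload, TSExternalTokenState state) {}

// `whitelist` is indexed by external token id; an entry is true when that
// token is currently valid. Return true after setting lexer->result_symbol.
bool tree_sitter_mylang_external_scanner_scan(void *payload, TSLexer *lexer, const bool *whitelist) {
  return false;  // recognize nothing
}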


@ -176,11 +176,11 @@ tokens, like `(` and `+`. This is useful when analyzing the meaning of a document
#include "tree_sitter/runtime.h"
// Declare the language function that was generated from your grammar.
TSLanguage *ts_language_arithmetic();
TSLanguage *tree_sitter_arithmetic();
int main() {
TSDocument *document = ts_document_new();
ts_document_set_language(document, ts_language_arithmetic());
ts_document_set_language(document, tree_sitter_arithmetic());
ts_document_set_input_string(document, "a + b * 5");
ts_document_parse(document);
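The hunk above shows only the renamed declaration and call (language functions are now named tree_sitter_X rather than ts_language_X). For reference, the complete updated example would read roughly as follows, a sketch assembled from the API used elsewhere in this diff, with the tree-printing tail assumed:

#include <stdio.h>
#include "tree_sitter/runtime.h"

// Declare the language function that was generated from your grammar.
TSLanguage *tree_sitter_arithmetic();

int main() {
  TSDocument *document = ts_document_new();
  ts_document_set_language(document, tree_sitter_arithmetic());
  ts_document_set_input_string(document, "a + b * 5");
  ts_document_parse(document);

  TSNode root_node = ts_document_root_node(document);
  char *node_string = ts_node_string(root_node, document);
  printf("Syntax tree: %s\n", node_string);
  ts_free(node_string);
  ts_document_free(document);
  return 0;
}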


@ -40,6 +40,14 @@
"pattern": "^[a-zA-Z_]\\w*$"
}
}
},
"externals": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
}
},
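This schema change adds an "externals" property alongside the existing token-name patterns: an array of names, each matching the same identifier pattern, which the external scanner is responsible for recognizing. By convention (see the fixture scanners later in this diff) the scanner mirrors that array with an enum, since both the whitelist passed to scan() and lexer->result_symbol follow the array's ordering:

// For a grammar containing: "externals": ["string", "line_break"]
enum {
  STRING,      // whitelist[0], result_symbol for external token 0
  LINE_BREAK   // whitelist[1], result_symbol for external token 1
};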


@ -10,7 +10,8 @@ typedef enum {
TSCompileErrorTypeInvalidGrammar,
TSCompileErrorTypeInvalidRegex,
TSCompileErrorTypeUndefinedSymbol,
TSCompileErrorTypeInvalidUbiquitousToken,
TSCompileErrorTypeInvalidExtraToken,
TSCompileErrorTypeInvalidExternalToken,
TSCompileErrorTypeLexConflict,
TSCompileErrorTypeParseConflict,
TSCompileErrorTypeEpsilonRule,


@ -12,6 +12,8 @@ extern "C" {
typedef unsigned short TSSymbol;
typedef unsigned short TSStateId;
typedef uint8_t TSExternalTokenState[16];
#define ts_builtin_sym_error ((TSSymbol)-1)
#define ts_builtin_sym_end 0
@ -23,7 +25,7 @@ typedef struct {
} TSSymbolMetadata;
typedef struct {
void (*advance)(void *, TSStateId, bool);
void (*advance)(void *, bool);
int32_t lookahead;
TSSymbol result_symbol;
} TSLexer;
@ -48,6 +50,11 @@ typedef struct {
bool fragile : 1;
} TSParseAction;
typedef struct {
uint16_t lex_state;
uint16_t external_lex_state;
} TSLexMode;
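/* A note on this new table type: each parse state now selects both an
   internal lex state and an external lex state; the latter appears to index
   a row of the external_scanner.states whitelist declared below (an
   inference from this header). A hypothetical generated entry, names and
   values assumed: */
static const TSLexMode ts_lex_modes_example[] = {
  [5] = {.lex_state = 12, .external_lex_state = 3},  // parse state 5
};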
typedef union {
TSParseAction action;
struct {
@ -58,14 +65,26 @@ typedef union {
} TSParseActionEntry;
typedef struct TSLanguage {
uint32_t version;
uint32_t symbol_count;
uint32_t token_count;
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const unsigned short *parse_table;
const TSParseActionEntry *parse_actions;
const TSStateId *lex_states;
const TSLexMode *lex_modes;
bool (*lex_fn)(TSLexer *, TSStateId);
struct {
const bool *states;
const TSSymbol *symbol_map;
void *(*create)();
void (*destroy)(void *);
void (*reset)(void *);
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
bool (*serialize)(void *, TSExternalTokenState);
void (*deserialize)(void *, const TSExternalTokenState);
} external_scanner;
} TSLanguage;
/*
@ -79,14 +98,14 @@ typedef struct TSLanguage {
#define ADVANCE(state_value) \
{ \
lexer->advance(lexer, state_value, false); \
lexer->advance(lexer, false); \
state = state_value; \
goto next_state; \
}
#define SKIP(state_value) \
{ \
lexer->advance(lexer, state_value, true); \
lexer->advance(lexer, true); \
state = state_value; \
goto next_state; \
}
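Correspondingly, TSLexer::advance loses its TSStateId parameter: the generated lex function now tracks its own state transitions through these macros, and the remaining boolean distinguishes skipping a character (separator text excluded from the token) from consuming it. External scanners drive the same callback directly, as in this fragment modeled on the fixture scanners below:

while (lexer->lookahead == ' ') {
  lexer->advance(lexer, true);   // skip: not part of the token
}
lexer->advance(lexer, false);    // consume: included in the token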
@ -146,21 +165,21 @@ typedef struct TSLanguage {
{ .type = TSParseActionTypeAccept } \
}
#define EXPORT_LANGUAGE(language_name) \
static TSLanguage language = { \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_states = ts_lex_states, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
}; \
\
const TSLanguage *language_name() { \
return &language; \
}
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language
#ifdef __cplusplus
}
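EXPORT_LANGUAGE becomes GET_LANGUAGE, which is invoked inside the language function body and forwards __VA_ARGS__ into the new external_scanner struct in declaration order (states, symbol_map, create, destroy, reset, scan, serialize, deserialize). A hedged sketch of the call a generated parser might emit, assuming the generated tables and #defines (SYMBOL_COUNT, EXTERNAL_TOKEN_COUNT, ts_parse_table, ...) are in scope; the two table names here are hypothetical, and a grammar with no external tokens would simply call GET_LANGUAGE():

const TSLanguage *tree_sitter_mylang() {
  GET_LANGUAGE(
    ts_external_scanner_states,      // const bool *: valid-token rows
    ts_external_scanner_symbol_map,  // const TSSymbol *: external -> internal ids
    tree_sitter_mylang_external_scanner_create,
    tree_sitter_mylang_external_scanner_destroy,
    tree_sitter_mylang_external_scanner_reset,
    tree_sitter_mylang_external_scanner_scan,
    tree_sitter_mylang_external_scanner_serialize,
    tree_sitter_mylang_external_scanner_deserialize
  );
}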


@ -9,6 +9,8 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>
#define TREE_SITTER_LANGUAGE_VERSION 1
typedef unsigned short TSSymbol;
typedef struct TSLanguage TSLanguage;
typedef struct TSDocument TSDocument;
@ -114,6 +116,7 @@ uint32_t ts_document_parse_count(const TSDocument *);
uint32_t ts_language_symbol_count(const TSLanguage *);
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
uint32_t ts_language_version(const TSLanguage *);
#ifdef __cplusplus
}


@ -7,6 +7,7 @@ GRAMMARS=(
json
c
cpp
python
)
for grammar in ${GRAMMARS[@]}; do
@ -21,7 +22,7 @@ for grammar in ${GRAMMARS[@]}; do
(
cd $grammar_dir;
git reset --hard;
git pull origin master;
git fetch origin
git reset --hard origin/master;
)
done


@ -27,7 +27,7 @@ describe("recovery_tokens(rule)", []() {
})),
};
AssertThat(recovery_tokens(grammar), Equals<set<Symbol::Index>>({ 1 }));
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
});
});


@ -14,10 +14,10 @@ START_TEST
describe("LexConflictManager::resolve(new_action, old_action)", []() {
LexConflictManager conflict_manager;
bool update;
Symbol sym1(0, true);
Symbol sym2(1, true);
Symbol sym3(2, true);
Symbol sym4(3, true);
Symbol sym1(0, Symbol::Terminal);
Symbol sym2(1, Symbol::Terminal);
Symbol sym3(2, Symbol::Terminal);
Symbol sym4(3, Symbol::Terminal);
LexItemSet item_set({ LexItem(sym4, blank() )});
it("favors advance actions over empty accept token actions", [&]() {


@ -14,7 +14,7 @@ START_TEST
describe("LexItem", []() {
describe("completion_status()", [&]() {
it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
LexItem item1(Symbol(0, true), character({ 'a', 'b', 'c' }));
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
AssertThat(item1.completion_status().is_done, IsFalse());
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
AssertThat(item1.completion_status().is_string, IsFalse());
@ -23,7 +23,7 @@ describe("LexItem", []() {
params.precedence = 3;
params.has_precedence = true;
params.is_string = 1;
LexItem item2(Symbol(0, true), choice({
LexItem item2(Symbol(0, Symbol::Terminal), choice({
metadata(blank(), params),
character({ 'a', 'b', 'c' })
}));
@ -32,7 +32,7 @@ describe("LexItem", []() {
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
AssertThat(item2.completion_status().is_string, IsTrue());
LexItem item3(Symbol(0, true), repeat(character({ ' ', '\t' })));
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
AssertThat(item3.completion_status().is_done, IsTrue());
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
AssertThat(item3.completion_status().is_string, IsFalse());
@ -43,7 +43,7 @@ describe("LexItem", []() {
describe("LexItemSet::transitions()", [&]() {
it("handles single characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
});
AssertThat(
@ -53,7 +53,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
@ -67,7 +67,7 @@ describe("LexItemSet::transitions()", [&]() {
params.is_main_token = true;
LexItemSet item_set({
LexItem(Symbol(1), metadata(character({ 'x' }), params)),
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
});
AssertThat(
@ -77,7 +77,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1), metadata(blank(), params)),
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
}),
PrecedenceRange(),
true
@ -88,7 +88,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'w' }),
character({ 'x' }),
character({ 'y' }),
@ -103,7 +103,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('w'),
Transition{
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'x' }),
character({ 'y' }),
character({ 'z' }),
@ -118,7 +118,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences with nested precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
prec(3, seq({
character({ 'v' }),
prec(4, seq({
@ -140,7 +140,7 @@ describe("LexItemSet::transitions()", [&]() {
// The outer precedence is now 'active', because we are within its
// contained rule.
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
prec(4, seq({
character({ 'w' }),
@ -168,7 +168,7 @@ describe("LexItemSet::transitions()", [&]() {
Transition{
// The inner precedence is now 'active'
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, seq({
active_prec(4, character({ 'x' })),
character({ 'y' }) })),
@ -193,7 +193,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
active_prec(3, character({ 'y' })),
character({ 'z' }),
})),
@ -216,7 +216,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
}),
PrecedenceRange(3),
false
@ -227,7 +227,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles sequences where the left hand side can be blank", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ 'x' }),
blank(),
@ -244,7 +244,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('x'),
Transition{
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'y' }),
character({ 'z' }),
})),
@ -257,7 +257,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('y'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ 'z' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
}),
PrecedenceRange(),
false
@ -268,7 +268,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles blanks", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), blank()),
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
});
AssertThat(item_set.transitions(), IsEmpty());
@ -276,11 +276,11 @@ describe("LexItemSet::transitions()", [&]() {
it("handles repeats", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), repeat1(seq({
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))),
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
});
AssertThat(
@ -290,14 +290,14 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
character({ 'b' }),
repeat1(seq({
character({ 'a' }),
character({ 'b' }),
}))
})),
LexItem(Symbol(1), character({ 'b' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
}),
PrecedenceRange(),
false
@ -307,8 +307,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('c'),
Transition{
LexItemSet({
LexItem(Symbol(2), repeat1(character({ 'c' }))),
LexItem(Symbol(2), blank()),
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
@ -319,7 +319,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles repeats with precedence", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' }))))
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
});
AssertThat(
@ -329,8 +329,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a'),
Transition{
LexItemSet({
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' })))),
LexItem(Symbol(1), active_prec(-1, blank())),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
}),
PrecedenceRange(-1),
false
@ -341,7 +341,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between overlapping character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), choice({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
active_prec(2, seq({
character({ 'a', 'b', 'c', 'd' }),
character({ 'x' }),
@ -360,7 +360,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a', 'b'),
Transition{
LexItemSet({
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
}),
PrecedenceRange(2),
false
@ -370,8 +370,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('c', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
}),
PrecedenceRange(2, 3),
false
@ -381,7 +381,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
}),
PrecedenceRange(3),
false
@ -392,7 +392,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between a subset and a superset of characters", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), choice({
LexItem(Symbol(1, Symbol::NonTerminal), choice({
seq({
character({ 'b', 'c', 'd' }),
character({ 'x' }),
@ -411,7 +411,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a').include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ 'y' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
}),
PrecedenceRange(),
false
@ -421,8 +421,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('b', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ 'x' })),
LexItem(Symbol(1), character({ 'y' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
}),
PrecedenceRange(),
false
@ -433,7 +433,7 @@ describe("LexItemSet::transitions()", [&]() {
it("handles choices between whitelisted and blacklisted character sets", [&]() {
LexItemSet item_set({
LexItem(Symbol(1), seq({
LexItem(Symbol(1, Symbol::NonTerminal), seq({
choice({
character({ '/' }, false),
seq({
@ -452,7 +452,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include_all().exclude('/').exclude('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
}),
PrecedenceRange(),
false
@ -462,8 +462,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('\\'),
Transition{
LexItemSet({
LexItem(Symbol(1), character({ '/' })),
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
}),
PrecedenceRange(),
false
@ -474,8 +474,8 @@ describe("LexItemSet::transitions()", [&]() {
it("handles different items with overlapping character sets", [&]() {
LexItemSet set1({
LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
});
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
@ -483,7 +483,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('a', 'd'),
Transition{
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
@ -493,8 +493,8 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('e', 'f'),
Transition{
LexItemSet({
LexItem(Symbol(1), blank()),
LexItem(Symbol(2), blank()),
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false
@ -504,7 +504,7 @@ describe("LexItemSet::transitions()", [&]() {
CharacterSet().include('g', 'i'),
Transition{
LexItemSet({
LexItem(Symbol(2), blank()),
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
}),
PrecedenceRange(),
false


@ -27,26 +27,26 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable("rule0", VariableTypeNamed, {
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(11, true), 0, AssociativityNone},
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
}),
}),
SyntaxVariable("rule1", VariableTypeNamed, {
Production({
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13, true), 0, AssociativityNone},
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
}),
Production({
{Symbol(2), 0, AssociativityNone},
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
})
}),
SyntaxVariable("rule2", VariableTypeNamed, {
Production({
{Symbol(14, true), 0, AssociativityNone},
{Symbol(15, true), 0, AssociativityNone},
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
})
}),
}, {}, {}};
}, {}, {}, {}};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
@ -54,8 +54,8 @@ describe("ParseItemSetBuilder", []() {
ParseItemSet item_set({
{
ParseItem(Symbol(0), production(0, 0), 0),
LookaheadSet({ 10 }),
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
}
});
@ -64,20 +64,20 @@ describe("ParseItemSetBuilder", []() {
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0), production(0, 0), 0),
LookaheadSet({ 10 })
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
},
{
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(1), production(1, 0), 0),
LookaheadSet({ 11 })
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(1), production(1, 1), 0),
LookaheadSet({ 11 })
},
{
ParseItem(Symbol(2), production(2, 0), 0),
LookaheadSet({ 11 })
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
})));
});
@ -86,18 +86,18 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable("rule0", VariableTypeNamed, {
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(11, true), 0, AssociativityNone},
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
}),
}),
SyntaxVariable("rule1", VariableTypeNamed, {
Production({
{Symbol(12, true), 0, AssociativityNone},
{Symbol(13, true), 0, AssociativityNone},
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
}),
Production({})
}),
}, {}, {}};
}, {}, {}, {}};
auto production = [&](int variable_index, int production_index) -> const Production & {
return grammar.variables[variable_index].productions[production_index];
@ -105,8 +105,8 @@ describe("ParseItemSetBuilder", []() {
ParseItemSet item_set({
{
ParseItem(Symbol(0), production(0, 0), 0),
LookaheadSet({ 10 }),
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
}
});
@ -115,16 +115,16 @@ describe("ParseItemSetBuilder", []() {
AssertThat(item_set, Equals(ParseItemSet({
{
ParseItem(Symbol(0), production(0, 0), 0),
LookaheadSet({ 10 })
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
LookaheadSet({ Symbol(10, Symbol::Terminal) })
},
{
ParseItem(Symbol(1), production(1, 0), 0),
LookaheadSet({ 11 })
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
{
ParseItem(Symbol(1), production(1, 1), 0),
LookaheadSet({ 11 })
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
LookaheadSet({ Symbol(11, Symbol::Terminal) })
},
})));
});


@ -13,7 +13,7 @@ describe("expand_repeats", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);
@ -32,7 +32,7 @@ describe("expand_repeats", []() {
i_token(10),
repeat1(i_token(11)),
})),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);
@ -54,7 +54,7 @@ describe("expand_repeats", []() {
i_token(10),
repeat1(i_token(11))
})),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);
@ -80,7 +80,7 @@ describe("expand_repeats", []() {
i_token(3),
repeat1(i_token(4))
})),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);
@ -106,7 +106,7 @@ describe("expand_repeats", []() {
repeat1(i_token(10)),
repeat1(i_token(11)),
})),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);
@ -130,7 +130,7 @@ describe("expand_repeats", []() {
InitialSyntaxGrammar grammar{{
Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
}, {}, {}};
}, {}, {}, {}};
auto result = expand_repeats(grammar);


@ -5,6 +5,7 @@
#include "compiler/prepare_grammar/extract_tokens.h"
#include "helpers/rule_helpers.h"
#include "helpers/equals_pointer.h"
#include "helpers/stream_methods.h"
START_TEST
@ -28,7 +29,7 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
}, {}, {}});
}, {}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
@ -91,7 +92,7 @@ describe("extract_tokens", []() {
i_sym(0),
str("ab"),
})),
}, {}, {}});
}, {}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
@ -110,7 +111,7 @@ describe("extract_tokens", []() {
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
Variable("rule_B", VariableTypeNamed, str("cd")),
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
}, {}, {}});
}, {}, {}, {}});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
LexicalGrammar &lexical_grammar = get<1>(result);
@ -129,17 +130,26 @@ describe("extract_tokens", []() {
});
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
auto result = extract_tokens(InternedGrammar{{
Variable("rule_A", VariableTypeNamed, str("ok")),
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
auto result = extract_tokens(InternedGrammar{
{
Variable("rule_A", VariableTypeNamed, str("ok")),
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
},
{
str(" ")
},
{
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
},
{}
});
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
{ Symbol(0), Symbol(1) },
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
})));
});
@ -150,7 +160,7 @@ describe("extract_tokens", []() {
}, {
str("y"),
pattern("\\s+"),
}, {}});
}, {}, {}});
AssertThat(get<2>(result), Equals(CompileError::none()));
@ -167,11 +177,11 @@ describe("extract_tokens", []() {
Variable("rule_B", VariableTypeNamed, str("y")),
}, {
str("y"),
}, {}});
}, {}, {}});
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
});
it("updates extra symbols according to the new symbol numbers", [&]() {
@ -181,12 +191,12 @@ describe("extract_tokens", []() {
Variable("rule_C", VariableTypeNamed, str("z")),
}, {
i_sym(2),
}, {}});
}, {}, {}});
AssertThat(get<2>(result), Equals(CompileError::none()));
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
{ Symbol(3, true) },
{ Symbol(3, Symbol::Terminal) },
})));
AssertThat(get<1>(result).separators, IsEmpty());
@ -196,11 +206,11 @@ describe("extract_tokens", []() {
auto result = extract_tokens(InternedGrammar{{
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
}, { i_sym(1) }, {}});
}, { i_sym(1) }, {}, {}});
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
CompileError(TSCompileErrorTypeInvalidExtraToken,
"Not a token: rule_B")));
});
@ -208,14 +218,34 @@ describe("extract_tokens", []() {
auto result = extract_tokens(InternedGrammar{{
Variable("rule_A", VariableTypeNamed, str("x")),
Variable("rule_B", VariableTypeNamed, str("y")),
}, { choice({ i_sym(1), blank() }) }, {}});
}, { choice({ i_sym(1), blank() }) }, {}, {}});
AssertThat(get<2>(result), !Equals(CompileError::none()));
AssertThat(get<2>(result), Equals(
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: (choice (sym 1) (blank))")));
AssertThat(get<2>(result), Equals(CompileError(
TSCompileErrorTypeInvalidExtraToken,
"Not a token: (choice (non-terminal 1) (blank))"
)));
});
});
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
auto result = extract_tokens(InternedGrammar{
{
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
},
{},
{},
{
ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
}
});
AssertThat(get<2>(result), Equals(CompileError(
TSCompileErrorTypeInvalidExternalToken,
"Name 'rule_A' cannot be used for both an external token and a non-terminal rule"
)));
});
});
END_TEST


@ -36,19 +36,19 @@ describe("flatten_grammar", []() {
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(2), 101, AssociativityLeft},
{Symbol(3), 102, AssociativityRight},
{Symbol(4), 101, AssociativityLeft},
{Symbol(6), 0, AssociativityNone},
{Symbol(7), 0, AssociativityNone},
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
{Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
}),
Production({
{Symbol(1), 0, AssociativityNone},
{Symbol(2), 101, AssociativityLeft},
{Symbol(5), 101, AssociativityLeft},
{Symbol(6), 0, AssociativityNone},
{Symbol(7), 0, AssociativityNone},
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
})
})));
});
@ -65,8 +65,8 @@ describe("flatten_grammar", []() {
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 101, AssociativityLeft},
{Symbol(2), 101, AssociativityLeft},
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
})
})));
@ -80,7 +80,7 @@ describe("flatten_grammar", []() {
AssertThat(result.productions, Equals(vector<Production>({
Production({
{Symbol(1), 101, AssociativityLeft},
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
})
})));
});


@ -3,8 +3,10 @@
#include "compiler/grammar.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "helpers/equals_pointer.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
START_TEST
@ -17,7 +19,7 @@ describe("intern_symbols", []() {
{ "x", choice({ sym("y"), sym("_z") }) },
{ "y", sym("_z") },
{ "_z", str("stuff") }
}, {}, {}};
}, {}, {}, {}};
auto result = intern_symbols(grammar);
@ -33,7 +35,7 @@ describe("intern_symbols", []() {
it("returns an error", []() {
Grammar grammar{{
{ "x", sym("y") },
}, {}, {}};
}, {}, {}, {}};
auto result = intern_symbols(grammar);
@ -48,7 +50,7 @@ describe("intern_symbols", []() {
{ "z", str("stuff") }
}, {
sym("z")
}, {}};
}, {}, {}};
auto result = intern_symbols(grammar);
@ -56,6 +58,32 @@ describe("intern_symbols", []() {
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
});
it("records any rule names that match external token names", [&]() {
Grammar grammar{{
{ "x", choice({ sym("y"), sym("z") }) },
{ "y", sym("z") },
{ "z", str("stuff") }
}, {}, {}, {
"w",
"z"
}};
auto result = intern_symbols(grammar);
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>({
{
"w",
VariableTypeNamed,
rules::NONE()
},
{
"z",
VariableTypeNamed,
Symbol(2, Symbol::NonTerminal)
}
})));
});
});
END_TEST


@ -9,7 +9,7 @@ START_TEST
describe("Repeat", []() {
describe("constructing repeats", [&]() {
it("doesn't create redundant repeats", [&]() {
auto sym = make_shared<Symbol>(1);
auto sym = make_shared<Symbol>(1, Symbol::NonTerminal);
auto repeat = Repeat::build(sym);
auto outer_repeat = Repeat::build(repeat);


@ -0,0 +1,29 @@
==========================================
errors in if statements
==========================================
if a is:
print b
print c
---
(module
(if_statement (identifier) (ERROR)
(print_statement (identifier))
(print_statement (identifier))))
==========================================
errors in function definitions
==========================================
def a()::
b
c
---
(module
(function_definition (identifier) (parameters) (ERROR)
(expression_statement (identifier))
(expression_statement (identifier))))


@ -0,0 +1,42 @@
#include <tree_sitter/parser.h>
enum {
COMMENT,
};
void *tree_sitter_extra_external_tokens_external_scanner_create() {
return NULL;
}
void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
}
bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
bool tree_sitter_extra_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
while (lexer->lookahead != '\n') {
lexer->advance(lexer, false);
}
lexer->result_symbol = COMMENT;
return true;
}
return false;
}
void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
}


@ -0,0 +1,118 @@
#include <stdbool.h>
#include <stdlib.h>
#include <tree_sitter/parser.h>
enum {
percent_string,
percent_string_start,
percent_string_end
};
typedef struct {
int32_t open_delimiter;
int32_t close_delimiter;
uint32_t depth;
} Scanner;
void *tree_sitter_external_scanner_example_external_scanner_create() {
Scanner *scanner = malloc(sizeof(Scanner));
*scanner = (Scanner){
.open_delimiter = 0,
.close_delimiter = 0,
.depth = 0
};
return scanner;
}
bool tree_sitter_external_scanner_example_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
Scanner *scanner = payload;
if (whitelist[percent_string]) {
while (lexer->lookahead == ' ' ||
lexer->lookahead == '\t' ||
lexer->lookahead == '\n') {
lexer->advance(lexer, true);
}
if (lexer->lookahead != '%') return false;
lexer->advance(lexer, false);
switch (lexer->lookahead) {
case '(':
scanner->open_delimiter = '(';
scanner->close_delimiter = ')';
scanner->depth = 1;
break;
case '[':
scanner->open_delimiter = '[';
scanner->close_delimiter = ']';
scanner->depth = 1;
break;
case '{':
scanner->open_delimiter = '{';
scanner->close_delimiter = '}';
scanner->depth = 1;
break;
default:
return false;
}
lexer->advance(lexer, false);
for (;;) {
if (scanner->depth == 0) {
lexer->result_symbol = percent_string;
return true;
}
if (lexer->lookahead == scanner->open_delimiter) {
scanner->depth++;
} else if (lexer->lookahead == scanner->close_delimiter) {
scanner->depth--;
} else if (lexer->lookahead == '#') {
lexer->advance(lexer, false);
if (lexer->lookahead == '{') {
lexer->advance(lexer, false);
lexer->result_symbol = percent_string_start;
return true;
}
}
lexer->advance(lexer, false);
}
} else if (whitelist[percent_string_end]) {
if (lexer->lookahead != '}') return false;
lexer->advance(lexer, false);
for (;;) {
if (scanner->depth == 0) {
lexer->result_symbol = percent_string_end;
return true;
}
if (lexer->lookahead == scanner->open_delimiter) {
scanner->depth++;
} else if (lexer->lookahead == scanner->close_delimiter) {
scanner->depth--;
}
lexer->advance(lexer, false);
}
}
return false;
}
void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
}
bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
free(payload);
}
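Note that this scanner keeps real state (the delimiters and nesting depth) but leaves serialize/deserialize as no-ops, so that state would not survive an incremental re-parse that restores the scanner. Since Scanner occupies 12 bytes and TSExternalTokenState is 16 (per the parser.h change above), a stateful variant could replace the two no-ops with something like this sketch (an illustration, not part of this commit):

#include <string.h>

bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
  memcpy(state, payload, sizeof(Scanner));  // Scanner fits within the 16 bytes
  return true;
}

void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
  memcpy(payload, state, sizeof(Scanner));
}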


@ -0,0 +1,63 @@
#include <stdbool.h>
#include <tree_sitter/parser.h>
enum {
STRING,
LINE_BREAK
};
void *tree_sitter_shared_external_tokens_external_scanner_create() {
return NULL;
}
void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
}
bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
return true;
}
void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
}
bool tree_sitter_shared_external_tokens_external_scanner_scan(
void *payload, TSLexer *lexer, const bool *whitelist) {
// If a line-break is a valid lookahead token, only skip spaces.
if (whitelist[LINE_BREAK]) {
while (lexer->lookahead == ' ') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '\n') {
lexer->advance(lexer, false);
lexer->result_symbol = LINE_BREAK;
return true;
}
}
// If a line-break is not a valid lookahead token, skip line breaks as well
// as spaces.
if (whitelist[STRING]) {
while (lexer->lookahead == ' ' || lexer->lookahead == '\n') {
lexer->advance(lexer, true);
}
if (lexer->lookahead == '\'') {
lexer->advance(lexer, false);
while (lexer->lookahead != '\'') {
lexer->advance(lexer, false);
}
lexer->advance(lexer, false);
lexer->result_symbol = STRING;
return true;
}
}
return false;
}
void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
}

spec/helpers/dedent.h

@ -0,0 +1,12 @@
#include "compiler/util/string_helpers.h"
#include <string>
static std::string dedent(std::string input) {
size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
std::string whitespace = "\n" + std::string(indent_level, ' ');
tree_sitter::util::str_replace(&input, whitespace, "\n");
size_t first = input.find_first_not_of("\n ");
// substr takes (position, length), so subtract the starting offset.
return input.substr(first, input.find_last_not_of("\n ") + 1 - first);
}


@ -28,10 +28,11 @@ const char *libcompiler_path =
"out/Test/libcompiler.a";
#endif
static std::string run_cmd(const char *cmd, const char *args[]) {
static std::string run_command(const char *cmd, const char *args[]) {
int child_pid = fork();
if (child_pid < 0)
if (child_pid < 0) {
return "fork failed";
}
if (child_pid == 0) {
close(0);
@ -39,7 +40,6 @@ static std::string run_cmd(const char *cmd, const char *args[]) {
dup2(2, 1);
dup2(1, 2);
execvp(cmd, (char * const * )args);
return "";
}
int status;
@ -47,12 +47,16 @@ static std::string run_cmd(const char *cmd, const char *args[]) {
waitpid(child_pid, &status, 0);
} while (!WIFEXITED(status));
if (WEXITSTATUS(status) == 0)
if (WEXITSTATUS(status) == 0) {
return "";
else
} else {
return "command failed";
}
}
return "";
static bool file_exists(const string &path) {
struct stat file_stat;
return stat(path.c_str(), &file_stat) == 0;
}
static int get_modified_time(const string &path) {
@ -67,46 +71,46 @@ static int get_modified_time(const string &path) {
const TSLanguage *load_language(const string &source_filename,
const string &lib_filename,
const string &language_name) {
string language_function_name = "ts_language_" + language_name;
const string &language_name,
string external_scanner_filename = "") {
string language_function_name = "tree_sitter_" + language_name;
string header_dir = getenv("PWD") + string("/include");
int source_mtime = get_modified_time(source_filename);
int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h");
int lib_mtime = get_modified_time(lib_filename);
int external_scanner_mtime = get_modified_time(external_scanner_filename);
if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime) {
string obj_filename = lib_filename + ".o";
const char *compiler_name = getenv("CC");
if (!compiler_name) {
compiler_name = "gcc";
}
if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime ||
lib_mtime < external_scanner_mtime) {
const char *compiler_name = getenv("CXX");
if (!compiler_name) compiler_name = "c++";
const char *compile_argv[] = {
compiler_name,
"-x", "c",
"-fPIC",
"-g",
"-I", header_dir.c_str(),
"-c", source_filename.c_str(),
"-o", obj_filename.c_str(),
NULL
};
string compile_error = run_cmd("gcc", compile_argv);
if (!compile_error.empty()) {
AssertThat(string(compile_error), IsEmpty());
return nullptr;
}
const char *link_argv[] = {
vector<const char *> compile_args = {
compiler_name,
"-shared",
"-Wl", obj_filename.c_str(),
"-fPIC",
"-I", header_dir.c_str(),
"-o", lib_filename.c_str(),
NULL
"-x", "c",
source_filename.c_str()
};
string link_error = run_cmd("gcc", link_argv);
if (!link_error.empty()) {
AssertThat(link_error, IsEmpty());
if (!external_scanner_filename.empty()) {
compile_args.push_back("-g");
string extension = external_scanner_filename.substr(external_scanner_filename.rfind("."));
if (extension == ".c") {
compile_args.push_back("-xc");
} else {
compile_args.push_back("-xc++");
}
compile_args.push_back(external_scanner_filename.c_str());
}
compile_args.push_back(nullptr);
string compile_error = run_command(compiler_name, compile_args.data());
if (!compile_error.empty()) {
AssertThat(string(compile_error), IsEmpty());
return nullptr;
}
}
@ -118,19 +122,19 @@ const TSLanguage *load_language(const string &source_filename,
return nullptr;
}
void *symbol_value = dlsym(parser_lib, language_function_name.c_str());
if (!symbol_value) {
void *language_function = dlsym(parser_lib, language_function_name.c_str());
if (!language_function) {
std::string message(dlerror());
AssertThat(message, IsEmpty());
return nullptr;
}
typedef TSLanguage * (* LanguageFunction)();
LanguageFunction language_fn = reinterpret_cast<LanguageFunction>(symbol_value);
return language_fn();
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
}
const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) {
const TSLanguage *load_compile_result(const string &name,
const TSCompileResult &compile_result,
string external_scanner_path) {
if (compile_result.error_type != TSCompileErrorTypeNone) {
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
return nullptr;
@ -146,7 +150,7 @@ const TSLanguage *load_compile_result(const string &name, const TSCompileResult
source_file << compile_result.code;
source_file.close();
const TSLanguage *language = load_language(source_filename, lib_filename, name);
auto language = load_language(source_filename, lib_filename, name, external_scanner_path);
free(compile_result.code);
return language;
}
@ -158,6 +162,10 @@ const TSLanguage *get_test_language(const string &language_name) {
string language_dir = string("spec/fixtures/grammars/") + language_name;
string grammar_filename = language_dir + "/src/grammar.json";
string parser_filename = language_dir + "/src/parser.c";
string external_scanner_filename = language_dir + "/src/scanner.cc";
if (!file_exists(external_scanner_filename)) {
external_scanner_filename = "";
}
int grammar_mtime = get_modified_time(grammar_filename);
if (!grammar_mtime)
@ -192,7 +200,7 @@ const TSLanguage *get_test_language(const string &language_name) {
mkdir("out/tmp", 0777);
string lib_filename = "out/tmp/" + language_name + ".so";
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name);
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name, external_scanner_filename);
loaded_languages[language_name] = language;
return language;
};


@ -5,7 +5,8 @@
#include "tree_sitter/runtime.h"
#include <string>
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &);
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
std::string external_scanner_path = "");
const TSLanguage *get_test_language(const std::string &language_name);
#endif // HELPERS_LOAD_LANGUAGE_H_


@ -15,7 +15,9 @@ bool operator==(const TSRange &left, const TSRange &right) {
}
bool operator==(const Length &left, const Length &right) {
return length_eq(left, right);
return left.bytes == right.bytes &&
left.chars == right.chars &&
left.extent == right.extent;
}
bool operator<(const TSPoint &left, const TSPoint &right) {


@ -9,6 +9,7 @@ namespace tree_sitter {
using std::ostream;
using std::string;
using std::to_string;
using rules::Symbol;
rule_ptr character(const set<uint32_t> &ranges) {
return character(ranges, true);
@ -28,11 +29,11 @@ namespace tree_sitter {
}
rule_ptr i_sym(size_t index) {
return make_shared<rules::Symbol>(index);
return make_shared<Symbol>(index, Symbol::NonTerminal);
}
rule_ptr i_token(size_t index) {
return make_shared<rules::Symbol>(index, true);
return make_shared<Symbol>(index, Symbol::Terminal);
}
rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) {


@ -23,20 +23,21 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
ScopeStack *current_scopes,
TSNode node, TSDocument *document,
const std::string &text) {
append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size());
append_text_to_scope_sequence(
sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()
);
string scope = ts_node_type(node, document);
current_scopes->push_back(scope);
size_t child_count = ts_node_child_count(node);
if (child_count > 0) {
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
append_to_scope_sequence(sequence, current_scopes, child, document, text);
}
} else {
size_t length = ts_node_end_byte(node) - ts_node_start_byte(node);
append_text_to_scope_sequence(sequence, current_scopes, text, length);
current_scopes->push_back(ts_node_type(node, document));
for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) {
TSNode child = ts_node_child(node, i);
append_to_scope_sequence(sequence, current_scopes, child, document, text);
}
append_text_to_scope_sequence(
sequence, current_scopes, text, ts_node_end_byte(node) - sequence->size()
);
current_scopes->pop_back();
}


@ -10,16 +10,7 @@ namespace tree_sitter {
ostream &operator<<(ostream &stream, const Grammar &grammar) {
stream << string("#<grammar");
stream << string(" rules: {");
bool started = false;
for (auto pair : grammar.rules) {
if (started)
stream << string(", ");
stream << pair.first;
stream << string(" => ");
stream << pair.second;
started = true;
}
stream << " rules: " << grammar.rules;
return stream << string("}>");
}
@ -85,6 +76,11 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
return stream << string(">");
}
ostream &operator<<(ostream &stream, const ExternalToken &external_token) {
return stream << "{" << external_token.name << ", " << external_token.type <<
"," << external_token.corresponding_internal_token << "}";
}
ostream &operator<<(ostream &stream, const ProductionStep &step) {
stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence);
stream << ", associativity: ";


@ -97,6 +97,7 @@ struct AdvanceAction;
struct AcceptTokenAction;
class ParseAction;
class ParseState;
struct ExternalToken;
struct ProductionStep;
struct PrecedenceRange;
@ -110,6 +111,7 @@ ostream &operator<<(ostream &, const AdvanceAction &);
ostream &operator<<(ostream &, const AcceptTokenAction &);
ostream &operator<<(ostream &, const ParseAction &);
ostream &operator<<(ostream &, const ParseState &);
ostream &operator<<(ostream &, const ExternalToken &);
ostream &operator<<(ostream &, const ProductionStep &);
ostream &operator<<(ostream &, const PrecedenceRange &);


@ -1,19 +1,11 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
#include "compiler/util/string_helpers.h"
#include <map>
static string dedent(string input) {
size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
string whitespace = "\n" + string(indent_level, ' ');
util::str_replace(&input, whitespace, "\n");
return input.substr(
input.find_first_not_of("\n "),
input.find_last_not_of("\n ") + 1
);
}
static string fill_template(string input, map<string, string> parameters) {
string result = input;
for (const auto &pair : parameters) {
@ -507,6 +499,190 @@ describe("compile_grammar", []() {
});
});
describe("external scanners", [&]() {
it("can tokenize using arbitrary user-defined scanner functions", [&]() {
string grammar = R"JSON({
"name": "external_scanner_example",
"externals": [
"_percent_string",
"_percent_string_start",
"_percent_string_end"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "sum"},
{"type": "SYMBOL", "name": "identifier"}
]
},
"sum": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "+"},
{"type": "SYMBOL", "name": "expression"}
]
}
},
"string": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "_percent_string"},
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_percent_string_start"},
{"type": "SYMBOL", "name": "expression"},
{"type": "SYMBOL", "name": "_percent_string_end"}
]
}
]
},
"identifier": {
"type": "PATTERN",
"value": "\\a+"
}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"external_scanner_example",
result,
"spec/fixtures/external_scanners/percent_strings.c"
));
ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
ts_document_parse(document);
assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");
ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
ts_document_parse(document);
assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
});
it("allows external scanners to refer to tokens that are defined internally", [&]() {
string grammar = R"JSON({
"name": "shared_external_tokens",
"externals": [
"string",
"line_break"
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "_expression"},
{"type": "SYMBOL", "name": "line_break"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "string"},
{"type": "SYMBOL", "name": "variable"},
{"type": "SYMBOL", "name": "number"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"},
"number": {"type": "PATTERN", "value": "\\d+"},
"line_break": {"type": "STRING", "value": "\n"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"shared_external_tokens",
result,
"spec/fixtures/external_scanners/shared_external_tokens.c"
));
ts_document_set_input_string(document, "a b\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "a \nb\n");
ts_document_parse(document);
assert_root_node("(statement (variable) (variable) (line_break))");
ts_document_set_input_string(document, "'hello' 'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
ts_document_set_input_string(document, "'hello' \n'world'\n");
ts_document_parse(document);
assert_root_node("(statement (string) (string) (line_break))");
});
it("allows external tokens to be used as extras", [&]() {
string grammar = R"JSON({
"name": "extra_external_tokens",
"externals": [
"comment"
],
"extras": [
{"type": "PATTERN", "value": "\\s"},
{"type": "SYMBOL", "name": "comment"}
],
"rules": {
"assignment": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "variable"},
{"type": "STRING", "value": "="},
{"type": "SYMBOL", "name": "variable"}
]
},
"variable": {"type": "PATTERN", "value": "\\a+"}
}
})JSON";
TSCompileResult result = ts_compile_grammar(grammar.c_str());
AssertThat(result.error_message, IsNull());
ts_document_set_language(document, load_compile_result(
"extra_external_tokens",
result,
"spec/fixtures/external_scanners/extra_external_tokens.c"
));
ts_document_set_input_string(document, "x = # a comment\n y");
ts_document_parse(document);
assert_root_node("(assignment (variable) (comment) (variable))");
});
});
describe("when the grammar's start symbol is a token", [&]() {
it("parses the token", [&]() {
TSCompileResult result = ts_compile_grammar(R"JSON(


@ -84,6 +84,7 @@ describe("The Corpus", []() {
"json",
"c",
"cpp",
"python",
});
for (auto &language_name : test_languages) {


@ -5,6 +5,7 @@
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
@ -15,22 +16,22 @@ TSPoint point(size_t row, size_t column) {
START_TEST
describe("Document", [&]() {
TSDocument *doc;
TSDocument *document;
TSNode root;
before_each([&]() {
record_alloc::start();
doc = ts_document_new();
document = ts_document_new();
});
after_each([&]() {
ts_document_free(doc);
ts_document_free(document);
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
auto assert_node_string_equals = [&](TSNode node, const string &expected) {
char *str = ts_node_string(node, doc);
char *str = ts_node_string(node, document);
string actual(str);
ts_free(str);
AssertThat(actual, Equals(expected));
@ -42,11 +43,11 @@ describe("Document", [&]() {
before_each([&]() {
spy_input = new SpyInput("{\"key\": [null, 2]}", 3);
ts_document_set_language(doc, get_test_language("json"));
ts_document_set_input_string(doc, "{\"key\": [1, 2]}");
ts_document_parse(doc);
ts_document_set_language(document, get_test_language("json"));
ts_document_set_input_string(document, "{\"key\": [1, 2]}");
ts_document_parse(document);
root = ts_document_root_node(doc);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(object (pair (string) (array (number) (number))))");
@ -61,11 +62,11 @@ describe("Document", [&]() {
spy_input->content = string((const char *)content, sizeof(content));
spy_input->encoding = TSInputEncodingUTF16;
ts_document_set_input(doc, spy_input->input());
ts_document_invalidate(doc);
ts_document_parse(doc);
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
root = ts_document_root_node(doc);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(array (true) (false))");
@ -77,27 +78,27 @@ describe("Document", [&]() {
spy_input->encoding = TSInputEncodingUTF16;
// spy_input->measure_columns_in_bytes
ts_document_set_input(doc, spy_input->input());
ts_document_invalidate(doc);
ts_document_parse(doc);
ts_document_set_input(document, spy_input->input());
ts_document_invalidate(document);
ts_document_parse(document);
});
it("allows the input to be retrieved later", [&]() {
ts_document_set_input(doc, spy_input->input());
AssertThat(ts_document_input(doc).payload, Equals<void *>(spy_input));
AssertThat(ts_document_input(doc).read, Equals(spy_input->input().read));
AssertThat(ts_document_input(doc).seek, Equals(spy_input->input().seek));
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_input(document).payload, Equals<void *>(spy_input));
AssertThat(ts_document_input(document).read, Equals(spy_input->input().read));
AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek));
});
it("does not assume that the document's text has changed", [&]() {
ts_document_set_input(doc, spy_input->input());
AssertThat(ts_document_root_node(doc), Equals<TSNode>(root));
ts_document_set_input(document, spy_input->input());
AssertThat(ts_document_root_node(document), Equals<TSNode>(root));
AssertThat(ts_node_has_changes(root), IsFalse());
AssertThat(spy_input->strings_read, Equals(vector<string>({ "" })));
});
it("reads text from the new input for future parses", [&]() {
ts_document_set_input(doc, spy_input->input());
ts_document_set_input(document, spy_input->input());
// Insert 'null', delete '1'.
TSInputEdit edit = {};
@ -105,28 +106,28 @@ describe("Document", [&]() {
edit.extent_added.column = edit.bytes_added = 4;
edit.extent_removed.column = edit.bytes_removed = 1;
ts_document_edit(doc, edit);
ts_document_parse(doc);
ts_document_edit(document, edit);
ts_document_parse(document);
TSNode new_root = ts_document_root_node(doc);
TSNode new_root = ts_document_root_node(document);
assert_node_string_equals(
new_root,
"(object (pair (string) (array (null) (number))))");
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2"})));
AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2" })));
});
it("reads from the new input correctly when the old input was blank", [&]() {
ts_document_set_input_string(doc, "");
ts_document_parse(doc);
TSNode new_root = ts_document_root_node(doc);
ts_document_set_input_string(document, "");
ts_document_parse(document);
TSNode new_root = ts_document_root_node(document);
AssertThat(ts_node_end_char(new_root), Equals<size_t>(0));
assert_node_string_equals(
new_root,
"(ERROR)");
ts_document_set_input_string(doc, "1");
ts_document_parse(doc);
new_root = ts_document_root_node(doc);
ts_document_set_input_string(document, "1");
ts_document_parse(document);
new_root = ts_document_root_node(document);
AssertThat(ts_node_end_char(new_root), Equals<size_t>(1));
assert_node_string_equals(
new_root,
@ -136,33 +137,44 @@ describe("Document", [&]() {
describe("set_language(language)", [&]() {
before_each([&]() {
ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n");
ts_document_set_input_string(document, "{\"key\": [1, 2]}\n");
});
it("uses the given language for future parses", [&]() {
ts_document_set_language(doc, get_test_language("json"));
ts_document_parse(doc);
ts_document_set_language(document, get_test_language("json"));
ts_document_parse(document);
root = ts_document_root_node(doc);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(object (pair (string) (array (number) (number))))");
});
it("clears out any previous tree", [&]() {
ts_document_set_language(doc, get_test_language("json"));
ts_document_parse(doc);
ts_document_set_language(document, get_test_language("json"));
ts_document_parse(document);
ts_document_set_language(doc, get_test_language("javascript"));
AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
ts_document_set_language(document, get_test_language("javascript"));
AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));
ts_document_parse(doc);
root = ts_document_root_node(doc);
ts_document_parse(document);
root = ts_document_root_node(document);
assert_node_string_equals(
root,
"(program (expression_statement "
"(object (pair (string) (array (number) (number))))))");
});
it("does not allow setting a language with a different version number", [&]() {
TSLanguage language = *get_test_language("json");
AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
language.version++;
AssertThat(ts_language_version(&language), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));
ts_document_set_language(document, &language);
AssertThat(ts_document_language(document), IsNull());
});
});
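An illustrative sketch, not code from this commit: a caller-side guard matching the behavior the version test above exercises, using only functions the test itself calls (`ts_language_version`, `ts_document_set_language`, `ts_document_language`).

// Hedged sketch: reject a language whose version field does not match the
// runtime's TREE_SITTER_LANGUAGE_VERSION, as the test above expects.
bool try_set_language(TSDocument *document, const TSLanguage *language) {
  if (ts_language_version(language) != TREE_SITTER_LANGUAGE_VERSION)
    return false;  // mismatched version; the runtime would ignore it anyway
  ts_document_set_language(document, language);
  return ts_document_language(document) != NULL;
}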
describe("set_logger(TSLogger)", [&]() {
@ -170,45 +182,39 @@ describe("Document", [&]() {
before_each([&]() {
logger = new SpyLogger();
ts_document_set_language(doc, get_test_language("json"));
ts_document_set_input_string(doc, "[1, 2]");
ts_document_set_language(document, get_test_language("json"));
ts_document_set_input_string(document, "[1, 2]");
});
after_each([&]() {
delete logger;
});
it("calls the debugger with a message for each lex action", [&]() {
ts_document_set_logger(doc, logger->logger());
ts_document_parse(doc);
AssertThat(logger->messages, Contains("lookahead char:'1'"));
AssertThat(logger->messages, Contains("lookahead char:'['"));
});
it("calls the debugger with a message for each parse action", [&]() {
ts_document_set_logger(doc, logger->logger());
ts_document_parse(doc);
ts_document_set_logger(document, logger->logger());
ts_document_parse(document);
AssertThat(logger->messages, Contains("new_parse"));
AssertThat(logger->messages, Contains("lookahead char:'['"));
AssertThat(logger->messages, Contains("skip character:' '"));
AssertThat(logger->messages, Contains("consume character:'['"));
AssertThat(logger->messages, Contains("consume character:'1'"));
AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
AssertThat(logger->messages, Contains("accept"));
});
it("allows the debugger to be retrieved later", [&]() {
ts_document_set_logger(doc, logger->logger());
AssertThat(ts_document_logger(doc).payload, Equals(logger));
ts_document_set_logger(document, logger->logger());
AssertThat(ts_document_logger(document).payload, Equals(logger));
});
describe("disabling debugging", [&]() {
before_each([&]() {
ts_document_set_logger(doc, logger->logger());
ts_document_set_logger(doc, {NULL, NULL});
ts_document_set_logger(document, logger->logger());
ts_document_set_logger(document, {NULL, NULL});
});
it("does not call the debugger any more", [&]() {
ts_document_parse(doc);
ts_document_parse(document);
AssertThat(logger->messages, IsEmpty());
});
});
@ -218,12 +224,12 @@ describe("Document", [&]() {
SpyInput *input;
before_each([&]() {
ts_document_set_language(doc, get_test_language("javascript"));
ts_document_set_language(document, get_test_language("javascript"));
input = new SpyInput("{a: null};", 3);
ts_document_set_input(doc, input->input());
ts_document_parse(doc);
ts_document_set_input(document, input->input());
ts_document_parse(document);
assert_node_string_equals(
ts_document_root_node(doc),
ts_document_root_node(document),
"(program (expression_statement (object (pair (identifier) (null)))))");
});
@ -231,26 +237,25 @@ describe("Document", [&]() {
delete input;
});
auto get_ranges = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
auto get_invalidated_ranges_for_edit = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
TSInputEdit edit = callback();
ts_document_edit(doc, edit);
ts_document_edit(document, edit);
TSRange *ranges;
uint32_t range_count = 0;
ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count);
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
vector<TSRange> result;
for (size_t i = 0; i < range_count; i++)
for (size_t i = 0; i < range_count; i++) {
result.push_back(ranges[i]);
}
ts_free(ranges);
return result;
};
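The lambda above wraps the whole edit, reparse, and diff cycle. Written out long-hand, the flow looks roughly like this (an illustrative sketch using the same API calls; the edit values are hypothetical):

// Hedged sketch of the edit -> reparse -> changed-ranges cycle.
TSInputEdit edit = {};
edit.extent_added.column = edit.bytes_added = 4;      // e.g. text grew by 4 bytes
edit.extent_removed.column = edit.bytes_removed = 1;  // and lost 1 byte
// (the edit's start-position fields, elided by the hunk above, are set the same way)
ts_document_edit(document, edit);

TSRange *ranges;
uint32_t range_count = 0;
ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);
for (uint32_t i = 0; i < range_count; i++) {
  // ranges[i] spans a region of the document whose syntax changed
}
ts_free(ranges);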
it("reports changes when one token has been updated", [&]() {
// Replace `null` with `nothing`
auto ranges = get_ranges([&]() {
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("ull"), 1, "othing");
});
@ -262,7 +267,7 @@ describe("Document", [&]() {
})));
// Replace `nothing` with `null` again
ranges = get_ranges([&]() {
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
@ -276,7 +281,7 @@ describe("Document", [&]() {
it("reports changes when tokens have been appended", [&]() {
// Add a second key-value pair
auto ranges = get_ranges([&]() {
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("}"), 0, ", b: false");
});
@ -288,12 +293,12 @@ describe("Document", [&]() {
})));
// Add a third key-value pair in between the first two
ranges = get_ranges([&]() {
ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find(", b"), 0, ", c: 1");
});
assert_node_string_equals(
ts_document_root_node(doc),
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (identifier) (null)) "
"(pair (identifier) (number)) "
@ -307,41 +312,39 @@ describe("Document", [&]() {
})));
// Delete the middle pair.
ranges = get_ranges([&]() {
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(doc),
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (identifier) (null)) "
"(pair (identifier) (false)))))");
AssertThat(ranges, Equals(vector<TSRange>({
})));
AssertThat(ranges, IsEmpty());
// Delete the second pair.
ranges = get_ranges([&]() {
ranges = get_invalidated_ranges_for_edit([&]() {
return input->undo();
});
assert_node_string_equals(
ts_document_root_node(doc),
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (identifier) (null)))))");
AssertThat(ranges, Equals(vector<TSRange>({
})));
AssertThat(ranges, IsEmpty());
});
it("reports changes when trees have been wrapped", [&]() {
// Wrap the object in an assignment expression.
auto ranges = get_ranges([&]() {
auto ranges = get_invalidated_ranges_for_edit([&]() {
return input->replace(input->content.find("null"), 0, "b === ");
});
assert_node_string_equals(
ts_document_root_node(doc),
ts_document_root_node(document),
"(program (expression_statement (object "
"(pair (identifier) (rel_op (identifier) (null))))))");

View file

@ -4,11 +4,13 @@
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/record_alloc.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
START_TEST
describe("Parser", [&]() {
TSDocument *doc;
TSDocument *document;
SpyInput *input;
TSNode root;
size_t chunk_size;
@ -18,90 +20,76 @@ describe("Parser", [&]() {
chunk_size = 3;
input = nullptr;
doc = ts_document_new();
document = ts_document_new();
});
after_each([&]() {
if (doc)
ts_document_free(doc);
if (input)
delete input;
if (document) ts_document_free(document);
if (input) delete input;
record_alloc::stop();
AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
});
auto set_text = [&](const char *text) {
auto set_text = [&](string text) {
input = new SpyInput(text, chunk_size);
ts_document_set_input(doc, input->input());
ts_document_parse(doc);
ts_document_set_input(document, input->input());
ts_document_parse(document);
root = ts_document_root_node(doc);
AssertThat(ts_node_end_byte(root), Equals(strlen(text)));
root = ts_document_root_node(document);
AssertThat(ts_node_end_byte(root), Equals(text.size()));
input->clear();
};
auto insert_text = [&](size_t position, string text) {
size_t prev_size = ts_node_end_byte(root);
ts_document_edit(doc, input->replace(position, 0, text));
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size + text.size()));
};
auto delete_text = [&](size_t position, size_t length) {
size_t prev_size = ts_node_end_byte(root);
ts_document_edit(doc, input->replace(position, length, ""));
ts_document_parse(doc);
root = ts_document_root_node(doc);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size - length));
};
auto replace_text = [&](size_t position, size_t length, string new_text) {
size_t prev_size = ts_node_end_byte(root);
ts_document_edit(doc, input->replace(position, length, new_text));
ts_document_parse(doc);
ts_document_edit(document, input->replace(position, length, new_text));
ts_document_parse(document);
root = ts_document_root_node(doc);
root = ts_document_root_node(document);
size_t new_size = ts_node_end_byte(root);
AssertThat(new_size, Equals(prev_size - length + new_text.size()));
};
auto insert_text = [&](size_t position, string text) {
replace_text(position, 0, text);
};
auto delete_text = [&](size_t position, size_t length) {
replace_text(position, length, "");
};
auto undo = [&]() {
ts_document_edit(document, input->undo());
ts_document_parse(document);
};
auto assert_root_node = [&](const string &expected) {
TSNode node = ts_document_root_node(doc);
char *str = ts_node_string(node, doc);
string actual(str);
ts_free(str);
TSNode node = ts_document_root_node(document);
char *node_string = ts_node_string(node, document);
string actual(node_string);
ts_free(node_string);
AssertThat(actual, Equals(expected));
};
auto get_node_text = [&](TSNode node) {
size_t start = ts_node_start_byte(node);
size_t end = ts_node_end_byte(node);
return input->content.substr(start, end - start);
};
describe("handling errors", [&]() {
before_each([&]() {
ts_document_set_language(doc, get_test_language("json"));
});
auto get_node_text = [&](TSNode node) {
size_t start = ts_node_start_byte(node);
size_t end = ts_node_end_byte(node);
return input->content.substr(start, end - start);
};
describe("when there is an invalid substring right before a valid token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
set_text(" [123, @@@@@, true]");
assert_root_node(
"(array (number) (ERROR (UNEXPECTED '@')) (true))");
TSNode error = ts_node_named_child(root, 1);
AssertThat(ts_node_type(error, doc), Equals("ERROR"));
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(get_node_text(error), Equals(", @@@@@"));
AssertThat(ts_node_child_count(error), Equals<size_t>(2));
@ -112,56 +100,59 @@ describe("Parser", [&]() {
AssertThat(get_node_text(garbage), Equals("@@@@@"));
TSNode node_after_error = ts_node_named_child(root, 2);
AssertThat(ts_node_type(node_after_error, doc), Equals("true"));
AssertThat(ts_node_type(node_after_error, document), Equals("true"));
AssertThat(get_node_text(node_after_error), Equals("true"));
});
});
describe("when there is an unexpected string in the middle of a token", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
set_text(" [123, faaaaalse, true]");
assert_root_node(
"(array (number) (ERROR (UNEXPECTED 'a')) (true))");
TSNode error = ts_node_named_child(root, 1);
AssertThat(ts_node_type(error, doc), Equals("ERROR"));
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(ts_node_child_count(error), Equals<size_t>(2));
TSNode comma = ts_node_child(error, 0);
AssertThat(ts_node_type(comma, doc), Equals(","));
AssertThat(ts_node_type(comma, document), Equals(","));
AssertThat(get_node_text(comma), Equals(","));
TSNode garbage = ts_node_child(error, 1);
AssertThat(ts_node_type(garbage, doc), Equals("ERROR"));
AssertThat(ts_node_type(garbage, document), Equals("ERROR"));
AssertThat(get_node_text(garbage), Equals("faaaaalse"));
TSNode last = ts_node_named_child(root, 2);
AssertThat(ts_node_type(last, doc), Equals("true"));
AssertThat(ts_node_type(last, document), Equals("true"));
AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, ")));
});
});
describe("when there is one unexpected token between two valid tokens", [&]() {
it("computes the error node's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
set_text(" [123, true false, true]");
assert_root_node(
"(array (number) (true) (ERROR (false)) (true))");
TSNode error = ts_node_named_child(root, 2);
AssertThat(ts_node_type(error, doc), Equals("ERROR"));
AssertThat(ts_node_type(error, document), Equals("ERROR"));
AssertThat(get_node_text(error), Equals("false"));
AssertThat(ts_node_child_count(error), Equals<size_t>(1));
TSNode last = ts_node_named_child(root, 1);
AssertThat(ts_node_type(last, doc), Equals("true"));
AssertThat(ts_node_type(last, document), Equals("true"));
AssertThat(get_node_text(last), Equals("true"));
});
});
describe("when there is an unexpected string at the end of a token", [&]() {
it("computes the error's size and position correctly", [&]() {
ts_document_set_language(document, get_test_language("json"));
set_text(" [123, \"hi\n, true]");
assert_root_node(
@ -171,7 +162,7 @@ describe("Parser", [&]() {
describe("when there is an unterminated error", [&]() {
it("maintains a consistent tree", [&]() {
ts_document_set_language(doc, get_test_language("javascript"));
ts_document_set_language(document, get_test_language("javascript"));
set_text("a; /* b");
assert_root_node(
"(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");
@ -180,14 +171,9 @@ describe("Parser", [&]() {
});
describe("handling extra tokens", [&]() {
// In the javascript example grammar, ASI works by using newlines as
// terminators in statements, but also as extra tokens.
before_each([&]() {
ts_document_set_language(doc, get_test_language("javascript"));
});
describe("when the token appears as part of a grammar rule", [&]() {
it("is incorporated into the tree", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("fn()\n");
assert_root_node(
@ -196,7 +182,8 @@ describe("Parser", [&]() {
});
describe("when the token appears somewhere else", [&]() {
it("is incorporated into the tree", [&]() {
it("incorporates it into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text(
"fn()\n"
" .otherFn();");
@ -211,7 +198,8 @@ describe("Parser", [&]() {
});
describe("when several extra tokens appear in a row", [&]() {
it("is incorporated into the tree", [&]() {
it("incorporates them into the tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text(
"fn()\n\n"
"// This is a comment"
@ -230,199 +218,219 @@ describe("Parser", [&]() {
});
describe("editing", [&]() {
before_each([&]() {
ts_document_set_language(doc, get_test_language("javascript"));
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("x * (100 + abc);");
assert_root_node(
"(program (expression_statement (math_op "
"(identifier) "
"(math_op (number) (identifier)))))");
insert_text(strlen("x * (100 + abc"), ".d");
assert_root_node(
"(program (expression_statement (math_op "
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
});
});
describe("inserting text", [&]() {
describe("creating new tokens near the end of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
set_text("x * (100 + abc);");
describe("creating new tokens near the beginning of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
assert_root_node(
"(program (expression_statement (math_op "
"(identifier) "
"(math_op (number) (identifier)))))");
ts_document_set_language(document, get_test_language("javascript"));
set_text("123 + 456 * (10 + x);");
insert_text(strlen("x * (100 + abc"), ".d");
assert_root_node(
"(program (expression_statement (math_op "
"(number) "
"(math_op (number) (math_op (number) (identifier))))))");
assert_root_node(
"(program (expression_statement (math_op "
"(identifier) "
"(math_op (number) (member_access (identifier) (identifier))))))");
insert_text(strlen("123"), " || 5");
AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
});
});
describe("creating new tokens near the beginning of the input", [&]() {
it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
chunk_size = 2;
set_text("123 + 456 * (10 + x);");
assert_root_node(
"(program (expression_statement (math_op "
assert_root_node(
"(program (expression_statement (bool_op "
"(number) "
"(math_op "
"(number) "
"(math_op (number) (math_op (number) (identifier))))))");
"(math_op (number) (math_op (number) (identifier)))))))");
insert_text(strlen("123"), " || 5");
assert_root_node(
"(program (expression_statement (bool_op "
"(number) "
"(math_op "
"(number) "
"(math_op (number) (math_op (number) (identifier)))))))");
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
});
AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
});
});
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(doc, get_test_language("javascript"));
describe("introducing an error", [&]() {
it("gives the error the right size", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("var x = y;");
set_text("var x = y;");
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (identifier))))");
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (identifier))))");
insert_text(strlen("var x = y"), " *");
insert_text(strlen("var x = y"), " *");
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (identifier)) (ERROR)))");
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (identifier)) (ERROR)))");
insert_text(strlen("var x = y *"), " z");
insert_text(strlen("var x = y *"), " z");
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (math_op (identifier) (identifier)))))");
});
assert_root_node(
"(program (var_declaration (var_assignment "
"(identifier) (math_op (identifier) (identifier)))))");
});
});
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
set_text("abc * 123;");
describe("into the middle of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("abc * 123;");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
insert_text(strlen("ab"), "XYZ");
insert_text(strlen("ab"), "XYZ");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
AssertThat(ts_node_type(node, doc), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
});
TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
AssertThat(ts_node_type(node, document), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
});
});
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
set_text("abc * 123;");
describe("at the end of an existing token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("abc * 123;");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
insert_text(strlen("abc"), "XYZ");
insert_text(strlen("abc"), "XYZ");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
assert_root_node(
"(program (expression_statement (math_op (identifier) (number))))");
TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
AssertThat(ts_node_type(node, doc), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
});
TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
AssertThat(ts_node_type(node, document), Equals("identifier"));
AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
});
});
describe("into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
set_text("123 *\n"
describe("inserting text into a node containing a extra token", [&]() {
it("updates the parse tree", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("123 *\n"
"// a-comment\n"
"abc;");
assert_root_node(
"(program (expression_statement (math_op "
"(number) "
"(comment) "
"(identifier))))");
insert_text(
strlen("123 *\n"
"// a-comment\n"
"abc;");
"abc"),
"XYZ");
assert_root_node(
"(program (expression_statement (math_op "
"(number) "
"(comment) "
"(identifier))))");
insert_text(
strlen("123 *\n"
"// a-comment\n"
"abc"),
"XYZ");
assert_root_node(
"(program (expression_statement (math_op "
"(number) "
"(comment) "
"(identifier))))");
});
assert_root_node(
"(program (expression_statement (math_op "
"(number) "
"(comment) "
"(identifier))))");
});
});
describe("deleting text", [&]() {
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
set_text("123 * 456; 789 * 123;");
describe("when a critical token is removed", [&]() {
it("updates the parse tree, creating an error", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("123 * 456; 789 * 123;");
assert_root_node(
"(program "
"(expression_statement (math_op (number) (number))) "
"(expression_statement (math_op (number) (number))))");
assert_root_node(
"(program "
"(expression_statement (math_op (number) (number))) "
"(expression_statement (math_op (number) (number))))");
delete_text(strlen("123 "), 2);
delete_text(strlen("123 "), 2);
assert_root_node(
"(program "
"(expression_statement (number) (ERROR (number))) "
"(expression_statement (math_op (number) (number))))");
});
assert_root_node(
"(program "
"(expression_statement (number) (ERROR (number))) "
"(expression_statement (math_op (number) (number))))");
});
});
describe("replacing text", [&]() {
it("does not try to re-use nodes that are within the edited region", [&]() {
ts_document_set_language(doc, get_test_language("javascript"));
describe("with external tokens", [&]() {
it("maintains the external scanner's state during incremental parsing", [&]() {
ts_document_set_language(document, get_test_language("python"));
string text = dedent(R"PYTHON(
if a:
print b
return c
)PYTHON");
set_text("{ x: (b.c) };");
set_text(text);
assert_root_node("(module "
"(if_statement (identifier) "
"(print_statement (identifier))) "
"(return_statement (expression_list (identifier))))");
assert_root_node(
"(program (expression_statement (object (pair "
"(identifier) (member_access (identifier) (identifier))))))");
replace_text(text.find("return"), 0, " ");
assert_root_node("(module "
"(if_statement (identifier) "
"(print_statement (identifier)) "
"(return_statement (expression_list (identifier)))))");
replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c");
assert_root_node(
"(program (expression_statement (object (pair "
"(identifier) (member_access (identifier) (identifier))))))");
undo();
assert_root_node("(module "
"(if_statement (identifier) "
"(print_statement (identifier))) "
"(return_statement (expression_list (identifier))))");
});
});
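The Python test above depends on the external scanner's state surviving incremental reparses: the runtime snapshots the scanner into a fixed-size `TSExternalTokenState` and restores it when resuming. A minimal sketch of such a round-trip, assuming a hypothetical indentation-tracking scanner (the `IndentScanner` type and function names are illustrative, not from this commit):

#include <stdint.h>
#include <string.h>

// Hypothetical scanner state: a stack of indentation widths.
typedef struct {
  uint8_t indent_stack[15];
  uint8_t depth;
} IndentScanner;

// Snapshot the stack into the fixed-size 16-byte state buffer.
static bool indent_scanner_serialize(void *payload, TSExternalTokenState state) {
  IndentScanner *self = (IndentScanner *)payload;
  memcpy(state, self->indent_stack, self->depth);  // assumes depth <= 15
  state[15] = self->depth;
  return true;
}

// Restore the stack from a previously serialized state.
static void indent_scanner_deserialize(void *payload, const TSExternalTokenState state) {
  IndentScanner *self = (IndentScanner *)payload;
  self->depth = state[15];
  memcpy(self->indent_stack, state, self->depth);
}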
it("does not try to re-use nodes that are within the edited region", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("{ x: (b.c) };");
assert_root_node(
"(program (expression_statement (object (pair "
"(identifier) (member_access (identifier) (identifier))))))");
replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c");
assert_root_node(
"(program (expression_statement (object (pair "
"(identifier) (member_access (identifier) (identifier))))))");
});
it("updates the document's parse count", [&]() {
ts_document_set_language(doc, get_test_language("javascript"));
AssertThat(ts_document_parse_count(doc), Equals<size_t>(0));
ts_document_set_language(document, get_test_language("javascript"));
AssertThat(ts_document_parse_count(document), Equals<size_t>(0));
set_text("{ x: (b.c) };");
AssertThat(ts_document_parse_count(doc), Equals<size_t>(1));
AssertThat(ts_document_parse_count(document), Equals<size_t>(1));
insert_text(strlen("{ x"), "yz");
AssertThat(ts_document_parse_count(doc), Equals<size_t>(2));
AssertThat(ts_document_parse_count(document), Equals<size_t>(2));
});
});
describe("lexing", [&]() {
before_each([&]() {
ts_document_set_language(doc, get_test_language("javascript"));
});
describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
it("terminates them at the end of the document", [&]() {
ts_document_set_language(document, get_test_language("javascript"));
set_text("x; // this is a comment");
assert_root_node(
@ -437,6 +445,7 @@ describe("Parser", [&]() {
it("recognizes UTF8 characters as single characters", [&]() {
// 'ΩΩΩ — ΔΔ';
ts_document_set_language(document, get_test_language("javascript"));
set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");
assert_root_node(

View file

@ -521,6 +521,31 @@ describe("Stack", [&]() {
free_slice_array(&pop.slices);
});
});
describe("setting external token state", [&]() {
TSExternalTokenState external_token_state1, external_token_state2;
it("allows the state to be retrieved", [&]() {
AssertThat(ts_stack_external_token_state(stack, 0), Equals(nullptr));
ts_stack_set_external_token_state(stack, 0, &external_token_state1);
AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1));
ts_stack_copy_version(stack, 0);
AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1));
});
it("does not merge stack versions with different external token states", [&]() {
ts_stack_copy_version(stack, 0);
ts_stack_push(stack, 0, trees[0], false, 5);
ts_stack_push(stack, 1, trees[0], false, 5);
ts_stack_set_external_token_state(stack, 0, &external_token_state1);
ts_stack_set_external_token_state(stack, 1, &external_token_state2);
AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
});
});
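An illustrative sketch, not code from this commit, of one plausible merge guard these tests imply: two stack versions may merge only when their external token states agree.

#include <string.h>

// Hedged sketch: versions merge only if their external states match, by
// pointer identity or byte-wise equality of the 16-byte state buffer.
static bool external_states_equal(const TSExternalTokenState *a,
                                  const TSExternalTokenState *b) {
  if (a == b)
    return true;   // same snapshot, or both NULL
  if (!a || !b)
    return false;  // only one version carries external scanner state
  return memcmp(*a, *b, sizeof(TSExternalTokenState)) == 0;
}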
});
END_TEST

View file

@ -22,47 +22,32 @@ void assert_consistent(const Tree *tree) {
START_TEST
enum {
cat = 1,
dog,
eel,
fox,
goat,
hog,
};
describe("Tree", []() {
Tree *tree1, *tree2, *parent1;
enum {
symbol1 = 1,
symbol2,
symbol3,
symbol4,
symbol5,
symbol6,
symbol7,
symbol8,
symbol9,
};
TSSymbolMetadata visible = {true, true, false, true};
TSSymbolMetadata invisible = {false, false, false, true};
before_each([&]() {
tree1 = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
tree2 = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent1 = ts_tree_make_node(dog, 2, tree_array({
tree1,
tree2,
}), visible);
});
after_each([&]() {
ts_tree_release(tree1);
ts_tree_release(tree2);
ts_tree_release(parent1);
});
describe("make_leaf(sym, size, padding, is_hidden)", [&]() {
it("does not record that it is fragile", [&]() {
AssertThat(tree1->fragile_left, IsFalse());
AssertThat(tree1->fragile_right, IsFalse());
describe("make_leaf", [&]() {
it("does not mark the tree as fragile", [&]() {
Tree *tree = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
AssertThat(tree->fragile_left, IsFalse());
AssertThat(tree->fragile_right, IsFalse());
});
});
describe("make_error(size, padding, lookahead_char)", [&]() {
it("records that it is fragile", [&]() {
describe("make_error", [&]() {
it("marks the tree as fragile", [&]() {
Tree *error_tree = ts_tree_make_error(
length_zero(),
length_zero(),
@ -75,15 +60,33 @@ describe("Tree", []() {
});
});
describe("make_node(symbol, child_count, children, is_hidden)", [&]() {
it("computes its size based on its child nodes", [&]() {
AssertThat(parent1->size.bytes, Equals<size_t>(
tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes));
AssertThat(parent1->size.chars, Equals<size_t>(
tree1->size.chars + tree2->padding.chars + tree2->size.chars));
describe("make_node", [&]() {
Tree *tree1, *tree2, *parent1;
before_each([&]() {
tree1 = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
tree2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent1 = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
});
it("computes its padding based on its first child", [&]() {
after_each([&]() {
ts_tree_release(tree1);
ts_tree_release(tree2);
ts_tree_release(parent1);
});
it("computes its size and padding based on its child nodes", [&]() {
AssertThat(parent1->size.bytes, Equals<size_t>(
tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes));
AssertThat(parent1->size.chars, Equals<size_t>(
tree1->size.chars + tree2->padding.chars + tree2->size.chars));
AssertThat(parent1->padding.bytes, Equals<size_t>(tree1->padding.bytes));
AssertThat(parent1->padding.chars, Equals<size_t>(tree1->padding.chars));
});
@ -97,7 +100,7 @@ describe("Tree", []() {
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(eel, 2, tree_array({
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
@ -121,7 +124,7 @@ describe("Tree", []() {
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(eel, 2, tree_array({
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
@ -145,7 +148,7 @@ describe("Tree", []() {
ts_tree_retain(tree1);
ts_tree_retain(tree2);
parent = ts_tree_make_node(eel, 2, tree_array({
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
@ -162,14 +165,14 @@ describe("Tree", []() {
});
});
describe("edit(InputEdit)", [&]() {
describe("edit", [&]() {
Tree *tree = nullptr;
before_each([&]() {
tree = ts_tree_make_node(cat, 3, tree_array({
ts_tree_make_leaf(dog, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(eel, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(fox, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
tree = ts_tree_make_node(symbol1, 3, tree_array({
ts_tree_make_leaf(symbol2, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(symbol3, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(symbol4, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
}), visible);
AssertThat(tree->padding, Equals<Length>({2, 2, {0, 2}}));
@ -180,7 +183,6 @@ describe("Tree", []() {
ts_tree_release(tree);
});
describe("edits within a tree's padding", [&]() {
it("resizes the padding of the tree and its leftmost descendants", [&]() {
TSInputEdit edit;
@ -312,69 +314,124 @@ describe("Tree", []() {
});
});
describe("equality", [&]() {
describe("eq", [&]() {
Tree *leaf;
before_each([&]() {
leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
});
after_each([&]() {
ts_tree_release(leaf);
});
it("returns true for identical trees", [&]() {
Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
AssertThat(ts_tree_eq(tree1, tree1_copy), IsTrue());
Tree *leaf_copy = ts_tree_make_leaf(symbol1, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue());
Tree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
AssertThat(ts_tree_eq(tree2, tree2_copy), IsTrue());
Tree *parent2 = ts_tree_make_node(dog, 2, tree_array({
tree1_copy,
tree2_copy,
Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf_copy,
}), visible);
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
AssertThat(ts_tree_eq(parent1, parent2), IsTrue());
Tree *parent_copy = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf_copy,
}), visible);
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
ts_tree_release(parent2);
AssertThat(ts_tree_eq(parent, parent_copy), IsTrue());
ts_tree_release(leaf_copy);
ts_tree_release(parent);
ts_tree_release(parent_copy);
});
it("returns false for trees with different symbols", [&]() {
Tree *different_tree = ts_tree_make_leaf(
tree1->symbol + 1,
tree1->padding,
tree1->size,
Tree *different_leaf = ts_tree_make_leaf(
leaf->symbol + 1,
leaf->padding,
leaf->size,
visible);
AssertThat(ts_tree_eq(tree1, different_tree), IsFalse());
ts_tree_release(different_tree);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(different_leaf);
});
it("returns false for trees with different options", [&]() {
Tree *tree1_copy = ts_tree_make_leaf(cat, tree1->padding, tree1->size, invisible);
AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
ts_tree_release(tree1_copy);
Tree *different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, leaf->size, invisible);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(different_leaf);
});
it("returns false for trees with different sizes", [&]() {
Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, tree1->size, invisible);
AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
ts_tree_release(tree1_copy);
Tree *different_leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, leaf->size, invisible);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(different_leaf);
tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, {1, 10}}, invisible);
AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
ts_tree_release(tree1_copy);
different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, {5, 4, {1, 10}}, invisible);
AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
ts_tree_release(different_leaf);
});
it("returns false for trees with different children", [&]() {
Tree *different_tree = ts_tree_make_leaf(
tree1->symbol + 1,
tree1->padding,
tree1->size,
visible);
Tree *leaf2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
ts_tree_retain(different_tree);
ts_tree_retain(tree2);
Tree *different_parent = ts_tree_make_node(dog, 2, tree_array({
different_tree, tree2,
Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf2,
}), visible);
ts_tree_retain(leaf);
ts_tree_retain(leaf2);
Tree *different_parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf2,
leaf,
}), visible);
ts_tree_retain(leaf2);
ts_tree_retain(leaf);
AssertThat(ts_tree_eq(different_parent, parent), IsFalse());
AssertThat(ts_tree_eq(parent, different_parent), IsFalse());
ts_tree_release(leaf2);
ts_tree_release(parent);
ts_tree_release(different_parent);
});
});
describe("last_external_token_state", [&]() {
Length padding = {1, 1, {0, 1}};
Length size = {2, 2, {0, 2}};
auto make_external = [](Tree *tree) {
tree->has_external_tokens = true;
tree->has_external_token_state = true;
return tree;
};
it("returns the last serialized external token state in the given tree", [&]() {
Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9;
tree1 = ts_tree_make_node(symbol1, 2, tree_array({
(tree2 = ts_tree_make_node(symbol2, 3, tree_array({
(tree3 = make_external(ts_tree_make_leaf(symbol3, padding, size, visible))),
(tree4 = ts_tree_make_leaf(symbol4, padding, size, visible)),
(tree5 = ts_tree_make_leaf(symbol5, padding, size, visible)),
}), visible)),
(tree6 = ts_tree_make_node(symbol6, 2, tree_array({
(tree7 = ts_tree_make_node(symbol7, 1, tree_array({
(tree8 = ts_tree_make_leaf(symbol8, padding, size, visible)),
}), visible)),
(tree9 = ts_tree_make_leaf(symbol9, padding, size, visible)),
}), visible)),
}), visible);
AssertThat(ts_tree_eq(different_parent, parent1), IsFalse());
AssertThat(ts_tree_eq(parent1, different_parent), IsFalse());
ts_tree_release(different_tree);
ts_tree_release(different_parent);
auto state = ts_tree_last_external_token_state(tree1);
AssertThat(state, Equals(&tree3->external_token_state));
});
});
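An illustrative sketch, not code from this commit, of the traversal the test above implies: scan children right-to-left so the last serialized state in document order wins. The `child_count` and `children` field names are assumptions about the Tree struct; `has_external_tokens` and `has_external_token_state` appear in the test.

// Hedged sketch of ts_tree_last_external_token_state's search.
const TSExternalTokenState *last_external_state(const Tree *tree) {
  if (!tree->has_external_tokens)
    return NULL;  // prune subtrees that contain no external tokens at all
  for (size_t i = tree->child_count; i > 0; i--) {
    const TSExternalTokenState *state = last_external_state(tree->children[i - 1]);
    if (state)
      return state;
  }
  return tree->has_external_token_state ? &tree->external_token_state : NULL;
}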
});

View file

@ -64,7 +64,7 @@ class LexTableBuilder {
private:
void add_lex_state_for_parse_state(ParseState *parse_state) {
parse_state->lex_state_id =
add_lex_state(item_set_for_tokens(parse_state->expected_inputs()));
add_lex_state(item_set_for_terminals(parse_state->terminal_entries));
}
LexStateId add_lex_state(const LexItemSet &item_set) {
@ -112,24 +112,27 @@ class LexTableBuilder {
void mark_fragile_tokens() {
for (ParseState &state : parse_table->states) {
for (auto &entry : state.terminal_entries) {
auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
if (homonyms != conflict_manager.possible_homonyms.end())
for (Symbol::Index homonym : homonyms->second)
if (state.terminal_entries.count(homonym)) {
entry.second.reusable = false;
break;
}
Symbol symbol = entry.first;
if (symbol.is_token()) {
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
if (homonyms != conflict_manager.possible_homonyms.end())
for (Symbol::Index homonym : homonyms->second)
if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
entry.second.reusable = false;
break;
}
if (!entry.second.reusable)
continue;
if (!entry.second.reusable)
continue;
auto extensions = conflict_manager.possible_extensions.find(entry.first);
if (extensions != conflict_manager.possible_extensions.end())
for (Symbol::Index extension : extensions->second)
if (state.terminal_entries.count(extension)) {
entry.second.depends_on_lookahead = true;
break;
}
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
if (extensions != conflict_manager.possible_extensions.end())
for (Symbol::Index extension : extensions->second)
if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
entry.second.depends_on_lookahead = true;
break;
}
}
}
}
}
@ -150,24 +153,27 @@ class LexTableBuilder {
}
}
LexItemSet item_set_for_tokens(const set<Symbol> &symbols) {
LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
LexItemSet result;
for (const Symbol &symbol : symbols)
for (const rule_ptr &rule : rules_for_symbol(symbol))
for (const rule_ptr &separator_rule : separator_rules)
result.entries.insert(LexItem(
symbol,
Metadata::separator(
Seq::build({
separator_rule,
Metadata::main_token(rule) }))));
for (const auto &pair : terminals) {
Symbol symbol = pair.first;
if (symbol.is_token()) {
for (const rule_ptr &rule : rules_for_symbol(symbol)) {
for (const rule_ptr &separator_rule : separator_rules) {
result.entries.insert(LexItem(
symbol,
Metadata::separator(
Seq::build({
separator_rule,
Metadata::main_token(rule) }))));
}
}
}
}
return result;
}
vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
if (!symbol.is_token)
return {};
if (symbol == rules::END_OF_INPUT())
return { CharacterSet().include(0).copy() };

View file

@ -52,7 +52,10 @@ class ParseTableBuilder {
allow_any_conflict(false) {}
pair<ParseTable, CompileError> build() {
Symbol start_symbol = Symbol(0, grammar.variables.empty());
Symbol start_symbol = grammar.variables.empty() ?
Symbol(0, Symbol::Terminal) :
Symbol(0, Symbol::NonTerminal);
Production start_production({
ProductionStep(start_symbol, 0, rules::AssociativityNone),
});
@ -63,7 +66,7 @@ class ParseTableBuilder {
add_parse_state(ParseItemSet({
{
ParseItem(rules::START(), start_production, 0),
LookaheadSet({ END_OF_INPUT().index }),
LookaheadSet({ END_OF_INPUT() }),
},
}));
@ -107,21 +110,25 @@ class ParseTableBuilder {
void build_error_parse_state() {
ParseState error_state;
for (const Symbol::Index index : parse_table.mergeable_symbols) {
add_out_of_context_parse_state(&error_state, Symbol(index, true));
for (const Symbol symbol : parse_table.mergeable_symbols) {
add_out_of_context_parse_state(&error_state, symbol);
}
for (const Symbol &symbol : grammar.extra_tokens) {
if (!error_state.terminal_entries.count(symbol.index)) {
error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra());
if (!error_state.terminal_entries.count(symbol)) {
error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra());
}
}
for (size_t i = 0; i < grammar.variables.size(); i++) {
add_out_of_context_parse_state(&error_state, Symbol(i, false));
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External));
}
error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0));
for (size_t i = 0; i < grammar.variables.size(); i++) {
add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal));
}
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
parse_table.states[0] = error_state;
}
@ -130,10 +137,10 @@ class ParseTableBuilder {
const ParseItemSet &item_set = recovery_states[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
if (symbol.is_token) {
error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) });
} else {
if (symbol.is_non_terminal()) {
error_state->nonterminal_entries[symbol.index] = state;
} else {
error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) });
}
}
}
@ -152,9 +159,9 @@ class ParseTableBuilder {
}
string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
map<Symbol::Index, ParseItemSet> terminal_successors;
map<Symbol, ParseItemSet> terminal_successors;
map<Symbol::Index, ParseItemSet> nonterminal_successors;
set<Symbol::Index> lookaheads_with_conflicts;
set<Symbol> lookaheads_with_conflicts;
for (const auto &pair : item_set.entries) {
const ParseItem &item = pair.first;
@ -168,7 +175,7 @@ class ParseTableBuilder {
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
int precedence = item.precedence();
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
for (Symbol lookahead : *lookahead_symbols.entries) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
// Only add the highest-precedence Reduce actions to the parse table.
@ -203,10 +210,10 @@ class ParseTableBuilder {
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
if (symbol.is_token) {
terminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
} else {
if (symbol.is_non_terminal()) {
nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
} else {
terminal_successors[symbol].entries[new_item] = lookahead_symbols;
}
}
}
@ -214,7 +221,7 @@ class ParseTableBuilder {
// Add a Shift action for each possible successor state. Shift actions for
// terminal lookaheads can conflict with Reduce actions added previously.
for (auto &pair : terminal_successors) {
Symbol::Index lookahead = pair.first;
Symbol lookahead = pair.first;
ParseItemSet &next_item_set = pair.second;
ParseStateId next_state_id = add_parse_state(next_item_set);
ParseState &state = parse_table.states[state_id];
@ -223,7 +230,7 @@ class ParseTableBuilder {
if (!allow_any_conflict) {
if (had_existing_action)
lookaheads_with_conflicts.insert(lookahead);
recovery_states[Symbol(lookahead, true)].add(next_item_set);
recovery_states[lookahead].add(next_item_set);
}
}
@ -234,10 +241,10 @@ class ParseTableBuilder {
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
if (!allow_any_conflict)
recovery_states[Symbol(lookahead, false)].add(next_item_set);
recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set);
}
for (Symbol::Index lookahead : lookaheads_with_conflicts) {
for (Symbol lookahead : lookaheads_with_conflicts) {
string conflict = handle_conflict(item_set, state_id, lookahead);
if (!conflict.empty()) return conflict;
}
@ -245,9 +252,9 @@ class ParseTableBuilder {
ParseAction shift_extra = ParseAction::ShiftExtra();
ParseState &state = parse_table.states[state_id];
for (const Symbol &extra_symbol : grammar.extra_tokens) {
if (!state.terminal_entries.count(extra_symbol.index) ||
if (!state.terminal_entries.count(extra_symbol) ||
state.has_shift_action() || allow_any_conflict) {
parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra);
parse_table.add_terminal_action(state_id, extra_symbol, shift_extra);
}
}
@ -257,7 +264,6 @@ class ParseTableBuilder {
void mark_fragile_actions() {
for (ParseState &state : parse_table.states) {
for (auto &entry : state.terminal_entries) {
const Symbol symbol(entry.first, true);
auto &actions = entry.second.actions;
for (ParseAction &action : actions) {
@ -359,7 +365,7 @@ class ParseTableBuilder {
}
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
Symbol::Index lookahead) {
Symbol lookahead) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
int reduction_precedence = entry.actions.front().precedence();
set<ParseItem> shift_items;
@ -468,7 +474,7 @@ class ParseTableBuilder {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
}
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
description += "\n\n";
description += "Possible interpretations:\n\n";
@ -487,7 +493,7 @@ class ParseTableBuilder {
description += " " + symbol_name(step.symbol);
}
description += ")";
description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
description += "\n";
}
}
@ -564,14 +570,23 @@ class ParseTableBuilder {
return "END_OF_INPUT";
else
return "";
} else if (symbol.is_token) {
const Variable &variable = lexical_grammar.variables[symbol.index];
if (variable.type == VariableTypeNamed)
return variable.name;
else
return "'" + variable.name + "'";
} else {
return grammar.variables[symbol.index].name;
}
switch (symbol.type) {
case Symbol::Terminal: {
const Variable &variable = lexical_grammar.variables[symbol.index];
if (variable.type == VariableTypeNamed)
return variable.name;
else
return "'" + variable.name + "'";
}
case Symbol::NonTerminal: {
return grammar.variables[symbol.index].name;
}
case Symbol::External:
default: {
return grammar.external_tokens[symbol.index].name;
}
}
}

View file

@ -12,8 +12,8 @@ using rules::Symbol;
LookaheadSet::LookaheadSet() : entries(nullptr) {}
LookaheadSet::LookaheadSet(const set<Symbol::Index> &symbols)
: entries(make_shared<set<Symbol::Index>>(symbols)) {}
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
: entries(make_shared<set<Symbol>>(symbols)) {}
bool LookaheadSet::empty() const {
return !entries.get() || entries->empty();
@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
return *entries == *other.entries;
}
bool LookaheadSet::contains(const Symbol::Index &symbol) const {
bool LookaheadSet::contains(const Symbol &symbol) const {
return entries->find(symbol) != entries->end();
}
@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) {
if (!other.entries.get())
return false;
if (!entries.get())
entries = make_shared<set<Symbol::Index>>();
entries = make_shared<set<Symbol>>();
size_t previous_size = entries->size();
entries->insert(other.entries->begin(), other.entries->end());
return entries->size() > previous_size;
}
bool LookaheadSet::insert(const Symbol::Index &symbol) {
bool LookaheadSet::insert(const Symbol &symbol) {
if (!entries.get())
entries = make_shared<set<Symbol::Index>>();
entries = make_shared<set<Symbol>>();
return entries->insert(symbol).second;
}
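LookaheadSet now stores full Symbol values rather than bare indices, so a terminal and an external token that happen to share an index no longer collide. A small usage sketch (illustrative, not from this commit), using the constructor and methods shown above:

#include <cassert>

// Hedged sketch: index 0 of the terminal space and index 0 of the
// external-token space are now distinct set members.
LookaheadSet lookaheads;
lookaheads.insert(Symbol(0, Symbol::Terminal));
lookaheads.insert(Symbol(0, Symbol::External));
assert(lookaheads.contains(Symbol(0, Symbol::Terminal)));
assert(lookaheads.contains(Symbol(0, Symbol::External)));
// Under the old Symbol::Index representation both would have been plain 0.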

View file

@ -11,15 +11,15 @@ namespace build_tables {
class LookaheadSet {
public:
LookaheadSet();
explicit LookaheadSet(const std::set<rules::Symbol::Index> &);
explicit LookaheadSet(const std::set<rules::Symbol> &);
bool empty() const;
bool operator==(const LookaheadSet &) const;
bool contains(const rules::Symbol::Index &) const;
bool contains(const rules::Symbol &) const;
bool insert_all(const LookaheadSet &);
bool insert(const rules::Symbol::Index &);
bool insert(const rules::Symbol &);
std::shared_ptr<std::set<rules::Symbol::Index>> entries;
std::shared_ptr<std::set<rules::Symbol>> entries;
};
} // namespace build_tables

View file

@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
}
Symbol ParseItem::lhs() const {
return Symbol(variable_index);
return Symbol(variable_index, Symbol::NonTerminal);
}
bool ParseItem::is_done() const {
@ -105,38 +105,6 @@ size_t ParseItemSet::unfinished_item_signature() const {
return result;
}
ParseItemSet::ActionMap ParseItemSet::actions() const {
ParseItemSet::ActionMap result;
for (const auto &pair : entries) {
const ParseItem &item = pair.first;
const LookaheadSet &lookahead_symbols = pair.second;
if (item.step_index == item.production->size()) {
int precedence = item.precedence();
for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
Action &action = result.terminal_actions[lookahead];
if (precedence > action.completion_precedence) {
action.completions.assign({ &item });
} else if (precedence == action.completion_precedence) {
action.completions.push_back({ &item });
}
}
} else {
Symbol symbol = item.production->at(item.step_index).symbol;
ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);
if (symbol.is_token) {
result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols;
} else {
result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols;
}
}
}
return result;
}
void ParseItemSet::add(const ParseItemSet &other) {
for (const auto &pair : other.entries)
entries[pair.first].insert_all(pair.second);

View file

@ -41,16 +41,6 @@ class ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
struct Completion;
struct Action;
struct ActionMap {
std::map<rules::Symbol::Index, Action> terminal_actions;
std::map<rules::Symbol::Index, ParseItemSet> nonterminal_continuations;
};
ActionMap actions() const;
bool operator==(const ParseItemSet &) const;
void add(const ParseItemSet &);
size_t unfinished_item_signature() const;
@ -58,22 +48,6 @@ class ParseItemSet {
std::map<ParseItem, LookaheadSet> entries;
};
struct ParseItemSet::Completion {
const ParseItem *item;
int precedence;
rules::Associativity associativity;
bool operator<(const ParseItemSet::Completion &other) {
return precedence < other.precedence;
}
};
struct ParseItemSet::Action {
ParseItemSet continuation;
std::vector<const ParseItem *> completions;
int completion_precedence;
};
} // namespace build_tables
} // namespace tree_sitter

View file

@ -27,12 +27,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
set<Symbol::Index> processed_non_terminals;
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol symbol(i, true);
first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
Symbol symbol(i, Symbol::Terminal);
first_sets.insert({symbol, LookaheadSet({ symbol })});
}
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
Symbol symbol(i, Symbol::External);
first_sets.insert({symbol, LookaheadSet({ symbol })});
}
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol(i);
Symbol symbol(i, Symbol::NonTerminal);
LookaheadSet first_set;
processed_non_terminals.clear();
@ -42,10 +47,10 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
Symbol current_symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (current_symbol.is_token) {
first_set.insert(current_symbol.index);
if (!current_symbol.is_non_terminal()) {
first_set.insert(current_symbol);
} else if (processed_non_terminals.insert(current_symbol.index).second) {
for (const Production &production : grammar.productions(current_symbol)) {
for (const Production &production : grammar.variables[current_symbol.index].productions) {
if (!production.empty()) {
symbols_to_process.push_back(production[0].symbol);
}
@ -59,11 +64,11 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
vector<ParseItemSetComponent> components_to_process;
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol(i);
Symbol symbol(i, Symbol::NonTerminal);
map<ParseItem, pair<LookaheadSet, bool>> cache_entry;
components_to_process.clear();
for (const Production &production : grammar.productions(symbol)) {
for (const Production &production : grammar.variables[i].productions) {
components_to_process.push_back(ParseItemSetComponent{
ParseItem(symbol, production, 0),
LookaheadSet(),
@ -87,7 +92,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
if (component_is_new) {
Symbol next_symbol = item.next_symbol();
if (next_symbol.is_built_in() || next_symbol.is_token)
if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
continue;
LookaheadSet next_lookaheads;
@ -102,7 +107,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
propagates_lookaheads = false;
}
for (const Production &production : grammar.productions(next_symbol)) {
for (const Production &production : grammar.variables[next_symbol.index].productions) {
components_to_process.push_back(ParseItemSetComponent{
ParseItem(next_symbol, production, 0),
next_lookaheads,
@ -130,7 +135,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
const LookaheadSet &lookaheads = pair.second;
const Symbol &next_symbol = item.next_symbol();
if (!next_symbol.is_token && !next_symbol.is_built_in()) {
if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
LookaheadSet next_lookaheads;
size_t next_step = item.step_index + 1;
if (next_step == item.production->size()) {

View file

@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator<true, false> {};
class LastCharacters : public CharacterAggregator<false, true> {};
class AllCharacters : public CharacterAggregator<true, true> {};
set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
set<Symbol::Index> result;
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
set<Symbol> result;
AllCharacters all_separator_characters;
for (const rule_ptr &separator : grammar.separators)
@ -79,7 +79,7 @@ set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
!all_characters.result.intersects(all_separator_characters.result);
if ((has_distinct_start && has_distinct_end) || has_no_separators)
result.insert(i);
result.insert(Symbol(i, Symbol::Terminal));
}
return result;

View file

@ -11,7 +11,7 @@ struct LexicalGrammar;
namespace build_tables {
std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -11,9 +11,11 @@
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/util/string_helpers.h"
#include "tree_sitter/runtime.h"
namespace tree_sitter {
namespace generate_code {
using std::function;
using std::map;
using std::pair;
@ -22,6 +24,7 @@ using std::string;
using std::to_string;
using std::vector;
using util::escape_char;
using rules::Symbol;
static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());
@ -73,9 +76,8 @@ class CCodeGenerator {
const LexicalGrammar lexical_grammar;
map<string, string> sanitized_names;
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<pair<size_t, set<rules::Symbol>>> in_progress_symbols;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
size_t next_in_progress_symbol_list_index;
public:
CCodeGenerator(string name, const ParseTable &parse_table,
@ -87,19 +89,26 @@ class CCodeGenerator {
lex_table(lex_table),
syntax_grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
next_parse_action_list_index(0),
next_in_progress_symbol_list_index(0) {}
next_parse_action_list_index(0) {}
string code() {
buffer = "";
add_includes();
add_state_and_symbol_counts();
add_warning_pragma();
add_stats();
add_symbol_enum();
add_symbol_names_list();
add_symbol_node_types_list();
add_symbol_metadata_list();
add_lex_function();
add_lex_states_list();
add_lex_modes_list();
if (!syntax_grammar.external_tokens.empty()) {
add_external_token_enum();
add_external_scanner_symbol_map();
add_external_scanner_states_list();
}
add_parse_table();
add_parser_export();
@ -112,10 +121,25 @@ class CCodeGenerator {
line();
}
void add_state_and_symbol_counts() {
void add_warning_pragma() {
line("#pragma GCC diagnostic push");
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
line();
}
void add_stats() {
size_t token_count = 1 + lexical_grammar.variables.size();
for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
if (external_token.corresponding_internal_token == rules::NONE()) {
token_count++;
}
}
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1));
line("#define TOKEN_COUNT " + to_string(token_count));
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
line();
}
@ -124,7 +148,7 @@ class CCodeGenerator {
indent([&]() {
size_t i = 1;
for (const auto &entry : parse_table.symbols) {
const rules::Symbol &symbol = entry.first;
const Symbol &symbol = entry.first;
if (!symbol.is_built_in()) {
line(symbol_id(symbol) + " = " + to_string(i) + ",");
i++;
@ -146,11 +170,11 @@ class CCodeGenerator {
line();
}
void add_symbol_node_types_list() {
void add_symbol_metadata_list() {
line("static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {");
indent([&]() {
for (const auto &entry : parse_table.symbols) {
const rules::Symbol &symbol = entry.first;
const Symbol &symbol = entry.first;
line("[" + symbol_id(symbol) + "] = {");
indent([&]() {
switch (symbol_type(symbol)) {
@ -198,13 +222,102 @@ class CCodeGenerator {
line();
}
void add_lex_states_list() {
line("static TSStateId ts_lex_states[STATE_COUNT] = {");
void add_lex_modes_list() {
add_external_scanner_state({});
map<Symbol::Index, Symbol::Index> external_tokens_by_corresponding_internal_token;
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
for (size_t j = 0; j < syntax_grammar.external_tokens.size(); j++) {
const ExternalToken &external_token = syntax_grammar.external_tokens[j];
if (external_token.corresponding_internal_token.index == Symbol::Index(i)) {
external_tokens_by_corresponding_internal_token.insert({i, j});
break;
}
}
}
line("static TSLexMode ts_lex_modes[STATE_COUNT] = {");
indent([&]() {
size_t state_id = 0;
for (const auto &state : parse_table.states)
line("[" + to_string(state_id++) + "] = " +
to_string(state.lex_state_id) + ",");
for (const auto &state : parse_table.states) {
line("[" + to_string(state_id++) + "] = {.lex_state = ");
add(to_string(state.lex_state_id));
bool needs_external_scanner = false;
set<Symbol::Index> external_token_indices;
for (const auto &pair : state.terminal_entries) {
Symbol symbol = pair.first;
if (symbol.is_external()) {
needs_external_scanner = true;
external_token_indices.insert(symbol.index);
} else if (symbol.is_token()) {
auto corresponding_external_token =
external_tokens_by_corresponding_internal_token.find(symbol.index);
if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) {
external_token_indices.insert(corresponding_external_token->second);
}
}
}
if (needs_external_scanner) {
add(", .external_lex_state = " + add_external_scanner_state(external_token_indices));
}
add("},");
}
});
line("};");
line();
}
string add_external_scanner_state(set<Symbol::Index> external_token_ids) {
for (size_t i = 0, n = external_scanner_states.size(); i < n; i++)
if (external_scanner_states[i] == external_token_ids)
return to_string(i);
external_scanner_states.push_back(external_token_ids);
return to_string(external_scanner_states.size() - 1);
}
void add_external_token_enum() {
line("enum {");
indent([&]() {
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++)
line(external_token_id(i) + ",");
});
line("};");
line();
}
void add_external_scanner_symbol_map() {
line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
indent([&]() {
for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
}
});
line("};");
line();
}
void add_external_scanner_states_list() {
line("static bool ts_external_scanner_states[");
add(to_string(external_scanner_states.size()));
add("][EXTERNAL_TOKEN_COUNT] = {");
indent([&]() {
size_t i = 0;
for (const auto &valid_external_lookaheads : external_scanner_states) {
if (!valid_external_lookaheads.empty()) {
line("[" + to_string(i) + "] = {");
indent([&]() {
for (Symbol::Index id : valid_external_lookaheads) {
line("[" + external_token_id(id) + "] = true,");
}
});
line("},");
}
i++;
}
});
line("};");
line();
@ -214,9 +327,6 @@ class CCodeGenerator {
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
size_t state_id = 0;
line("#pragma GCC diagnostic push");
line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
line();
line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
indent([&]() {
@ -224,12 +334,12 @@ class CCodeGenerator {
line("[" + to_string(state_id++) + "] = {");
indent([&]() {
for (const auto &entry : state.nonterminal_entries) {
line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
add(to_string(entry.second));
add("),");
}
for (const auto &entry : state.terminal_entries) {
line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
line("[" + symbol_id(entry.first) + "] = ACTIONS(");
add(to_string(add_parse_action_list_id(entry.second)));
add("),");
}
@ -242,12 +352,42 @@ class CCodeGenerator {
line();
add_parse_action_list();
line();
line("#pragma GCC diagnostic pop");
line();
}
void add_parser_export() {
line("EXPORT_LANGUAGE(ts_language_" + name + ");");
string language_function_name = "tree_sitter_" + name;
string external_scanner_name = language_function_name + "_external_scanner";
if (!syntax_grammar.external_tokens.empty()) {
line("void *" + external_scanner_name + "_create();");
line("void " + external_scanner_name + "_destroy();");
line("void " + external_scanner_name + "_reset(void *);");
line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);");
line("void " + external_scanner_name + "_deserialize(void *, const TSExternalTokenState);");
line();
}
line("const TSLanguage *" + language_function_name + "() {");
indent([&]() {
line("GET_LANGUAGE(");
if (syntax_grammar.external_tokens.empty()) {
add(");");
} else {
indent([&]() {
line("(const bool *)ts_external_scanner_states,");
line("ts_external_scanner_symbol_map,");
line(external_scanner_name + "_create,");
line(external_scanner_name + "_destroy,");
line(external_scanner_name + "_reset,");
line(external_scanner_name + "_scan,");
line(external_scanner_name + "_serialize,");
line(external_scanner_name + "_deserialize,");
});
line(");");
}
});
line("}");
line();
}
@ -379,22 +519,13 @@ class CCodeGenerator {
return result;
}
size_t add_in_progress_symbol_list_id(const set<rules::Symbol> &symbols) {
for (const auto &pair : in_progress_symbols) {
if (pair.second == symbols) {
return pair.first;
}
}
size_t result = next_in_progress_symbol_list_index;
in_progress_symbols.push_back({ result, symbols });
next_in_progress_symbol_list_index += 1 + symbols.size();
return result;
}
// Helper functions
string symbol_id(const rules::Symbol &symbol) {
string external_token_id(Symbol::Index index) {
return "ts_external_token_" + syntax_grammar.external_tokens[index].name;
}
string symbol_id(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return "ts_builtin_sym_end";
@ -411,25 +542,33 @@ class CCodeGenerator {
}
}
string symbol_name(const rules::Symbol &symbol) {
string symbol_name(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return "END";
return entry_for_symbol(symbol).first;
}
VariableType symbol_type(const rules::Symbol &symbol) {
VariableType symbol_type(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return VariableTypeHidden;
return entry_for_symbol(symbol).second;
}
pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
if (symbol.is_token) {
const Variable &variable = lexical_grammar.variables[symbol.index];
return { variable.name, variable.type };
} else {
const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
return { variable.name, variable.type };
pair<string, VariableType> entry_for_symbol(const Symbol &symbol) {
switch (symbol.type) {
case Symbol::NonTerminal: {
const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
return { variable.name, variable.type };
}
case Symbol::Terminal: {
const Variable &variable = lexical_grammar.variables[symbol.index];
return { variable.name, variable.type };
}
case Symbol::External:
default: {
const ExternalToken &token = syntax_grammar.external_tokens[symbol.index];
return { token.name, token.type };
}
}
}

View file
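
Note: the six declarations emitted by add_parser_export above are the contract for a hand-written scanner that ships with the grammar. A minimal sketch of such a scanner, assuming a hypothetical `arithmetic` grammar with a single external token; the header path, enum value, and newline-based logic are illustrative assumptions, not part of this commit:

#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include "tree_sitter/parser.h"  // assumed header providing TSLexer and TSExternalTokenState

enum { ts_external_token__terminator };  // mirrors the generated external-token enum

void *tree_sitter_arithmetic_external_scanner_create() {
  return calloc(1, sizeof(uint32_t));  // e.g. a nesting-depth counter
}

void tree_sitter_arithmetic_external_scanner_destroy(void *payload) {
  free(payload);
}

void tree_sitter_arithmetic_external_scanner_reset(void *payload) {
  *(uint32_t *)payload = 0;
}

bool tree_sitter_arithmetic_external_scanner_scan(void *payload, TSLexer *lexer,
                                                  const bool *whitelist) {
  if (!whitelist[ts_external_token__terminator]) return false;
  if (lexer->lookahead != '\n') return false;
  lexer->advance(lexer, false);  // consume the newline
  lexer->result_symbol = ts_external_token__terminator;
  return true;
}

bool tree_sitter_arithmetic_external_scanner_serialize(void *payload,
                                                       TSExternalTokenState state) {
  memcpy(state, payload, sizeof(uint32_t));  // the runtime zeroes the buffer beforehand
  return true;
}

void tree_sitter_arithmetic_external_scanner_deserialize(void *payload,
                                                         const TSExternalTokenState state) {
  memcpy(payload, state, sizeof(uint32_t));
}

The scanner reports its token by external enum index; the runtime then maps that index to a real symbol through ts_external_scanner_symbol_map, as generated above.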

@ -12,6 +12,7 @@ struct Grammar {
std::vector<std::pair<std::string, rule_ptr>> rules;
std::vector<rule_ptr> extra_tokens;
std::vector<std::vector<std::string>> expected_conflicts;
std::vector<std::string> external_tokens;
};
} // namespace tree_sitter

View file

@ -210,7 +210,7 @@ ParseGrammarResult parse_grammar(const string &input) {
string error_message;
string name;
Grammar grammar;
json_value name_json, rules_json, extras_json, conflicts_json;
json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
char parse_error[json_error_max];
@ -302,6 +302,25 @@ ParseGrammarResult parse_grammar(const string &input) {
}
}
external_tokens_json = grammar_json->operator[]("externals");
if (external_tokens_json.type != json_none) {
if (external_tokens_json.type != json_array) {
error_message = "External tokens must be an array";
goto error;
}
for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
json_value *token_name_json = external_tokens_json.u.array.values[i];
if (token_name_json->type != json_string) {
error_message = "External token values must be strings";
goto error;
}
string token_name = token_name_json->u.string.ptr;
grammar.external_tokens.push_back(token_name);
}
}
json_value_free(grammar_json);
return { name, grammar, "" };

View file
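
Note: an input exercising the new `externals` branch might look like the following, shown as a string literal suitable for passing to parse_grammar. The grammar name and rule are hypothetical; the external token names must match the schema's identifier pattern:

// Hypothetical grammar input with an "externals" section:
static const char *grammar_with_externals =
  "{"
  "  \"name\": \"arithmetic\","
  "  \"externals\": [\"_terminator\"],"
  "  \"rules\": {"
  "    \"expression\": {\"type\": \"STRING\", \"value\": \"x\"}"
  "  }"
  "}";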

@ -1,6 +1,7 @@
#include "compiler/parse_table.h"
#include <string>
#include "compiler/precedence_range.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
@ -28,7 +29,7 @@ ParseAction::ParseAction()
extra(false),
fragile(false),
state_index(-1),
symbol(Symbol(-1)),
symbol(rules::NONE()),
consumed_symbol_count(0),
production(nullptr) {}
@ -43,11 +44,11 @@ ParseAction ParseAction::Accept() {
}
ParseAction ParseAction::Shift(ParseStateId state_index) {
return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr);
return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
}
ParseAction ParseAction::Recover(ParseStateId state_index) {
return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0,
return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
nullptr);
}
@ -150,9 +151,7 @@ bool ParseState::has_shift_action() const {
set<Symbol> ParseState::expected_inputs() const {
set<Symbol> result;
for (auto &entry : terminal_entries)
result.insert(Symbol(entry.first, true));
for (auto &entry : nonterminal_entries)
result.insert(Symbol(entry.first, false));
result.insert(entry.first);
return result;
}
@ -182,33 +181,24 @@ ParseStateId ParseTable::add_state() {
return states.size() - 1;
}
ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
Symbol::Index index,
ParseAction action) {
states[state_id].terminal_entries[index].actions.clear();
return add_terminal_action(state_id, index, action);
}
ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
Symbol::Index index,
Symbol lookahead,
ParseAction action) {
Symbol symbol(index, true);
if (action.type == ParseActionTypeShift && action.extra)
symbols[symbol].extra = true;
symbols[lookahead].extra = true;
else
symbols[symbol].structural = true;
symbols[lookahead].structural = true;
ParseTableEntry &entry = states[state_id].terminal_entries[index];
ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
entry.actions.push_back(action);
return *entry.actions.rbegin();
}
void ParseTable::set_nonterminal_action(ParseStateId state_id,
Symbol::Index index,
Symbol::Index lookahead,
ParseStateId next_state_id) {
Symbol symbol(index, false);
symbols[symbol].structural = true;
states[state_id].nonterminal_entries[index] = next_state_id;
symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
states[state_id].nonterminal_entries[lookahead] = next_state_id;
}
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
@ -226,12 +216,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
return false;
for (auto &entry : state.terminal_entries) {
Symbol::Index index = entry.first;
Symbol lookahead = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
const auto &other_entry = other.terminal_entries.find(index);
const auto &other_entry = other.terminal_entries.find(lookahead);
if (other_entry == other.terminal_entries.end()) {
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
@ -242,25 +232,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
}
}
set<Symbol::Index> symbols_to_merge;
set<Symbol> symbols_to_merge;
for (auto &entry : other.terminal_entries) {
Symbol::Index index = entry.first;
Symbol lookahead = entry.first;
const vector<ParseAction> &actions = entry.second.actions;
if (!state.terminal_entries.count(index)) {
if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
if (!state.terminal_entries.count(lookahead)) {
if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
return false;
if (actions.back().type != ParseActionTypeReduce)
return false;
if (!has_entry(state, entry.second))
return false;
symbols_to_merge.insert(index);
symbols_to_merge.insert(lookahead);
}
}
for (const Symbol::Index &index : symbols_to_merge)
state.terminal_entries[index] = other.terminal_entries.find(index)->second;
for (const Symbol &lookahead : symbols_to_merge)
state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;
return true;
}

View file

@ -76,7 +76,7 @@ class ParseState {
void each_referenced_state(std::function<void(ParseStateId *)>);
bool has_shift_action() const;
std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
std::map<rules::Symbol, ParseTableEntry> terminal_entries;
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
LexStateId lex_state_id;
size_t shift_actions_signature;
@ -91,15 +91,14 @@ class ParseTable {
public:
std::set<rules::Symbol> all_symbols() const;
ParseStateId add_state();
ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
bool merge_state(size_t i, size_t j);
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::set<rules::Symbol::Index> mergeable_symbols;
std::set<rules::Symbol> mergeable_symbols;
};
} // namespace tree_sitter

View file

@ -39,7 +39,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
rule_ptr inner_rule = apply(rule->content);
size_t index = aux_rules.size();
string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
Symbol repeat_symbol(offset + index);
Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
existing_repeats.push_back({ rule->copy(), repeat_symbol });
aux_rules.push_back(
Variable(helper_rule_name, VariableTypeAuxiliary,
@ -65,6 +65,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
result.variables = grammar.variables;
result.extra_tokens = grammar.extra_tokens;
result.expected_conflicts = grammar.expected_conflicts;
result.external_tokens = grammar.external_tokens;
ExpandRepeats expander(result.variables.size());
for (auto &variable : result.variables)

View file

@ -38,7 +38,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
map<Symbol, Symbol> replacements;
Symbol replace_symbol(const Symbol &symbol) {
if (symbol.is_built_in() || symbol.is_token)
if (!symbol.is_non_terminal())
return symbol;
auto replacement_pair = replacements.find(symbol);
@ -49,7 +49,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
for (const auto &pair : replacements)
if (pair.first.index < symbol.index)
new_index--;
return Symbol(new_index);
return Symbol(new_index, Symbol::NonTerminal);
}
};
@ -60,14 +60,14 @@ class TokenExtractor : public rules::IdentityRuleFn {
for (size_t i = 0; i < tokens.size(); i++)
if (tokens[i].rule->operator==(*input)) {
token_usage_counts[i]++;
return make_shared<Symbol>(i, true);
return make_shared<Symbol>(i, Symbol::Terminal);
}
rule_ptr rule = input->copy();
size_t index = tokens.size();
tokens.push_back(Variable(token_description(rule), entry_type, rule));
token_usage_counts.push_back(1);
return make_shared<Symbol>(index, true);
return make_shared<Symbol>(index, Symbol::Terminal);
}
rule_ptr apply_to(const rules::String *rule) {
@ -90,9 +90,8 @@ class TokenExtractor : public rules::IdentityRuleFn {
vector<Variable> tokens;
};
static CompileError ubiq_token_err(const string &message) {
return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
"Not a token: " + message);
static CompileError extra_token_error(const string &message) {
return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message);
}
tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
@ -122,11 +121,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
size_t i = 0;
for (const Variable &variable : processed_variables) {
auto symbol = variable.rule->as<Symbol>();
if (symbol && symbol->is_token && !symbol->is_built_in() &&
extractor.token_usage_counts[symbol->index] == 1) {
if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
lexical_grammar.variables[symbol->index].type = variable.type;
lexical_grammar.variables[symbol->index].name = variable.name;
symbol_replacer.replacements.insert({ Symbol(i), *symbol });
symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
} else {
syntax_grammar.variables.push_back(variable);
}
@ -158,7 +156,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
bool used_elsewhere_in_grammar = false;
for (const Variable &variable : lexical_grammar.variables) {
if (variable.rule->operator==(*rule)) {
syntax_grammar.extra_tokens.insert(Symbol(i, true));
syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
used_elsewhere_in_grammar = true;
}
i++;
@ -175,17 +173,39 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
auto symbol = rule->as<Symbol>();
if (!symbol)
return make_tuple(syntax_grammar, lexical_grammar,
ubiq_token_err(rule->to_string()));
extra_token_error(rule->to_string()));
Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
if (!new_symbol.is_token)
if (new_symbol.is_non_terminal()) {
return make_tuple(
syntax_grammar, lexical_grammar,
ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
extra_token_error(syntax_grammar.variables[new_symbol.index].name));
}
syntax_grammar.extra_tokens.insert(new_symbol);
}
for (const ExternalToken &external_token : grammar.external_tokens) {
Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token);
if (internal_token.is_non_terminal()) {
return make_tuple(
syntax_grammar,
lexical_grammar,
CompileError(
TSCompileErrorTypeInvalidExternalToken,
"Name '" + external_token.name + "' cannot be used for both an external token and a non-terminal rule"
)
);
}
syntax_grammar.external_tokens.push_back({
external_token.name,
external_token.type,
internal_token
});
}
return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
}

View file

@ -92,6 +92,7 @@ pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &gr
SyntaxGrammar result;
result.expected_conflicts = grammar.expected_conflicts;
result.extra_tokens = grammar.extra_tokens;
result.external_tokens = grammar.external_tokens;
bool is_start = true;
for (const Variable &variable : grammar.variables) {

View file

@ -1,13 +1,12 @@
#ifndef COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
#include <vector>
#include <string>
#include <set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/variable.h"
#include "compiler/syntax_grammar.h"
#include "compiler/variable.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -16,6 +15,7 @@ struct InitialSyntaxGrammar {
std::vector<Variable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};
} // namespace prepare_grammar

View file

@ -8,6 +8,7 @@
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
namespace tree_sitter {
namespace prepare_grammar {
@ -17,8 +18,9 @@ using std::vector;
using std::set;
using std::pair;
using std::make_shared;
using rules::Symbol;
class InternSymbols : public rules::IdentityRuleFn {
class SymbolInterner : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
rule_ptr apply_to(const rules::NamedSymbol *rule) {
@ -34,11 +36,14 @@ class InternSymbols : public rules::IdentityRuleFn {
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules.size(); i++)
if (grammar.rules[i].first == rule_name)
return make_shared<rules::Symbol>(i);
return make_shared<Symbol>(i, Symbol::NonTerminal);
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
if (grammar.external_tokens[i] == rule_name)
return make_shared<Symbol>(i, Symbol::External);
return nullptr;
}
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};
@ -50,16 +55,35 @@ CompileError missing_rule_error(string rule_name) {
pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
InternedGrammar result;
InternSymbols interner(grammar);
for (auto &external_token_name : grammar.external_tokens) {
Symbol corresponding_internal_token = rules::NONE();
for (size_t i = 0, n = grammar.rules.size(); i < n; i++) {
if (grammar.rules[i].first == external_token_name) {
corresponding_internal_token = Symbol(i, Symbol::NonTerminal);
break;
}
}
result.external_tokens.push_back(ExternalToken{
external_token_name,
external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
corresponding_internal_token
});
}
SymbolInterner interner(grammar);
for (auto &pair : grammar.rules) {
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };
result.variables.push_back(Variable(
pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule));
result.variables.push_back(Variable{
pair.first,
pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule
});
}
for (auto &rule : grammar.extra_tokens) {

View file

@ -15,6 +15,7 @@ struct InternedGrammar {
std::vector<Variable> variables;
std::vector<rule_ptr> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};
} // namespace prepare_grammar

View file

@ -4,15 +4,15 @@ namespace tree_sitter {
namespace rules {
Symbol END_OF_INPUT() {
return Symbol(-1, true);
return Symbol(-1, Symbol::Terminal);
}
Symbol START() {
return Symbol(-2);
return Symbol(-2, Symbol::NonTerminal);
}
Symbol NONE() {
return Symbol(-3);
return Symbol(-3, Symbol::Type(-1));
}
} // namespace rules

View file

@ -11,12 +11,10 @@ using std::string;
using std::to_string;
using util::hash_combine;
Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {}
Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {}
Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {}
bool Symbol::operator==(const Symbol &other) const {
return (other.index == index) && (other.is_token == is_token);
return (other.index == index) && (other.type == type);
}
bool Symbol::operator==(const Rule &rule) const {
@ -27,7 +25,7 @@ bool Symbol::operator==(const Rule &rule) const {
size_t Symbol::hash_code() const {
size_t result = 0;
hash_combine(&result, index);
hash_combine(&result, is_token);
hash_combine<int>(&result, type);
return result;
}
@ -36,14 +34,22 @@ rule_ptr Symbol::copy() const {
}
string Symbol::to_string() const {
string name = is_token ? "token" : "sym";
return "(" + name + " " + std::to_string(index) + ")";
switch (type) {
case Symbol::Terminal:
return "(terminal " + std::to_string(index) + ")";
case Symbol::NonTerminal:
return "(non-terminal " + std::to_string(index) + ")";
case Symbol::External:
return "(external " + std::to_string(index) + ")";
default:
return "(none)";
}
}
bool Symbol::operator<(const Symbol &other) const {
if (is_token && !other.is_token)
if (type < other.type)
return true;
if (!is_token && other.is_token)
if (other.type < type)
return false;
return (index < other.index);
}
@ -56,6 +62,18 @@ bool Symbol::is_built_in() const {
return is_built_in(index);
}
bool Symbol::is_token() const {
return type == Symbol::Terminal;
}
bool Symbol::is_external() const {
return type == Symbol::External;
}
bool Symbol::is_non_terminal() const {
return type == Symbol::NonTerminal;
}
void Symbol::accept(Visitor *visitor) const {
visitor->visit(this);
}

View file

@ -11,9 +11,13 @@ class Symbol : public Rule {
public:
typedef int Index;
typedef enum {
External,
Terminal,
NonTerminal,
} Type;
explicit Symbol(Index index);
Symbol(Index index, bool is_token);
Symbol(Index index, Type type);
bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;
@ -26,9 +30,12 @@ class Symbol : public Rule {
bool operator<(const Symbol &other) const;
static bool is_built_in(Index);
bool is_built_in() const;
bool is_token() const;
bool is_external() const;
bool is_non_terminal() const;
Index index;
bool is_token;
Type type;
};
} // namespace rules

View file

@ -16,6 +16,7 @@ class String;
class Symbol;
class Pattern;
class Metadata;
class ExternalToken;
class Visitor {
public:
@ -29,6 +30,7 @@ class Visitor {
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
virtual void visit(const ExternalToken *rule) = 0;
virtual ~Visitor();
};
@ -86,6 +88,10 @@ class RuleFn : private Visitor {
return default_apply((const Rule *)rule);
}
virtual T apply_to(const ExternalToken *rule) {
return default_apply((const Rule *)rule);
}
void visit(const Blank *rule) {
value_ = apply_to(rule);
}
@ -126,6 +132,10 @@ class RuleFn : private Visitor {
value_ = apply_to(rule);
}
void visit(const ExternalToken *rule) {
value_ = apply_to(rule);
}
private:
T value_;
};
@ -170,6 +180,9 @@ class RuleFn<void> : private Visitor {
virtual void apply_to(const Symbol *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const ExternalToken *rule) {
return default_apply((const Rule *)rule);
}
void visit(const Blank *rule) {
apply_to(rule);
@ -201,6 +214,9 @@ class RuleFn<void> : private Visitor {
void visit(const Symbol *rule) {
apply_to(rule);
}
void visit(const ExternalToken *rule) {
apply_to(rule);
}
};
class IdentityRuleFn : public RuleFn<rule_ptr> {

View file

@ -13,8 +13,6 @@ using std::pair;
using std::vector;
using std::set;
static const vector<Production> NO_PRODUCTIONS;
SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
const vector<Production> &productions)
: name(name), productions(productions), type(type) {}
@ -23,18 +21,14 @@ ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity)
: symbol(symbol), precedence(precedence), associativity(associativity) {}
bool ExternalToken::operator==(const ExternalToken &other) const {
return name == other.name && type == other.type &&
corresponding_internal_token == other.corresponding_internal_token;
}
bool ProductionStep::operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
associativity == other.associativity;
}
const vector<Production> &SyntaxGrammar::productions(
const rules::Symbol &symbol) const {
if (symbol.is_built_in() || symbol.is_token) {
return NO_PRODUCTIONS;
} else {
return variables[symbol.index].productions;
}
}
} // namespace tree_sitter

View file

@ -10,6 +10,14 @@
namespace tree_sitter {
struct ExternalToken {
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;
bool operator==(const ExternalToken &) const;
};
struct ProductionStep {
ProductionStep(const rules::Symbol &, int, rules::Associativity);
bool operator==(const ProductionStep &) const;
@ -33,11 +41,10 @@ struct SyntaxVariable {
typedef std::set<rules::Symbol> ConflictSet;
struct SyntaxGrammar {
const std::vector<Production> &productions(const rules::Symbol &) const;
std::vector<SyntaxVariable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};
} // namespace tree_sitter

View file

@ -36,8 +36,9 @@ const TSLanguage *ts_document_language(TSDocument *self) {
}
void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
if (language->version != TREE_SITTER_LANGUAGE_VERSION) return;
ts_document_invalidate(self);
self->parser.language = language;
parser_set_language(&self->parser, language);
if (self->tree) {
ts_tree_release(self->tree);
self->tree = NULL;

View file

@ -34,6 +34,10 @@ uint32_t ts_language_symbol_count(const TSLanguage *language) {
return language->symbol_count;
}
uint32_t ts_language_version(const TSLanguage *language) {
return language->version;
}
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
TSSymbol symbol) {
if (symbol == ts_builtin_sym_error)

View file

@ -19,6 +19,10 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry
TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);
static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
return 0 < symbol && symbol < self->external_token_count + 1;
}
static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
TSStateId state,
TSSymbol symbol,
@ -49,6 +53,16 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
}
}
static inline const bool *
ts_language_enabled_external_tokens(const TSLanguage *self,
unsigned external_scanner_state) {
if (external_scanner_state == 0) {
return NULL;
} else {
return self->external_scanner.states + self->external_token_count * external_scanner_state;
}
}
#ifdef __cplusplus
}
#endif

View file
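
Note: external_scanner.states is a flattened two-dimensional array with one row per external scanner state, so row n begins at offset n * external_token_count; state 0 is reserved to mean "no external tokens are valid". A usage sketch, where `language`, `state`, and the token index are illustrative names:

// Check whether an external token may be recognized in the current parse state.
TSLexMode mode = language->lex_modes[state];
const bool *valid = ts_language_enabled_external_tokens(language, mode.external_lex_state);
if (valid && valid[ts_external_token__terminator]) {
  // the external scanner will be consulted before the internal lexer here
}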

@ -21,12 +21,11 @@ static inline void length_set_unknown_chars(Length *self) {
}
static inline Length length_min(Length len1, Length len2) {
return (len1.chars < len2.chars) ? len1 : len2;
return (len1.bytes < len2.bytes) ? len1 : len2;
}
static inline Length length_add(Length len1, Length len2) {
Length result;
result.chars = len1.chars + len2.chars;
result.bytes = len1.bytes + len2.bytes;
result.extent = point_add(len1.extent, len2.extent);
@ -57,10 +56,4 @@ static inline Length length_zero() {
return (Length){ 0, 0, {0, 0} };
}
static inline bool length_eq(Length self, Length other) {
return self.bytes == other.bytes && self.chars == other.chars &&
self.extent.row == other.extent.row &&
self.extent.column == other.extent.column;
}
#endif

View file

@ -11,11 +11,8 @@
self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \
}
#define LOG_LOOKAHEAD() \
LOG((0 < self->data.lookahead && self->data.lookahead < 256) \
? "lookahead char:'%c'" \
: "lookahead char:%d", \
self->data.lookahead);
#define LOG_CHARACTER(message, character) \
LOG(character < 255 ? message " character:'%c'" : message " character:%d", character)
static const char empty_chunk[2] = { 0, 0 };
@ -42,11 +39,9 @@ static void ts_lexer__get_lookahead(Lexer *self) {
utf8proc_iterate(chunk, size, &self->data.lookahead);
else
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);
LOG_LOOKAHEAD();
}
static void ts_lexer__advance(void *payload, TSStateId state, bool skip) {
static void ts_lexer__advance(void *payload, bool skip) {
Lexer *self = (Lexer *)payload;
if (self->chunk == empty_chunk)
return;
@ -63,10 +58,10 @@ static void ts_lexer__advance(void *payload, TSStateId state, bool skip) {
}
if (skip) {
LOG("skip_separator state:%d", state);
LOG_CHARACTER("skip", self->data.lookahead);
self->token_start_position = self->current_position;
} else {
LOG("advance state:%d", state);
LOG_CHARACTER("consume", self->data.lookahead);
}
if (self->current_position.bytes >= self->chunk_start + self->chunk_size)
@ -93,6 +88,7 @@ void ts_lexer_init(Lexer *self) {
.payload = NULL,
.log = NULL
},
.last_external_token_state = NULL,
};
ts_lexer_reset(self, length_zero());
}
@ -115,17 +111,16 @@ static inline void ts_lexer__reset(Lexer *self, Length position) {
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
ts_lexer__reset(self, length_zero());
self->last_external_token_state = NULL;
}
void ts_lexer_reset(Lexer *self, Length position) {
if (!length_eq(position, self->current_position))
if (position.bytes != self->current_position.bytes) {
ts_lexer__reset(self, position);
return;
}
}
void ts_lexer_start(Lexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%u", lex_state, self->current_position.chars);
void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->data.result_symbol = 0;

View file

@ -25,12 +25,13 @@ typedef struct {
TSInput input;
TSLogger logger;
char debug_buffer[TS_DEBUG_BUFFER_SIZE];
const TSExternalTokenState *last_external_token_state;
} Lexer;
void ts_lexer_init(Lexer *);
void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
void ts_lexer_start(Lexer *, TSStateId);
void ts_lexer_start(Lexer *);
#ifdef __cplusplus
}

View file

@ -39,7 +39,15 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
static inline uint32_t ts_node__relevant_child_count(TSNode self,
bool include_anonymous) {
const Tree *tree = ts_node__tree(self);
return include_anonymous ? tree->visible_child_count : tree->named_child_count;
if (tree->child_count > 0) {
if (include_anonymous) {
return tree->visible_child_count;
} else {
return tree->named_child_count;
}
} else {
return 0;
}
}
static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) {
@ -324,11 +332,21 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
}
uint32_t ts_node_child_count(TSNode self) {
return ts_node__tree(self)->visible_child_count;
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
return tree->visible_child_count;
} else {
return 0;
}
}
uint32_t ts_node_named_child_count(TSNode self) {
return ts_node__tree(self)->named_child_count;
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
return tree->named_child_count;
} else {
return 0;
}
}
TSNode ts_node_next_sibling(TSNode self) {

View file

@ -109,28 +109,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
return did_break_down;
}
static void parser__pop_reusable_node(ReusableNode *reusable_node) {
reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree);
while (reusable_node->tree) {
Tree *parent = reusable_node->tree->context.parent;
uint32_t next_index = reusable_node->tree->context.index + 1;
if (parent && parent->child_count > next_index) {
reusable_node->tree = parent->children[next_index];
return;
}
reusable_node->tree = parent;
}
}
static bool parser__breakdown_reusable_node(ReusableNode *reusable_node) {
if (reusable_node->tree->child_count == 0) {
return false;
} else {
reusable_node->tree = reusable_node->tree->children[0];
return true;
}
}
static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
TSStateId state,
ReusableNode *reusable_node) {
@ -140,12 +118,11 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
reusable_node->tree->fragile_left ||
reusable_node->tree->fragile_right)) {
LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
parser__breakdown_reusable_node(reusable_node);
reusable_node_breakdown(reusable_node);
result = true;
}
if (result) {
LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
ts_tree_release(*lookahead);
ts_tree_retain(*lookahead = reusable_node->tree);
}
@ -153,16 +130,20 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
return result;
}
static void parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
while (reusable_node->tree->child_count > 0)
reusable_node->tree = reusable_node->tree->children[0];
parser__pop_reusable_node(reusable_node);
static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) {
return self.lex_state == other.lex_state &&
self.external_lex_state == other.external_lex_state;
}
static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
TableEntry *table_entry) {
if (tree->first_leaf.lex_state == self->language->lex_states[state])
TSLexMode current_lex_mode = self->language->lex_modes[state];
if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode))
return true;
if (current_lex_mode.external_lex_state != 0)
return false;
if (tree->size.bytes == 0)
return false;
if (!table_entry->is_reusable)
return false;
if (!table_entry->depends_on_lookahead)
@ -208,28 +189,76 @@ static bool parser__condense_stack(Parser *self) {
return result;
}
static Tree *parser__lex(Parser *self, TSStateId parse_state) {
TSStateId start_state = self->language->lex_states[parse_state];
TSStateId current_state = start_state;
Length start_position = self->lexer.current_position;
LOG("lex state:%d", start_state);
static void parser__restore_external_scanner(Parser *self, StackVersion version) {
const TSExternalTokenState *state = ts_stack_external_token_state(self->stack, version);
if (self->lexer.last_external_token_state != state) {
LOG("restore_external_scanner");
self->lexer.last_external_token_state = state;
if (state) {
self->language->external_scanner.deserialize(
self->external_scanner_payload,
*state
);
} else {
self->language->external_scanner.reset(self->external_scanner_payload);
}
}
}
static Tree *parser__lex(Parser *self, StackVersion version) {
TSStateId parse_state = ts_stack_top_state(self->stack, version);
Length start_position = ts_stack_top_position(self->stack, version);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
);
bool found_external_token = false;
bool found_error = false;
bool skipped_error = false;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
ts_lexer_reset(&self->lexer, start_position);
ts_lexer_start(&self->lexer, start_state);
for (;;) {
Length current_position = self->lexer.current_position;
while (!self->language->lex_fn(&self->lexer.data, current_state)) {
if (current_state != ERROR_STATE) {
if (valid_external_tokens) {
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state,
current_position.extent.row, current_position.extent.column);
parser__restore_external_scanner(self, version);
ts_lexer_start(&self->lexer);
if (self->language->external_scanner.scan(self->external_scanner_payload,
&self->lexer.data, valid_external_tokens)) {
found_external_token = true;
break;
}
ts_lexer_reset(&self->lexer, current_position);
}
LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state,
current_position.extent.row, current_position.extent.column);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
break;
}
if (!found_error) {
LOG("retry_in_error_mode");
current_state = ERROR_STATE;
found_error = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
);
ts_lexer_reset(&self->lexer, start_position);
ts_lexer_start(&self->lexer, current_state);
continue;
}
if (!skipped_error) {
LOG("skip_unrecognized_character");
skipped_error = true;
error_start_position = self->lexer.token_start_position;
first_error_character = self->lexer.data.lookahead;
}
@ -239,15 +268,13 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
self->lexer.data.result_symbol = ts_builtin_sym_error;
break;
}
self->lexer.data.advance(&self->lexer, ERROR_STATE, false);
self->lexer.data.advance(&self->lexer, false);
}
skipped_error = true;
error_end_position = self->lexer.current_position;
}
Tree *result;
if (skipped_error) {
Length padding = length_sub(error_start_position, start_position);
Length size = length_sub(error_end_position, error_start_position);
@ -255,20 +282,28 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
Length padding =
length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position,
self->lexer.token_start_position);
result =
ts_tree_make_leaf(symbol, padding, size,
ts_language_symbol_metadata(self->language, symbol));
if (found_external_token) {
symbol = self->language->external_scanner.symbol_map[symbol];
}
Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
result = ts_tree_make_leaf(symbol, padding, size, metadata);
if (found_external_token) {
result->has_external_tokens = true;
result->has_external_token_state = true;
memset(result->external_token_state, 0, sizeof(TSExternalTokenState));
self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state);
self->lexer.last_external_token_state = &result->external_token_state;
}
}
if (!result)
return NULL;
result->parse_state = parse_state;
result->first_leaf.lex_state = start_state;
result->first_leaf.lex_mode = lex_mode;
LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes);
return result;
}
@ -277,21 +312,31 @@ static void parser__clear_cached_token(Parser *self) {
self->cached_token = NULL;
}
static inline bool ts_external_token_state_eq(const TSExternalTokenState *self,
const TSExternalTokenState *other) {
if (self == other) {
return true;
} else if (!self || !other) {
return false;
} else {
return memcmp(self, other, sizeof(TSExternalTokenState)) == 0;
}
}
static Tree *parser__get_lookahead(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
ReusableNode *reusable_node,
bool *is_fresh) {
Length position = ts_stack_top_position(self->stack, version);
while (reusable_node->tree) {
if (reusable_node->byte_index > position.bytes) {
LOG("before_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol));
break;
}
if (reusable_node->byte_index < position.bytes) {
LOG("past_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
parser__pop_reusable_node(reusable_node);
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
reusable_node_pop(reusable_node);
continue;
}
@ -299,8 +344,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
LOG("cant_reuse_changed tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!parser__breakdown_reusable_node(reusable_node)) {
parser__pop_reusable_node(reusable_node);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
@ -310,8 +355,21 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
LOG("cant_reuse_error tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!parser__breakdown_reusable_node(reusable_node)) {
parser__pop_reusable_node(reusable_node);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
}
if (!ts_external_token_state_eq(
reusable_node->preceding_external_token_state,
ts_stack_external_token_state(self->stack, version))) {
LOG("cant_reuse_external_tokens tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
@ -327,9 +385,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
return self->cached_token;
}
ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
return parser__lex(self, parse_state);
*is_fresh = true;
return parser__lex(self, version);
}
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
@ -407,6 +464,10 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state,
bool is_pending = lookahead->child_count > 0;
ts_stack_push(self->stack, version, lookahead, is_pending, state);
if (lookahead->has_external_token_state) {
ts_stack_set_external_token_state(
self->stack, version, ts_tree_last_external_token_state(lookahead));
}
ts_tree_release(lookahead);
}
@ -729,9 +790,13 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
LOG("new_parse");
}
if (self->language->external_scanner.reset) {
self->language->external_scanner.reset(self->external_scanner_payload);
}
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
self->reusable_node = (ReusableNode){ previous_tree, 0 };
self->reusable_node = reusable_node_new(previous_tree);
self->cached_token = NULL;
self->finished_tree = NULL;
}
@ -950,30 +1015,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
static void parser__advance(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
Tree *lookahead = parser__get_lookahead(self, version, reusable_node);
Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
&table_entry);
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
if (!validated_lookahead) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree)
parser__pop_reusable_node_leaf(reusable_node);
else
if (lookahead == reusable_node->tree) {
reusable_node_pop_leaf(reusable_node);
} else {
parser__clear_cached_token(self);
}
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
continue;
}
validated_lookahead = true;
LOG("lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol),
lookahead->size.bytes);
LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
}
bool reduction_stopped_at_error = false;
@ -996,12 +1060,11 @@ static void parser__advance(Parser *self, StackVersion version,
}
if (lookahead->child_count > 0) {
if (parser__breakdown_lookahead(self, &lookahead, state,
reusable_node)) {
if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
}
}
@ -1011,7 +1074,7 @@ static void parser__advance(Parser *self, StackVersion version,
parser__shift(self, version, next_state, lookahead, extra);
if (lookahead == reusable_node->tree)
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
return;
@ -1053,7 +1116,7 @@ static void parser__advance(Parser *self, StackVersion version,
case TSParseActionTypeRecover: {
while (lookahead->child_count > 0) {
parser__breakdown_reusable_node(reusable_node);
reusable_node_breakdown(reusable_node);
ts_tree_release(lookahead);
lookahead = reusable_node->tree;
ts_tree_retain(lookahead);
@ -1061,7 +1124,7 @@ static void parser__advance(Parser *self, StackVersion version,
parser__recover(self, version, action.params.to_state, lookahead);
if (lookahead == reusable_node->tree)
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
return;
}
@ -1103,6 +1166,18 @@ bool parser_init(Parser *self) {
return true;
}
void parser_set_language(Parser *self, const TSLanguage *language) {
if (self->external_scanner_payload && self->language->external_scanner.destroy)
self->language->external_scanner.destroy(self->external_scanner_payload);
if (language && language->external_scanner.create)
self->external_scanner_payload = language->external_scanner.create();
else
self->external_scanner_payload = NULL;
self->language = language;
}
void parser_destroy(Parser *self) {
if (self->stack)
ts_stack_delete(self->stack);
@ -1112,6 +1187,7 @@ void parser_destroy(Parser *self) {
array_delete(&self->tree_path1);
if (self->tree_path2.contents)
array_delete(&self->tree_path2);
parser_set_language(self, NULL);
}
Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
@ -1128,15 +1204,14 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {
while (!ts_stack_is_halted(self->stack, version)) {
position = ts_stack_top_position(self->stack, version).chars;
if (position > last_position ||
(version > 0 && position == last_position))
if (position > last_position || (version > 0 && position == last_position))
break;
LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_top_state(self->stack, version),
ts_stack_top_position(self->stack, version).extent.row + 1,
ts_stack_top_position(self->stack, version).extent.column + 1);
ts_stack_top_position(self->stack, version).extent.row,
ts_stack_top_position(self->stack, version).extent.column);
parser__advance(self, version, &reusable_node);
LOG_STACK();

View file
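
Note: the interleaved hunks above implement a two-phase lexing strategy. A condensed sketch of the control flow in parser__lex, with logging and error bookkeeping elided (a paraphrase of the diff, not the literal code):

for (;;) {
  if (valid_external_tokens) {
    parser__restore_external_scanner(self, version);
    ts_lexer_start(&self->lexer);
    if (self->language->external_scanner.scan(self->external_scanner_payload,
                                              &self->lexer.data, valid_external_tokens)) {
      found_external_token = true;
      break;  // an external token takes precedence
    }
    ts_lexer_reset(&self->lexer, current_position);  // rewind for the internal lexer
  }
  ts_lexer_start(&self->lexer);
  if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state))
    break;  // internal token found
  // otherwise: switch to the error-state lex mode once, retry,
  // and finally skip unrecognized characters one at a time
}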

@ -8,13 +8,9 @@ extern "C" {
#include "runtime/stack.h"
#include "runtime/array.h"
#include "runtime/lexer.h"
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"
typedef struct {
Tree *tree;
uint32_t byte_index;
} ReusableNode;
typedef struct {
Lexer lexer;
Stack *stack;
@ -29,11 +25,14 @@ typedef struct {
ReusableNode reusable_node;
TreePath tree_path1;
TreePath tree_path2;
void *external_scanner_payload;
Tree *last_external_token;
} Parser;
bool parser_init(Parser *);
void parser_destroy(Parser *);
Tree *parser_parse(Parser *, TSInput, Tree *);
void parser_set_language(Parser *, const TSLanguage *);
#ifdef __cplusplus
}

View file

@ -0,0 +1,50 @@
#include "runtime/tree.h"
typedef struct {
Tree *tree;
uint32_t byte_index;
bool has_preceding_external_token;
const TSExternalTokenState *preceding_external_token_state;
} ReusableNode;
static inline ReusableNode reusable_node_new(Tree *tree) {
return (ReusableNode){
.tree = tree,
.byte_index = 0,
.has_preceding_external_token = false,
.preceding_external_token_state = NULL,
};
}
static inline void reusable_node_pop(ReusableNode *self) {
self->byte_index += ts_tree_total_bytes(self->tree);
if (self->tree->has_external_tokens) {
self->has_preceding_external_token = true;
self->preceding_external_token_state = ts_tree_last_external_token_state(self->tree);
}
while (self->tree) {
Tree *parent = self->tree->context.parent;
uint32_t next_index = self->tree->context.index + 1;
if (parent && parent->child_count > next_index) {
self->tree = parent->children[next_index];
return;
}
self->tree = parent;
}
}
static inline void reusable_node_pop_leaf(ReusableNode *self) {
while (self->tree->child_count > 0)
self->tree = self->tree->children[0];
reusable_node_pop(self);
}
static inline bool reusable_node_breakdown(ReusableNode *self) {
if (self->tree->child_count == 0) {
return false;
} else {
self->tree = self->tree->children[0];
return true;
}
}

View file
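
Note: reusable_node_pop advances to the next subtree in document order, accumulating the byte offset and remembering any external token state it passes. A small sketch of walking a previous tree with it, assuming a populated tree pointer:

#include <stdio.h>
#include "runtime/reusable_node.h"

void print_reusable_subtrees(Tree *tree) {
  ReusableNode node = reusable_node_new(tree);
  while (node.tree) {
    printf("subtree sym:%u starts at byte %u\n",
           (unsigned)node.tree->symbol, (unsigned)node.byte_index);
    reusable_node_pop(&node);
  }
}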

@ -50,6 +50,7 @@ typedef struct {
StackNode *node;
bool is_halted;
unsigned push_count;
const TSExternalTokenState *external_token_state;
} StackHead;
struct Stack {
@ -168,11 +169,13 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
}
static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
unsigned push_count) {
unsigned push_count,
const TSExternalTokenState *external_token_state) {
StackHead head = {
.node = node,
.is_halted = false,
.push_count = push_count,
.external_token_state = external_token_state,
};
array_push(&self->heads, head);
stack_node_retain(node);
@ -180,7 +183,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
}
static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
unsigned push_count) {
unsigned push_count,
const TSExternalTokenState *external_token_state) {
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
StackVersion version = self->slices.contents[i].version;
if (self->heads.contents[version].node == node) {
@ -190,7 +194,7 @@ static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
}
}
StackVersion version = ts_stack__add_version(self, node, push_count);
StackVersion version = ts_stack__add_version(self, node, push_count, external_token_state);
StackSlice slice = { *trees, version };
array_push(&self->slices, slice);
}
@ -202,6 +206,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
StackHead *head = array_get(&self->heads, version);
unsigned push_count = head->push_count;
const TSExternalTokenState *external_token_state = head->external_token_state;
Iterator iterator = {
.node = head->node,
.trees = array_new(),
@ -229,7 +234,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
if (!should_stop)
ts_tree_array_copy(trees, &trees);
array_reverse(&trees);
ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count);
ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count,
external_token_state);
}
if (should_stop) {
@ -288,7 +294,12 @@ Stack *ts_stack_new() {
self->base_node =
stack_node_new(NULL, NULL, false, 1, length_zero(), &self->node_pool);
stack_node_retain(self->base_node);
array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
array_push(&self->heads, ((StackHead){
self->base_node,
false,
0,
NULL
}));
return self;
}
@ -327,11 +338,19 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->push_count;
}
void ts_stack_decrease_push_count(const Stack *self, StackVersion version,
void ts_stack_decrease_push_count(Stack *self, StackVersion version,
unsigned decrement) {
array_get(&self->heads, version)->push_count -= decrement;
}
const TSExternalTokenState *ts_stack_external_token_state(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->external_token_state;
}
void ts_stack_set_external_token_state(Stack *self, StackVersion version, const TSExternalTokenState *state) {
array_get(&self->heads, version)->external_token_state = state;
}
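A sketch of how these two accessors pair with the external_scanner callbacks in the TSLanguage struct; the real call sites live in the parser, outside this hunk, and every local name below (scanner_payload, token, and so on) is a stand-in:

static void lex_external(Stack *stack, StackVersion version,
                         const TSLanguage *language, void *scanner_payload,
                         TSLexer *lexer, const bool *valid_symbols,
                         Tree *token) {
  // Restore the scanner to the state this stack version last recorded.
  const TSExternalTokenState *state =
    ts_stack_external_token_state(stack, version);
  if (state)
    language->external_scanner.deserialize(scanner_payload, *state);

  // After a successful scan, persist the new state on the token and
  // remember it on this stack head for the next lex.
  if (language->external_scanner.scan(scanner_payload, lexer, valid_symbols)) {
    language->external_scanner.serialize(scanner_payload,
                                         token->external_token_state);
    ts_stack_set_external_token_state(stack, version,
                                      &token->external_token_state);
  }
}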
ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
StackHead *head = array_get(&self->heads, version);
return (ErrorStatus){
@ -480,7 +499,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
if (new_node->state == node->state &&
new_node->position.chars == node->position.chars &&
new_node->error_count == node->error_count &&
new_node->error_cost == node->error_cost) {
new_node->error_cost == node->error_cost &&
new_head->external_token_state == head->external_token_state) {
for (uint32_t j = 0; j < new_node->link_count; j++)
stack_node_add_link(node, new_node->links[j]);
if (new_head->push_count > head->push_count)
@ -505,7 +525,12 @@ void ts_stack_clear(Stack *self) {
for (uint32_t i = 0; i < self->heads.size; i++)
stack_node_release(self->heads.contents[i].node, &self->node_pool);
array_clear(&self->heads);
array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
array_push(&self->heads, ((StackHead){
self->base_node,
false,
0,
NULL
}));
}
bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
@ -528,8 +553,20 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
fprintf(
f,
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
"labeltooltip=\"push_count: %u\"]\n",
"labeltooltip=\"push_count: %u",
i, head->node, i, head->push_count);
if (head->external_token_state) {
const TSExternalTokenState *s = head->external_token_state;
fprintf(f,
"\nexternal_token_state: "
"%2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X",
(*s)[0], (*s)[1], (*s)[2], (*s)[3], (*s)[4], (*s)[5], (*s)[6], (*s)[7],
(*s)[8], (*s)[9], (*s)[10], (*s)[11], (*s)[12], (*s)[13], (*s)[14], (*s)[15]
);
}
fprintf(f, "\"]\n");
array_push(&self->iterators, ((Iterator){.node = head->node }));
}
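The %2X conversion above renders each of the sixteen state bytes right-aligned in a two-character field, so single-digit values come out space-padded. A standalone demo of that dump format (only the typedef is borrowed from the runtime headers):

#include <stdint.h>
#include <stdio.h>

typedef uint8_t TSExternalTokenState[16];

int main(void) {
  TSExternalTokenState s = { 0xAB, 0x1 };  // remaining bytes are zero
  for (int i = 0; i < 16; i++)
    printf("%2X ", s[i]);                  // => "AB  1  0  0 ..."
  putchar('\n');
  return 0;
}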

View file

@ -65,7 +65,11 @@ TSStateId ts_stack_top_state(const Stack *, StackVersion);
unsigned ts_stack_push_count(const Stack *, StackVersion);
void ts_stack_decrease_push_count(const Stack *, StackVersion, unsigned);
void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned);
const TSExternalTokenState *ts_stack_external_token_state(const Stack *, StackVersion);
void ts_stack_set_external_token_state(Stack *, StackVersion, const TSExternalTokenState *);
/*
* Get the position at the top of the given version of the stack. If the stack

View file

@ -25,10 +25,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.first_leaf = {
.symbol = sym,
.lex_state = 0
}
.first_leaf.symbol = sym,
};
return result;
}
@ -111,6 +108,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->has_external_tokens = false;
self->has_external_token_state = false;
for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];
@ -128,11 +127,14 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->visible_child_count++;
if (child->named)
self->named_child_count++;
} else {
} else if (child->child_count > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
}
if (child->has_external_tokens) self->has_external_tokens = true;
if (child->has_external_token_state) self->has_external_token_state = true;
if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;
@ -377,6 +379,21 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
}
}
const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree) {
while (tree->child_count > 0) {
for (uint32_t i = tree->child_count - 1; i + 1 > 0; i--) {
Tree *child = tree->children[i];
if (child->has_external_token_state) {
tree = child;
break;
} else if (child->has_external_tokens) {
return NULL;
}
}
}
return &tree->external_token_state;
}
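This search descends, level by level, into the rightmost child whose subtree carries serialized scanner state, and returns NULL if it first meets a subtree that contains external tokens but no recoverable state. Termination rests on an invariant implied by ts_tree_set_children above: a node has has_external_token_state set only if one of its children does, so the inner loop always either descends or returns. A hypothetical shape, with * marking has_external_token_state:

//        root*
//       /     \
//   stmt      comment*    <- rightmost *-subtree: descend here
//               |
//           raw_text*     <- leaf: return &tree->external_token_state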
static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
if (c == 0)
return snprintf(s, n, "EOF");

View file

@ -22,10 +22,13 @@ typedef struct Tree {
} context;
uint32_t child_count;
uint32_t visible_child_count;
uint32_t named_child_count;
union {
struct Tree **children;
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
struct Tree **children;
};
TSExternalTokenState external_token_state;
int32_t lookahead_char;
};
@ -38,7 +41,7 @@ typedef struct Tree {
struct {
TSSymbol symbol;
TSStateId lex_state;
TSLexMode lex_mode;
} first_leaf;
unsigned short ref_count;
@ -48,6 +51,8 @@ typedef struct Tree {
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool has_external_token_state : 1;
} Tree;
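The new union lets leaves, which never use the child-bookkeeping fields, hold the 16-byte scanner state (or the lookahead character) in the same storage. This is presumably also why ts_tree_set_children above stopped reading visible_child_count from childless children: on a leaf, those bytes may now contain scanner state. A miniature of the aliasing hazard (hypothetical struct using C11 anonymous members, not the real Tree):

#include <stdint.h>
#include <stdio.h>

typedef struct Node {
  uint32_t child_count;
  union {
    struct {                             // interior nodes only
      uint32_t visible_child_count;
      struct Node **children;
    };
    uint8_t external_token_state[16];    // leaves only
  };
} Node;

int main(void) {
  Node leaf = { .child_count = 0 };
  leaf.external_token_state[0] = 0xFF;
  // Reads the very bytes the scanner state was written into:
  printf("aliased count: %u\n", leaf.visible_child_count);
  return 0;
}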
typedef struct {
@ -81,6 +86,7 @@ void ts_tree_assign_parents(Tree *, TreePath *);
void ts_tree_edit(Tree *, const TSInputEdit *edit);
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);
const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *);
static inline uint32_t ts_tree_total_bytes(const Tree *self) {
return self->padding.bytes + self->size.bytes;

View file

@ -21,20 +21,20 @@ static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) {
}
}
static bool tree_path_descend(TreePath *path, TSPoint position) {
static bool tree_path_descend(TreePath *path, Length position) {
uint32_t original_size = path->size;
bool did_descend;
do {
did_descend = false;
TreePathEntry entry = *array_back(path);
Length child_position = entry.position;
Length child_left = entry.position;
for (uint32_t i = 0; i < entry.tree->child_count; i++) {
Tree *child = entry.tree->children[i];
Length child_right_position =
length_add(child_position, ts_tree_total_size(child));
if (point_lt(position, child_right_position.extent)) {
TreePathEntry child_entry = { child, child_position, i };
if (child->visible) {
Length child_right = length_add(child_left, ts_tree_total_size(child));
if (position.bytes < child_right.bytes) {
TreePathEntry child_entry = { child, child_left, i };
if (child->visible || child->child_count == 0) {
array_push(path, child_entry);
return true;
} else if (child->visible_child_count > 0) {
@ -43,39 +43,44 @@ static bool tree_path_descend(TreePath *path, TSPoint position) {
break;
}
}
child_position = child_right_position;
child_left = child_right;
}
} while (did_descend);
path->size = original_size;
return false;
}
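Two behavioral changes land together in this function: the position test switches from row/column points to plain byte offsets, and the descent now also stops at invisible leaves (child->child_count == 0), which keeps tokens, external ones included, reachable as path endpoints even when their symbols are not visible.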
static uint32_t tree_path_advance(TreePath *path) {
uint32_t ascend_count = 0;
while (path->size > 0) {
TreePathEntry entry = array_pop(path);
if (path->size == 0)
break;
if (path->size == 0) break;
TreePathEntry parent_entry = *array_back(path);
if (parent_entry.tree->visible) ascend_count++;
Length position =
length_add(entry.position, ts_tree_total_size(entry.tree));
Length position = length_add(entry.position, ts_tree_total_size(entry.tree));
for (uint32_t i = entry.child_index + 1; i < parent_entry.tree->child_count; i++) {
Tree *next_child = parent_entry.tree->children[i];
if (next_child->visible || next_child->visible_child_count > 0) {
if (next_child->visible ||
next_child->child_count == 0 ||
next_child->visible_child_count > 0) {
if (parent_entry.tree->visible) ascend_count--;
array_push(path, ((TreePathEntry){
.tree = next_child,
.child_index = i,
.position = position,
}));
if (!next_child->visible)
tree_path_descend(path, (TSPoint){ 0, 0 });
if (!next_child->visible) {
tree_path_descend(path, length_zero());
}
return ascend_count;
}
position = length_add(position, ts_tree_total_size(next_child));
}
}
return ascend_count;
}
@ -94,8 +99,27 @@ static void tree_path_init(TreePath *path, Tree *tree) {
.position = { 0, 0, { 0, 0 } },
.child_index = 0,
}));
if (!tree->visible)
tree_path_descend(path, (TSPoint){ 0, 0 });
if (!tree->visible) {
tree_path_descend(path, length_zero());
}
}
Tree *tree_path_visible_tree(TreePath *self) {
for (uint32_t i = self->size - 1; i + 1 > 0; i--) {
Tree *tree = self->contents[i].tree;
if (tree->visible) return tree;
}
return NULL;
}
Length tree_path_start_position(TreePath *self) {
TreePathEntry entry = *array_back(self);
return length_add(entry.position, entry.tree->padding);
}
Length tree_path_end_position(TreePath *self) {
TreePathEntry entry = *array_back(self);
return length_add(length_add(entry.position, entry.tree->padding), entry.tree->size);
}
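A quick numeric check of these two helpers with made-up lengths: if the entry's subtree begins at byte 10, carries 2 bytes of padding (leading whitespace or comments), and has 5 bytes of content, then tree_path_start_position returns byte 12 (10 + 2) and tree_path_end_position returns byte 17 (12 + 5).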
static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {
@ -112,67 +136,59 @@ static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {
static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
TSRange **ranges, uint32_t *range_count) {
TSPoint position = { 0, 0 };
Length position = length_zero();
RangeArray results = array_new();
while (old_path->size && new_path->size) {
bool is_changed = false;
TSPoint next_position = position;
Length next_position = position;
TreePathEntry old_entry = *array_back(old_path);
TreePathEntry new_entry = *array_back(new_path);
Tree *old_tree = old_entry.tree;
Tree *new_tree = new_entry.tree;
uint32_t old_start_byte = old_entry.position.bytes + old_tree->padding.bytes;
uint32_t new_start_byte = new_entry.position.bytes + new_tree->padding.bytes;
TSPoint old_start_point =
point_add(old_entry.position.extent, old_tree->padding.extent);
TSPoint new_start_point =
point_add(new_entry.position.extent, new_tree->padding.extent);
TSPoint old_end_point = point_add(old_start_point, old_tree->size.extent);
TSPoint new_end_point = point_add(new_start_point, new_tree->size.extent);
Tree *old_tree = tree_path_visible_tree(old_path);
Tree *new_tree = tree_path_visible_tree(new_path);
Length old_start = tree_path_start_position(old_path);
Length new_start = tree_path_start_position(new_path);
Length old_end = tree_path_end_position(old_path);
Length new_end = tree_path_end_position(new_path);
// #define NAME(t) (ts_language_symbol_name(language, ((Tree *)(t))->symbol))
// printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\n",
// position.row, position.column, NAME(old_tree), old_start_point.row,
// old_start_point.column, old_end_point.row, old_end_point.column,
// NAME(new_tree), new_start_point.row, new_start_point.column,
// new_end_point.row, new_end_point.column);
// printf("At [%-2u, %-2u] Compare (%-20s\t [%-2u, %-2u] - [%u, %u])\tvs\t(%-20s\t [%u, %u] - [%u, %u])\n",
// position.extent.row, position.extent.column,
// NAME(old_tree), old_start.extent.row, old_start.extent.column, old_end.extent.row, old_end.extent.column,
// NAME(new_tree), new_start.extent.row, new_start.extent.column, new_end.extent.row, new_end.extent.column);
if (point_lt(position, old_start_point)) {
if (point_lt(position, new_start_point)) {
next_position = point_min(old_start_point, new_start_point);
if (position.bytes < old_start.bytes) {
if (position.bytes < new_start.bytes) {
next_position = length_min(old_start, new_start);
} else {
is_changed = true;
next_position = old_start_point;
next_position = old_start;
}
} else if (point_lt(position, new_start_point)) {
} else if (position.bytes < new_start.bytes) {
is_changed = true;
next_position = new_start_point;
} else if (old_start_byte == new_start_byte &&
tree_must_eq(old_tree, new_tree)) {
next_position = old_end_point;
next_position = new_start;
} else if (old_start.bytes == new_start.bytes && tree_must_eq(old_tree, new_tree)) {
next_position = old_end;
} else if (old_tree->symbol == new_tree->symbol) {
if (tree_path_descend(old_path, position)) {
if (!tree_path_descend(new_path, position)) {
tree_path_ascend(old_path, 1);
is_changed = true;
next_position = new_end_point;
next_position = new_end;
}
} else if (tree_path_descend(new_path, position)) {
tree_path_ascend(new_path, 1);
is_changed = true;
next_position = old_end_point;
next_position = old_end;
} else {
next_position = point_min(old_end_point, new_end_point);
next_position = length_min(old_end, new_end);
}
} else {
is_changed = true;
next_position = point_min(old_end_point, new_end_point);
next_position = length_min(old_end, new_end);
}
bool at_old_end = point_lte(old_end_point, next_position);
bool at_new_end = point_lte(new_end_point, next_position);
bool at_old_end = old_end.bytes <= next_position.bytes;
bool at_new_end = new_end.bytes <= next_position.bytes;
if (at_new_end && at_old_end) {
uint32_t old_ascend_count = tree_path_advance(old_path);
@ -190,7 +206,7 @@ static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
tree_path_ascend(new_path, ascend_count);
}
if (is_changed) range_array_add(&results, position, next_position);
if (is_changed) range_array_add(&results, position.extent, next_position.extent);
position = next_position;
}
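An outline of one iteration of the rewritten loop above, now driven by byte offsets rather than points (commentary only, not a drop-in):

// old_tree / new_tree: the visible subtrees under each cursor
// if the cursor sits before old_tree starts:
//   if it also sits before new_tree starts, the gap up to the earlier
//   start is unchanged; otherwise the gap up to old's start is a change
// else if it sits before new_tree starts: change up to new's start
// else if both start here and tree_must_eq proves them identical:
//   skip to old's end with nothing recorded
// else if the symbols match: try to descend both sides; if only one side
//   descends, back out and mark a change up to the end of the side that
//   could not descend; if neither descends, advance to
//   min(old_end, new_end) unchanged
// else: mark a change up to min(old_end, new_end)
// when is_changed, append [position, next_position) to the results, then
// advance whichever cursor(s) finished their subtree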

todo.md
View file

@ -1,32 +0,0 @@
TODO
====
### Handling ambiguity (GLR)
* Add a simple way to specify syntactic ambiguity resolutions in the Grammar (e.g. 'prefer declarations to statements' in C), similar to bison's `dprec` construct.
### Runtime System
* Refactoring: use a separate symbol for unexpected characters, distinct from the symbol used for interior error nodes.
### Testing / Quality
* Start running the clang-analyzer on the codebase on Travis-CI.
* Use the Valgrind leak checker to fix the memory leaks in the runtime library.
* Randomize the editing in the language tests, using a seed that can be specified in order to reproduce failures.
### Ubiquitous token handling
* Fix the unintuitive tree that results when ubiquitous tokens are the last child of their parent node.
### Error handling
* Use information about the nesting depth of tokens like '(' and ')' to make error recovery more accurate.
### Grammar Features
* Regexp assertions
- [ ] '^'
- [ ] '$'
- [ ] '\b'
* Composing languages
- [ ] Rule for referencing named grammar
- [ ] Grammar registry object in runtime
- [ ] Parsing returns control to parent language
* Indentation tokens