Merge pull request #54 from tree-sitter/external-scanners
External scanners
This commit is contained in:
commit
3edb5dbdd9
75 changed files with 2162 additions and 1123 deletions
|
|
@ -176,11 +176,11 @@ tokens, like `(` and `+`. This is useful when analyzing the meaning of a documen
|
|||
#include "tree_sitter/runtime.h"
|
||||
|
||||
// Declare the language function that was generated from your grammar.
|
||||
TSLanguage *ts_language_arithmetic();
|
||||
TSLanguage *tree_sitter_arithmetic();
|
||||
|
||||
int main() {
|
||||
TSDocument *document = ts_document_new();
|
||||
ts_document_set_language(document, ts_language_arithmetic());
|
||||
ts_document_set_language(document, tree_sitter_arithmetic());
|
||||
ts_document_set_input_string(document, "a + b * 5");
|
||||
ts_document_parse(document);
|
||||
|
||||
|
|
|
|||
|
|
@ -40,6 +40,14 @@
|
|||
"pattern": "^[a-zA-Z_]\\w*$"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
"externals": {
|
||||
"type": "array",
|
||||
"items": {
|
||||
"type": "string",
|
||||
"pattern": "^[a-zA-Z_]\\w*$"
|
||||
}
|
||||
}
|
||||
},
|
||||
|
||||
|
|
|
|||
|
|
@ -10,7 +10,8 @@ typedef enum {
|
|||
TSCompileErrorTypeInvalidGrammar,
|
||||
TSCompileErrorTypeInvalidRegex,
|
||||
TSCompileErrorTypeUndefinedSymbol,
|
||||
TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
TSCompileErrorTypeInvalidExternalToken,
|
||||
TSCompileErrorTypeLexConflict,
|
||||
TSCompileErrorTypeParseConflict,
|
||||
TSCompileErrorTypeEpsilonRule,
|
||||
|
|
|
|||
|
|
@ -12,6 +12,8 @@ extern "C" {
|
|||
typedef unsigned short TSSymbol;
|
||||
typedef unsigned short TSStateId;
|
||||
|
||||
typedef uint8_t TSExternalTokenState[16];
|
||||
|
||||
#define ts_builtin_sym_error ((TSSymbol)-1)
|
||||
#define ts_builtin_sym_end 0
|
||||
|
||||
|
|
@ -23,7 +25,7 @@ typedef struct {
|
|||
} TSSymbolMetadata;
|
||||
|
||||
typedef struct {
|
||||
void (*advance)(void *, TSStateId, bool);
|
||||
void (*advance)(void *, bool);
|
||||
int32_t lookahead;
|
||||
TSSymbol result_symbol;
|
||||
} TSLexer;
|
||||
|
|
@ -48,6 +50,11 @@ typedef struct {
|
|||
bool fragile : 1;
|
||||
} TSParseAction;
|
||||
|
||||
typedef struct {
|
||||
uint16_t lex_state;
|
||||
uint16_t external_lex_state;
|
||||
} TSLexMode;
|
||||
|
||||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
|
|
@ -58,14 +65,26 @@ typedef union {
|
|||
} TSParseActionEntry;
|
||||
|
||||
typedef struct TSLanguage {
|
||||
uint32_t version;
|
||||
uint32_t symbol_count;
|
||||
uint32_t token_count;
|
||||
uint32_t external_token_count;
|
||||
const char **symbol_names;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const unsigned short *parse_table;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const TSStateId *lex_states;
|
||||
const TSLexMode *lex_modes;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
struct {
|
||||
const bool *states;
|
||||
const TSSymbol *symbol_map;
|
||||
void *(*create)();
|
||||
void (*destroy)(void *);
|
||||
void (*reset)(void *);
|
||||
bool (*scan)(void *, TSLexer *, const bool *symbol_whitelist);
|
||||
bool (*serialize)(void *, TSExternalTokenState);
|
||||
void (*deserialize)(void *, const TSExternalTokenState);
|
||||
} external_scanner;
|
||||
} TSLanguage;
|
||||
|
||||
/*
|
||||
|
|
@ -79,14 +98,14 @@ typedef struct TSLanguage {
|
|||
|
||||
#define ADVANCE(state_value) \
|
||||
{ \
|
||||
lexer->advance(lexer, state_value, false); \
|
||||
lexer->advance(lexer, false); \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
||||
#define SKIP(state_value) \
|
||||
{ \
|
||||
lexer->advance(lexer, state_value, true); \
|
||||
lexer->advance(lexer, true); \
|
||||
state = state_value; \
|
||||
goto next_state; \
|
||||
}
|
||||
|
|
@ -146,21 +165,21 @@ typedef struct TSLanguage {
|
|||
{ .type = TSParseActionTypeAccept } \
|
||||
}
|
||||
|
||||
#define EXPORT_LANGUAGE(language_name) \
|
||||
static TSLanguage language = { \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
.lex_states = ts_lex_states, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
}; \
|
||||
\
|
||||
const TSLanguage *language_name() { \
|
||||
return &language; \
|
||||
}
|
||||
#define GET_LANGUAGE(...) \
|
||||
static TSLanguage language = { \
|
||||
.version = LANGUAGE_VERSION, \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
.lex_modes = ts_lex_modes, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
.external_token_count = EXTERNAL_TOKEN_COUNT, \
|
||||
.external_scanner = {__VA_ARGS__} \
|
||||
}; \
|
||||
return &language \
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,8 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 1
|
||||
|
||||
typedef unsigned short TSSymbol;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
typedef struct TSDocument TSDocument;
|
||||
|
|
@ -114,6 +116,7 @@ uint32_t ts_document_parse_count(const TSDocument *);
|
|||
|
||||
uint32_t ts_language_symbol_count(const TSLanguage *);
|
||||
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
|
||||
uint32_t ts_language_version(const TSLanguage *);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ GRAMMARS=(
|
|||
json
|
||||
c
|
||||
cpp
|
||||
python
|
||||
)
|
||||
|
||||
for grammar in ${GRAMMARS[@]}; do
|
||||
|
|
@ -21,7 +22,7 @@ for grammar in ${GRAMMARS[@]}; do
|
|||
|
||||
(
|
||||
cd $grammar_dir;
|
||||
git reset --hard;
|
||||
git pull origin master;
|
||||
git fetch origin
|
||||
git reset --hard origin/master;
|
||||
)
|
||||
done
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ describe("recovery_tokens(rule)", []() {
|
|||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol::Index>>({ 1 }));
|
||||
AssertThat(recovery_tokens(grammar), Equals<set<Symbol>>({ Symbol(1, Symbol::Terminal) }));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -14,10 +14,10 @@ START_TEST
|
|||
describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
||||
LexConflictManager conflict_manager;
|
||||
bool update;
|
||||
Symbol sym1(0, true);
|
||||
Symbol sym2(1, true);
|
||||
Symbol sym3(2, true);
|
||||
Symbol sym4(3, true);
|
||||
Symbol sym1(0, Symbol::Terminal);
|
||||
Symbol sym2(1, Symbol::Terminal);
|
||||
Symbol sym3(2, Symbol::Terminal);
|
||||
Symbol sym4(3, Symbol::Terminal);
|
||||
LexItemSet item_set({ LexItem(sym4, blank() )});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ START_TEST
|
|||
describe("LexItem", []() {
|
||||
describe("completion_status()", [&]() {
|
||||
it("indicates whether the item is done, its precedence, and whether it is a string", [&]() {
|
||||
LexItem item1(Symbol(0, true), character({ 'a', 'b', 'c' }));
|
||||
LexItem item1(Symbol(0, Symbol::Terminal), character({ 'a', 'b', 'c' }));
|
||||
AssertThat(item1.completion_status().is_done, IsFalse());
|
||||
AssertThat(item1.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item1.completion_status().is_string, IsFalse());
|
||||
|
|
@ -23,7 +23,7 @@ describe("LexItem", []() {
|
|||
params.precedence = 3;
|
||||
params.has_precedence = true;
|
||||
params.is_string = 1;
|
||||
LexItem item2(Symbol(0, true), choice({
|
||||
LexItem item2(Symbol(0, Symbol::Terminal), choice({
|
||||
metadata(blank(), params),
|
||||
character({ 'a', 'b', 'c' })
|
||||
}));
|
||||
|
|
@ -32,7 +32,7 @@ describe("LexItem", []() {
|
|||
AssertThat(item2.completion_status().precedence, Equals(PrecedenceRange(3)));
|
||||
AssertThat(item2.completion_status().is_string, IsTrue());
|
||||
|
||||
LexItem item3(Symbol(0, true), repeat(character({ ' ', '\t' })));
|
||||
LexItem item3(Symbol(0, Symbol::Terminal), repeat(character({ ' ', '\t' })));
|
||||
AssertThat(item3.completion_status().is_done, IsTrue());
|
||||
AssertThat(item3.completion_status().precedence, Equals(PrecedenceRange()));
|
||||
AssertThat(item3.completion_status().is_string, IsFalse());
|
||||
|
|
@ -43,7 +43,7 @@ describe("LexItem", []() {
|
|||
describe("LexItemSet::transitions()", [&]() {
|
||||
it("handles single characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -53,7 +53,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -67,7 +67,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
params.is_main_token = true;
|
||||
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), metadata(character({ 'x' }), params)),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(character({ 'x' }), params)),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -77,7 +77,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), metadata(blank(), params)),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), metadata(blank(), params)),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
true
|
||||
|
|
@ -88,7 +88,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'w' }),
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
|
|
@ -103,7 +103,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('w'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'x' }),
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
|
|
@ -118,7 +118,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences with nested precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
prec(3, seq({
|
||||
character({ 'v' }),
|
||||
prec(4, seq({
|
||||
|
|
@ -140,7 +140,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
// The outer precedence is now 'active', because we are within its
|
||||
// contained rule.
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
prec(4, seq({
|
||||
character({ 'w' }),
|
||||
|
|
@ -168,7 +168,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
Transition{
|
||||
// The inner precedence is now 'active'
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, seq({
|
||||
active_prec(4, character({ 'x' })),
|
||||
character({ 'y' }) })),
|
||||
|
|
@ -193,7 +193,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
active_prec(3, character({ 'y' })),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
|
|
@ -216,7 +216,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'z' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -227,7 +227,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles sequences where the left hand side can be blank", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ 'x' }),
|
||||
blank(),
|
||||
|
|
@ -244,7 +244,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('x'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'y' }),
|
||||
character({ 'z' }),
|
||||
})),
|
||||
|
|
@ -257,7 +257,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('y'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'z' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'z' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -268,7 +268,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles blanks", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
});
|
||||
|
||||
AssertThat(item_set.transitions(), IsEmpty());
|
||||
|
|
@ -276,11 +276,11 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), repeat1(seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))),
|
||||
LexItem(Symbol(2), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -290,14 +290,14 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
character({ 'b' }),
|
||||
repeat1(seq({
|
||||
character({ 'a' }),
|
||||
character({ 'b' }),
|
||||
}))
|
||||
})),
|
||||
LexItem(Symbol(1), character({ 'b' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'b' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -307,8 +307,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), repeat1(character({ 'c' }))),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -319,7 +319,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles repeats with precedence", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' }))))
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' }))))
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -329,8 +329,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(-1, repeat1(character({ 'a' })))),
|
||||
LexItem(Symbol(1), active_prec(-1, blank())),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, repeat1(character({ 'a' })))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(-1, blank())),
|
||||
}),
|
||||
PrecedenceRange(-1),
|
||||
false
|
||||
|
|
@ -341,7 +341,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between overlapping character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
active_prec(2, seq({
|
||||
character({ 'a', 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
|
|
@ -360,7 +360,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'b'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
}),
|
||||
PrecedenceRange(2),
|
||||
false
|
||||
|
|
@ -370,8 +370,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('c', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(2, character({ 'x' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
}),
|
||||
PrecedenceRange(2, 3),
|
||||
false
|
||||
|
|
@ -381,7 +381,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), active_prec(3, character({ 'y' }))),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), active_prec(3, character({ 'y' }))),
|
||||
}),
|
||||
PrecedenceRange(3),
|
||||
false
|
||||
|
|
@ -392,7 +392,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between a subset and a superset of characters", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), choice({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), choice({
|
||||
seq({
|
||||
character({ 'b', 'c', 'd' }),
|
||||
character({ 'x' }),
|
||||
|
|
@ -411,7 +411,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a').include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -421,8 +421,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('b', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ 'x' })),
|
||||
LexItem(Symbol(1), character({ 'y' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'x' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'y' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -433,7 +433,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles choices between whitelisted and blacklisted character sets", [&]() {
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol(1), seq({
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({
|
||||
choice({
|
||||
character({ '/' }, false),
|
||||
seq({
|
||||
|
|
@ -452,7 +452,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include_all().exclude('/').exclude('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -462,8 +462,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('\\'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), character({ '/' })),
|
||||
LexItem(Symbol(1), seq({ character({ '/' }), character({ '/' }) })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ '/' })),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), seq({ character({ '/' }), character({ '/' }) })),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -474,8 +474,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
|
||||
it("handles different items with overlapping character sets", [&]() {
|
||||
LexItemSet set1({
|
||||
LexItem(Symbol(1), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), character({ 'a', 'b', 'c', 'd', 'e', 'f' })),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), character({ 'e', 'f', 'g', 'h', 'i' }))
|
||||
});
|
||||
|
||||
AssertThat(set1.transitions(), Equals(LexItemSet::TransitionMap({
|
||||
|
|
@ -483,7 +483,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('a', 'd'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -493,8 +493,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('e', 'f'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(1), blank()),
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(1, Symbol::NonTerminal), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
@ -504,7 +504,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet().include('g', 'i'),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol(2), blank()),
|
||||
LexItem(Symbol(2, Symbol::NonTerminal), blank()),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
false
|
||||
|
|
|
|||
|
|
@ -27,26 +27,26 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(11, true), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, true), 0, AssociativityNone},
|
||||
{Symbol(13, true), 0, AssociativityNone},
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(2), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
SyntaxVariable("rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(14, true), 0, AssociativityNone},
|
||||
{Symbol(15, true), 0, AssociativityNone},
|
||||
{Symbol(14, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(15, Symbol::Terminal), 0, AssociativityNone},
|
||||
})
|
||||
}),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
return grammar.variables[variable_index].productions[production_index];
|
||||
|
|
@ -54,8 +54,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 }),
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -64,20 +64,20 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 })
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ 11 })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(2), production(2, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(2, Symbol::NonTerminal), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
@ -86,18 +86,18 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable("rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(11, true), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(11, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
}),
|
||||
SyntaxVariable("rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
{Symbol(12, true), 0, AssociativityNone},
|
||||
{Symbol(13, true), 0, AssociativityNone},
|
||||
{Symbol(12, Symbol::Terminal), 0, AssociativityNone},
|
||||
{Symbol(13, Symbol::Terminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto production = [&](int variable_index, int production_index) -> const Production & {
|
||||
return grammar.variables[variable_index].productions[production_index];
|
||||
|
|
@ -105,8 +105,8 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
ParseItemSet item_set({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 }),
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) }),
|
||||
}
|
||||
});
|
||||
|
||||
|
|
@ -115,16 +115,16 @@ describe("ParseItemSetBuilder", []() {
|
|||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
{
|
||||
ParseItem(Symbol(0), production(0, 0), 0),
|
||||
LookaheadSet({ 10 })
|
||||
ParseItem(Symbol(0, Symbol::NonTerminal), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol(10, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 0), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol(1), production(1, 1), 0),
|
||||
LookaheadSet({ 11 })
|
||||
ParseItem(Symbol(1, Symbol::NonTerminal), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol(11, Symbol::Terminal) })
|
||||
},
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ describe("expand_repeats", []() {
|
|||
it("replaces repeat rules with pairs of recursive rules", [&]() {
|
||||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(0))),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -32,7 +32,7 @@ describe("expand_repeats", []() {
|
|||
i_token(10),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -54,7 +54,7 @@ describe("expand_repeats", []() {
|
|||
i_token(10),
|
||||
repeat1(i_token(11))
|
||||
})),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ describe("expand_repeats", []() {
|
|||
i_token(3),
|
||||
repeat1(i_token(4))
|
||||
})),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -106,7 +106,7 @@ describe("expand_repeats", []() {
|
|||
repeat1(i_token(10)),
|
||||
repeat1(i_token(11)),
|
||||
})),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
@ -130,7 +130,7 @@ describe("expand_repeats", []() {
|
|||
InitialSyntaxGrammar grammar{{
|
||||
Variable("rule0", VariableTypeNamed, repeat1(i_token(10))),
|
||||
Variable("rule1", VariableTypeNamed, repeat1(i_token(11))),
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = expand_repeats(grammar);
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include "compiler/prepare_grammar/extract_tokens.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -28,7 +29,7 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, pattern("ij+")),
|
||||
Variable("rule_C", VariableTypeNamed, choice({ str("kl"), blank() })),
|
||||
Variable("rule_D", VariableTypeNamed, repeat1(i_sym(3)))
|
||||
}, {}, {}});
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
|
@ -91,7 +92,7 @@ describe("extract_tokens", []() {
|
|||
i_sym(0),
|
||||
str("ab"),
|
||||
})),
|
||||
}, {}, {}});
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
|
@ -110,7 +111,7 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_A", VariableTypeNamed, seq({ i_sym(1), str("ab") })),
|
||||
Variable("rule_B", VariableTypeNamed, str("cd")),
|
||||
Variable("rule_C", VariableTypeNamed, seq({ str("ef"), str("cd") })),
|
||||
}, {}, {}});
|
||||
}, {}, {}, {}});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
LexicalGrammar &lexical_grammar = get<1>(result);
|
||||
|
|
@ -129,17 +130,26 @@ describe("extract_tokens", []() {
|
|||
});
|
||||
|
||||
it("renumbers the grammar's expected conflict symbols based on any moved rules", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
}, { str(" ") }, { { Symbol(1), Symbol(2) } }});
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, str("ok")),
|
||||
Variable("rule_B", VariableTypeNamed, repeat(i_sym(0))),
|
||||
Variable("rule_C", VariableTypeNamed, repeat(seq({ i_sym(0), i_sym(0) }))),
|
||||
},
|
||||
{
|
||||
str(" ")
|
||||
},
|
||||
{
|
||||
{ Symbol(1, Symbol::NonTerminal), Symbol(2, Symbol::NonTerminal) }
|
||||
},
|
||||
{}
|
||||
});
|
||||
|
||||
InitialSyntaxGrammar &syntax_grammar = get<0>(result);
|
||||
|
||||
AssertThat(syntax_grammar.variables.size(), Equals<size_t>(2));
|
||||
AssertThat(syntax_grammar.expected_conflicts, Equals(set<set<Symbol>>({
|
||||
{ Symbol(0), Symbol(1) },
|
||||
{ Symbol(0, Symbol::NonTerminal), Symbol(1, Symbol::NonTerminal) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
|
@ -150,7 +160,7 @@ describe("extract_tokens", []() {
|
|||
}, {
|
||||
str("y"),
|
||||
pattern("\\s+"),
|
||||
}, {}});
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
|
|
@ -167,11 +177,11 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, {
|
||||
str("y"),
|
||||
}, {}});
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
AssertThat(get<1>(result).separators.size(), Equals<size_t>(0));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, true) })));
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({ Symbol(1, Symbol::Terminal) })));
|
||||
});
|
||||
|
||||
it("updates extra symbols according to the new symbol numbers", [&]() {
|
||||
|
|
@ -181,12 +191,12 @@ describe("extract_tokens", []() {
|
|||
Variable("rule_C", VariableTypeNamed, str("z")),
|
||||
}, {
|
||||
i_sym(2),
|
||||
}, {}});
|
||||
}, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError::none()));
|
||||
|
||||
AssertThat(get<0>(result).extra_tokens, Equals(set<Symbol>({
|
||||
{ Symbol(3, true) },
|
||||
{ Symbol(3, Symbol::Terminal) },
|
||||
})));
|
||||
|
||||
AssertThat(get<1>(result).separators, IsEmpty());
|
||||
|
|
@ -196,11 +206,11 @@ describe("extract_tokens", []() {
|
|||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
}, { i_sym(1) }, {}});
|
||||
}, { i_sym(1) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
CompileError(TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: rule_B")));
|
||||
});
|
||||
|
||||
|
|
@ -208,14 +218,34 @@ describe("extract_tokens", []() {
|
|||
auto result = extract_tokens(InternedGrammar{{
|
||||
Variable("rule_A", VariableTypeNamed, str("x")),
|
||||
Variable("rule_B", VariableTypeNamed, str("y")),
|
||||
}, { choice({ i_sym(1), blank() }) }, {}});
|
||||
}, { choice({ i_sym(1), blank() }) }, {}, {}});
|
||||
|
||||
AssertThat(get<2>(result), !Equals(CompileError::none()));
|
||||
AssertThat(get<2>(result), Equals(
|
||||
CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
|
||||
"Not a token: (choice (sym 1) (blank))")));
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExtraToken,
|
||||
"Not a token: (choice (non-terminal 1) (blank))"
|
||||
)));
|
||||
});
|
||||
});
|
||||
|
||||
it("returns an error if an external token has the same name as a non-terminal rule", [&]() {
|
||||
auto result = extract_tokens(InternedGrammar{
|
||||
{
|
||||
Variable("rule_A", VariableTypeNamed, seq({ str("x"), i_sym(1) })),
|
||||
Variable("rule_B", VariableTypeNamed, seq({ str("y"), str("z") })),
|
||||
},
|
||||
{},
|
||||
{},
|
||||
{
|
||||
ExternalToken {"rule_A", VariableTypeNamed, Symbol(0, Symbol::NonTerminal)}
|
||||
}
|
||||
});
|
||||
|
||||
AssertThat(get<2>(result), Equals(CompileError(
|
||||
TSCompileErrorTypeInvalidExternalToken,
|
||||
"Name 'rule_A' cannot be used for both an external token and a non-terminal rule"
|
||||
)));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -36,19 +36,19 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(3), 102, AssociativityRight},
|
||||
{Symbol(4), 101, AssociativityLeft},
|
||||
{Symbol(6), 0, AssociativityNone},
|
||||
{Symbol(7), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(3, Symbol::NonTerminal), 102, AssociativityRight},
|
||||
{Symbol(4, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
{Symbol(1), 0, AssociativityNone},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(5), 101, AssociativityLeft},
|
||||
{Symbol(6), 0, AssociativityNone},
|
||||
{Symbol(7), 0, AssociativityNone},
|
||||
{Symbol(1, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(5, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(6, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
{Symbol(7, Symbol::NonTerminal), 0, AssociativityNone},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
|
@ -65,8 +65,8 @@ describe("flatten_grammar", []() {
|
|||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 101, AssociativityLeft},
|
||||
{Symbol(2), 101, AssociativityLeft},
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
{Symbol(2, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
|
||||
|
|
@ -80,7 +80,7 @@ describe("flatten_grammar", []() {
|
|||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
{Symbol(1), 101, AssociativityLeft},
|
||||
{Symbol(1, Symbol::NonTerminal), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
|
|
|||
|
|
@ -3,8 +3,10 @@
|
|||
#include "compiler/grammar.h"
|
||||
#include "compiler/rules/named_symbol.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "helpers/equals_pointer.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
|
||||
START_TEST
|
||||
|
||||
|
|
@ -17,7 +19,7 @@ describe("intern_symbols", []() {
|
|||
{ "x", choice({ sym("y"), sym("_z") }) },
|
||||
{ "y", sym("_z") },
|
||||
{ "_z", str("stuff") }
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -33,7 +35,7 @@ describe("intern_symbols", []() {
|
|||
it("returns an error", []() {
|
||||
Grammar grammar{{
|
||||
{ "x", sym("y") },
|
||||
}, {}, {}};
|
||||
}, {}, {}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -48,7 +50,7 @@ describe("intern_symbols", []() {
|
|||
{ "z", str("stuff") }
|
||||
}, {
|
||||
sym("z")
|
||||
}, {}};
|
||||
}, {}, {}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
|
|
@ -56,6 +58,32 @@ describe("intern_symbols", []() {
|
|||
AssertThat(result.first.extra_tokens.size(), Equals<size_t>(1));
|
||||
AssertThat(*result.first.extra_tokens.begin(), EqualsPointer(i_sym(2)));
|
||||
});
|
||||
|
||||
it("records any rule names that match external token names", [&]() {
|
||||
Grammar grammar{{
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {}, {}, {
|
||||
"w",
|
||||
"z"
|
||||
}};
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.first.external_tokens, Equals(vector<ExternalToken>({
|
||||
{
|
||||
"w",
|
||||
VariableTypeNamed,
|
||||
rules::NONE()
|
||||
},
|
||||
{
|
||||
"z",
|
||||
VariableTypeNamed,
|
||||
Symbol(2, Symbol::NonTerminal)
|
||||
}
|
||||
})))
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ START_TEST
|
|||
describe("Repeat", []() {
|
||||
describe("constructing repeats", [&]() {
|
||||
it("doesn't create redundant repeats", [&]() {
|
||||
auto sym = make_shared<Symbol>(1);
|
||||
auto sym = make_shared<Symbol>(1, Symbol::NonTerminal);
|
||||
auto repeat = Repeat::build(sym);
|
||||
auto outer_repeat = Repeat::build(repeat);
|
||||
|
||||
|
|
|
|||
29
spec/fixtures/error_corpus/python_errors.txt
vendored
Normal file
29
spec/fixtures/error_corpus/python_errors.txt
vendored
Normal file
|
|
@ -0,0 +1,29 @@
|
|||
==========================================
|
||||
errors in if statements
|
||||
==========================================
|
||||
|
||||
if a is:
|
||||
print b
|
||||
print c
|
||||
|
||||
---
|
||||
|
||||
(module
|
||||
(if_statement (identifier) (ERROR)
|
||||
(print_statement (identifier))
|
||||
(print_statement (identifier))))
|
||||
|
||||
==========================================
|
||||
errors in function definitions
|
||||
==========================================
|
||||
|
||||
def a()::
|
||||
b
|
||||
c
|
||||
|
||||
---
|
||||
|
||||
(module
|
||||
(function_definition (identifier) (parameters) (ERROR)
|
||||
(expression_statement (identifier))
|
||||
(expression_statement (identifier))))
|
||||
42
spec/fixtures/external_scanners/extra_external_tokens.c
vendored
Normal file
42
spec/fixtures/external_scanners/extra_external_tokens.c
vendored
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
COMMENT,
|
||||
};
|
||||
|
||||
void *tree_sitter_extra_external_tokens_external_scanner_create() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_reset(void *payload) {
|
||||
}
|
||||
|
||||
bool tree_sitter_extra_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
|
||||
bool tree_sitter_extra_external_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '#') {
|
||||
lexer->advance(lexer, false);
|
||||
while (lexer->lookahead != '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->result_symbol = COMMENT;
|
||||
return true;
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_extra_external_tokens_external_scanner_destroy(void *payload) {
|
||||
}
|
||||
118
spec/fixtures/external_scanners/percent_strings.c
vendored
Normal file
118
spec/fixtures/external_scanners/percent_strings.c
vendored
Normal file
|
|
@ -0,0 +1,118 @@
|
|||
#include <stdbool.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
percent_string,
|
||||
percent_string_start,
|
||||
percent_string_end
|
||||
};
|
||||
|
||||
typedef struct {
|
||||
int32_t open_delimiter;
|
||||
int32_t close_delimiter;
|
||||
uint32_t depth;
|
||||
} Scanner;
|
||||
|
||||
void *tree_sitter_external_scanner_example_external_scanner_create() {
|
||||
Scanner *scanner = malloc(sizeof(Scanner));
|
||||
*scanner = (Scanner){
|
||||
.open_delimiter = 0,
|
||||
.close_delimiter = 0,
|
||||
.depth = 0
|
||||
};
|
||||
return scanner;
|
||||
}
|
||||
|
||||
bool tree_sitter_external_scanner_example_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
Scanner *scanner = payload;
|
||||
|
||||
if (whitelist[percent_string]) {
|
||||
while (lexer->lookahead == ' ' ||
|
||||
lexer->lookahead == '\t' ||
|
||||
lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead != '%') return false;
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
switch (lexer->lookahead) {
|
||||
case '(':
|
||||
scanner->open_delimiter = '(';
|
||||
scanner->close_delimiter = ')';
|
||||
scanner->depth = 1;
|
||||
break;
|
||||
case '[':
|
||||
scanner->open_delimiter = '[';
|
||||
scanner->close_delimiter = ']';
|
||||
scanner->depth = 1;
|
||||
break;
|
||||
case '{':
|
||||
scanner->open_delimiter = '{';
|
||||
scanner->close_delimiter = '}';
|
||||
scanner->depth = 1;
|
||||
break;
|
||||
default:
|
||||
return false;
|
||||
}
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
for (;;) {
|
||||
if (scanner->depth == 0) {
|
||||
lexer->result_symbol = percent_string;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == scanner->open_delimiter) {
|
||||
scanner->depth++;
|
||||
} else if (lexer->lookahead == scanner->close_delimiter) {
|
||||
scanner->depth--;
|
||||
} else if (lexer->lookahead == '#') {
|
||||
lexer->advance(lexer, false);
|
||||
if (lexer->lookahead == '{') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = percent_string_start;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
} else if (whitelist[percent_string_end]) {
|
||||
if (lexer->lookahead != '}') return false;
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
for (;;) {
|
||||
if (scanner->depth == 0) {
|
||||
lexer->result_symbol = percent_string_end;
|
||||
return true;
|
||||
}
|
||||
|
||||
if (lexer->lookahead == scanner->open_delimiter) {
|
||||
scanner->depth++;
|
||||
} else if (lexer->lookahead == scanner->close_delimiter) {
|
||||
scanner->depth--;
|
||||
}
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_reset(void *payload) {
|
||||
}
|
||||
|
||||
bool tree_sitter_external_scanner_example_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
|
||||
void tree_sitter_external_scanner_example_external_scanner_destroy(void *payload) {
|
||||
free(payload);
|
||||
}
|
||||
63
spec/fixtures/external_scanners/shared_external_tokens.c
vendored
Normal file
63
spec/fixtures/external_scanners/shared_external_tokens.c
vendored
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
#include <stdbool.h>
|
||||
#include <tree_sitter/parser.h>
|
||||
|
||||
enum {
|
||||
STRING,
|
||||
LINE_BREAK
|
||||
};
|
||||
|
||||
void *tree_sitter_shared_external_tokens_external_scanner_create() {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_reset(void *payload) {
|
||||
}
|
||||
|
||||
bool tree_sitter_shared_external_tokens_external_scanner_serialize(void *payload, TSExternalTokenState state) {
|
||||
return true;
|
||||
}
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_deserialize(void *payload, TSExternalTokenState state) {
|
||||
}
|
||||
|
||||
bool tree_sitter_shared_external_tokens_external_scanner_scan(
|
||||
void *payload, TSLexer *lexer, const bool *whitelist) {
|
||||
|
||||
// If a line-break is a valid lookahead token, only skip spaces.
|
||||
if (whitelist[LINE_BREAK]) {
|
||||
while (lexer->lookahead == ' ') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = LINE_BREAK;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
// If a line-break is not a valid lookahead token, skip line breaks as well
|
||||
// as spaces.
|
||||
if (whitelist[STRING]) {
|
||||
while (lexer->lookahead == ' ' || lexer->lookahead == '\n') {
|
||||
lexer->advance(lexer, true);
|
||||
}
|
||||
|
||||
if (lexer->lookahead == '\'') {
|
||||
lexer->advance(lexer, false);
|
||||
|
||||
while (lexer->lookahead != '\'') {
|
||||
lexer->advance(lexer, false);
|
||||
}
|
||||
|
||||
lexer->advance(lexer, false);
|
||||
lexer->result_symbol = STRING;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
void tree_sitter_shared_external_tokens_external_scanner_destroy(void *payload) {
|
||||
}
|
||||
12
spec/helpers/dedent.h
Normal file
12
spec/helpers/dedent.h
Normal file
|
|
@ -0,0 +1,12 @@
|
|||
#include "compiler/util/string_helpers.h"
|
||||
#include <string>
|
||||
|
||||
static std::string dedent(std::string input) {
|
||||
size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
|
||||
std::string whitespace = "\n" + std::string(indent_level, ' ');
|
||||
tree_sitter::util::str_replace(&input, whitespace, "\n");
|
||||
return input.substr(
|
||||
input.find_first_not_of("\n "),
|
||||
input.find_last_not_of("\n ") + 1
|
||||
);
|
||||
}
|
||||
|
|
@ -28,10 +28,11 @@ const char *libcompiler_path =
|
|||
"out/Test/libcompiler.a";
|
||||
#endif
|
||||
|
||||
static std::string run_cmd(const char *cmd, const char *args[]) {
|
||||
static std::string run_command(const char *cmd, const char *args[]) {
|
||||
int child_pid = fork();
|
||||
if (child_pid < 0)
|
||||
if (child_pid < 0) {
|
||||
return "fork failed";
|
||||
}
|
||||
|
||||
if (child_pid == 0) {
|
||||
close(0);
|
||||
|
|
@ -39,7 +40,6 @@ static std::string run_cmd(const char *cmd, const char *args[]) {
|
|||
dup2(2, 1);
|
||||
dup2(1, 2);
|
||||
execvp(cmd, (char * const * )args);
|
||||
return "";
|
||||
}
|
||||
|
||||
int status;
|
||||
|
|
@ -47,12 +47,16 @@ static std::string run_cmd(const char *cmd, const char *args[]) {
|
|||
waitpid(child_pid, &status, 0);
|
||||
} while (!WIFEXITED(status));
|
||||
|
||||
if (WEXITSTATUS(status) == 0)
|
||||
if (WEXITSTATUS(status) == 0) {
|
||||
return "";
|
||||
else
|
||||
} else {
|
||||
return "command failed";
|
||||
}
|
||||
}
|
||||
|
||||
return "";
|
||||
static bool file_exists(const string &path) {
|
||||
struct stat file_stat;
|
||||
return stat(path.c_str(), &file_stat) == 0;
|
||||
}
|
||||
|
||||
static int get_modified_time(const string &path) {
|
||||
|
|
@ -67,46 +71,46 @@ static int get_modified_time(const string &path) {
|
|||
|
||||
const TSLanguage *load_language(const string &source_filename,
|
||||
const string &lib_filename,
|
||||
const string &language_name) {
|
||||
string language_function_name = "ts_language_" + language_name;
|
||||
const string &language_name,
|
||||
string external_scanner_filename = "") {
|
||||
string language_function_name = "tree_sitter_" + language_name;
|
||||
string header_dir = getenv("PWD") + string("/include");
|
||||
int source_mtime = get_modified_time(source_filename);
|
||||
int header_mtime = get_modified_time(header_dir + "/tree_sitter/parser.h");
|
||||
int lib_mtime = get_modified_time(lib_filename);
|
||||
int external_scanner_mtime = get_modified_time(external_scanner_filename);
|
||||
|
||||
if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime) {
|
||||
string obj_filename = lib_filename + ".o";
|
||||
const char *compiler_name = getenv("CC");
|
||||
if (!compiler_name) {
|
||||
compiler_name = "gcc";
|
||||
}
|
||||
if (!header_mtime || lib_mtime < header_mtime || lib_mtime < source_mtime ||
|
||||
lib_mtime < external_scanner_mtime) {
|
||||
const char *compiler_name = getenv("CXX");
|
||||
if (!compiler_name) compiler_name = "c++";
|
||||
|
||||
const char *compile_argv[] = {
|
||||
compiler_name,
|
||||
"-x", "c",
|
||||
"-fPIC",
|
||||
"-g",
|
||||
"-I", header_dir.c_str(),
|
||||
"-c", source_filename.c_str(),
|
||||
"-o", obj_filename.c_str(),
|
||||
NULL
|
||||
};
|
||||
string compile_error = run_cmd("gcc", compile_argv);
|
||||
if (!compile_error.empty()) {
|
||||
AssertThat(string(compile_error), IsEmpty());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const char *link_argv[] = {
|
||||
vector<const char *> compile_args = {
|
||||
compiler_name,
|
||||
"-shared",
|
||||
"-Wl", obj_filename.c_str(),
|
||||
"-fPIC",
|
||||
"-I", header_dir.c_str(),
|
||||
"-o", lib_filename.c_str(),
|
||||
NULL
|
||||
"-x", "c",
|
||||
source_filename.c_str()
|
||||
};
|
||||
string link_error = run_cmd("gcc", link_argv);
|
||||
if (!link_error.empty()) {
|
||||
AssertThat(link_error, IsEmpty());
|
||||
|
||||
if (!external_scanner_filename.empty()) {
|
||||
compile_args.push_back("-g");
|
||||
string extension = external_scanner_filename.substr(external_scanner_filename.rfind("."));
|
||||
if (extension == ".c") {
|
||||
compile_args.push_back("-xc");
|
||||
} else {
|
||||
compile_args.push_back("-xc++");
|
||||
}
|
||||
compile_args.push_back(external_scanner_filename.c_str());
|
||||
}
|
||||
|
||||
compile_args.push_back(nullptr);
|
||||
|
||||
string compile_error = run_command(compiler_name, compile_args.data());
|
||||
if (!compile_error.empty()) {
|
||||
AssertThat(string(compile_error), IsEmpty());
|
||||
return nullptr;
|
||||
}
|
||||
}
|
||||
|
|
@ -118,19 +122,19 @@ const TSLanguage *load_language(const string &source_filename,
|
|||
return nullptr;
|
||||
}
|
||||
|
||||
void *symbol_value = dlsym(parser_lib, language_function_name.c_str());
|
||||
if (!symbol_value) {
|
||||
void *language_function = dlsym(parser_lib, language_function_name.c_str());
|
||||
if (!language_function) {
|
||||
std::string message(dlerror());
|
||||
AssertThat(message, IsEmpty());
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
typedef TSLanguage * (* LanguageFunction)();
|
||||
LanguageFunction language_fn = reinterpret_cast<LanguageFunction>(symbol_value);
|
||||
return language_fn();
|
||||
return reinterpret_cast<TSLanguage *(*)()>(language_function)();
|
||||
}
|
||||
|
||||
const TSLanguage *load_compile_result(const string &name, const TSCompileResult &compile_result) {
|
||||
const TSLanguage *load_compile_result(const string &name,
|
||||
const TSCompileResult &compile_result,
|
||||
string external_scanner_path) {
|
||||
if (compile_result.error_type != TSCompileErrorTypeNone) {
|
||||
Assert::Failure(string("Compilation failed ") + compile_result.error_message);
|
||||
return nullptr;
|
||||
|
|
@ -146,7 +150,7 @@ const TSLanguage *load_compile_result(const string &name, const TSCompileResult
|
|||
source_file << compile_result.code;
|
||||
source_file.close();
|
||||
|
||||
const TSLanguage *language = load_language(source_filename, lib_filename, name);
|
||||
auto language = load_language(source_filename, lib_filename, name, external_scanner_path);
|
||||
free(compile_result.code);
|
||||
return language;
|
||||
}
|
||||
|
|
@ -158,6 +162,10 @@ const TSLanguage *get_test_language(const string &language_name) {
|
|||
string language_dir = string("spec/fixtures/grammars/") + language_name;
|
||||
string grammar_filename = language_dir + "/src/grammar.json";
|
||||
string parser_filename = language_dir + "/src/parser.c";
|
||||
string external_scanner_filename = language_dir + "/src/scanner.cc";
|
||||
if (!file_exists(external_scanner_filename)) {
|
||||
external_scanner_filename = "";
|
||||
}
|
||||
|
||||
int grammar_mtime = get_modified_time(grammar_filename);
|
||||
if (!grammar_mtime)
|
||||
|
|
@ -192,7 +200,7 @@ const TSLanguage *get_test_language(const string &language_name) {
|
|||
|
||||
mkdir("out/tmp", 0777);
|
||||
string lib_filename = "out/tmp/" + language_name + ".so";
|
||||
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name);
|
||||
const TSLanguage *language = load_language(parser_filename, lib_filename, language_name, external_scanner_filename);
|
||||
loaded_languages[language_name] = language;
|
||||
return language;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -5,7 +5,8 @@
|
|||
#include "tree_sitter/runtime.h"
|
||||
#include <string>
|
||||
|
||||
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &);
|
||||
const TSLanguage *load_compile_result(const std::string &, const TSCompileResult &,
|
||||
std::string external_scanner_path = "");
|
||||
const TSLanguage *get_test_language(const std::string &language_name);
|
||||
|
||||
#endif // HELPERS_LOAD_LANGUAGE_H_
|
||||
|
|
|
|||
|
|
@ -15,7 +15,9 @@ bool operator==(const TSRange &left, const TSRange &right) {
|
|||
}
|
||||
|
||||
bool operator==(const Length &left, const Length &right) {
|
||||
return length_eq(left, right);
|
||||
return left.bytes == right.bytes &&
|
||||
left.chars == right.chars &&
|
||||
left.extent == right.extent;
|
||||
}
|
||||
|
||||
bool operator<(const TSPoint &left, const TSPoint &right) {
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ namespace tree_sitter {
|
|||
using std::ostream;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using rules::Symbol;
|
||||
|
||||
rule_ptr character(const set<uint32_t> &ranges) {
|
||||
return character(ranges, true);
|
||||
|
|
@ -28,11 +29,11 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr i_sym(size_t index) {
|
||||
return make_shared<rules::Symbol>(index);
|
||||
return make_shared<Symbol>(index, Symbol::NonTerminal);
|
||||
}
|
||||
|
||||
rule_ptr i_token(size_t index) {
|
||||
return make_shared<rules::Symbol>(index, true);
|
||||
return make_shared<Symbol>(index, Symbol::Terminal);
|
||||
}
|
||||
|
||||
rule_ptr metadata(rule_ptr rule, rules::MetadataParams params) {
|
||||
|
|
|
|||
|
|
@ -23,20 +23,21 @@ static void append_to_scope_sequence(ScopeSequence *sequence,
|
|||
ScopeStack *current_scopes,
|
||||
TSNode node, TSDocument *document,
|
||||
const std::string &text) {
|
||||
append_text_to_scope_sequence(sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size());
|
||||
append_text_to_scope_sequence(
|
||||
sequence, current_scopes, text, ts_node_start_byte(node) - sequence->size()
|
||||
);
|
||||
|
||||
string scope = ts_node_type(node, document);
|
||||
current_scopes->push_back(scope);
|
||||
size_t child_count = ts_node_child_count(node);
|
||||
if (child_count > 0) {
|
||||
for (size_t i = 0; i < child_count; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
append_to_scope_sequence(sequence, current_scopes, child, document, text);
|
||||
}
|
||||
} else {
|
||||
size_t length = ts_node_end_byte(node) - ts_node_start_byte(node);
|
||||
append_text_to_scope_sequence(sequence, current_scopes, text, length);
|
||||
current_scopes->push_back(ts_node_type(node, document));
|
||||
|
||||
for (size_t i = 0, n = ts_node_child_count(node); i < n; i++) {
|
||||
TSNode child = ts_node_child(node, i);
|
||||
append_to_scope_sequence(sequence, current_scopes, child, document, text);
|
||||
}
|
||||
|
||||
append_text_to_scope_sequence(
|
||||
sequence, current_scopes, text, ts_node_end_byte(node) - sequence->size()
|
||||
);
|
||||
|
||||
current_scopes->pop_back();
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@@ -10,16 +10,7 @@ namespace tree_sitter {

ostream &operator<<(ostream &stream, const Grammar &grammar) {
  stream << string("#<grammar");
  stream << string(" rules: {");
  bool started = false;
  for (auto pair : grammar.rules) {
    if (started)
      stream << string(", ");
    stream << pair.first;
    stream << string(" => ");
    stream << pair.second;
    started = true;
  }
  stream << " rules: " << grammar.rules;
  return stream << string("}>");
}

@@ -85,6 +76,11 @@ ostream &operator<<(ostream &stream, const ParseState &state) {
  return stream << string(">");
}

ostream &operator<<(ostream &stream, const ExternalToken &external_token) {
  return stream << "{" << external_token.name << ", " << external_token.type <<
         "," << external_token.corresponding_internal_token << "}";
}

ostream &operator<<(ostream &stream, const ProductionStep &step) {
  stream << "(symbol: " << step.symbol << ", precedence:" << to_string(step.precedence);
  stream << ", associativity: ";

@@ -97,6 +97,7 @@ struct AdvanceAction;
struct AcceptTokenAction;
class ParseAction;
class ParseState;
struct ExternalToken;
struct ProductionStep;
struct PrecedenceRange;

@@ -110,6 +111,7 @@ ostream &operator<<(ostream &, const AdvanceAction &);
ostream &operator<<(ostream &, const AcceptTokenAction &);
ostream &operator<<(ostream &, const ParseAction &);
ostream &operator<<(ostream &, const ParseState &);
ostream &operator<<(ostream &, const ExternalToken &);
ostream &operator<<(ostream &, const ProductionStep &);
ostream &operator<<(ostream &, const PrecedenceRange &);

@@ -1,19 +1,11 @@
#include "spec_helper.h"
#include "runtime/alloc.h"
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"
#include "compiler/util/string_helpers.h"
#include <map>

static string dedent(string input) {
  size_t indent_level = input.find_first_not_of("\n ") - input.find_first_not_of("\n");
  string whitespace = "\n" + string(indent_level, ' ');
  util::str_replace(&input, whitespace, "\n");
  return input.substr(
    input.find_first_not_of("\n "),
    input.find_last_not_of("\n ") + 1
  );
}

static string fill_template(string input, map<string, string> parameters) {
  string result = input;
  for (const auto &pair : parameters) {

@@ -507,6 +499,190 @@ describe("compile_grammar", []() {
    });
  });

  describe("external scanners", [&]() {
    it("can tokenize using arbitrary user-defined scanner functions", [&]() {
      string grammar = R"JSON({
        "name": "external_scanner_example",

        "externals": [
          "_percent_string",
          "_percent_string_start",
          "_percent_string_end"
        ],

        "extras": [
          {"type": "PATTERN", "value": "\\s"}
        ],

        "rules": {
          "expression": {
            "type": "CHOICE",
            "members": [
              {"type": "SYMBOL", "name": "string"},
              {"type": "SYMBOL", "name": "sum"},
              {"type": "SYMBOL", "name": "identifier"}
            ]
          },

          "sum": {
            "type": "PREC_LEFT",
            "value": 0,
            "content": {
              "type": "SEQ",
              "members": [
                {"type": "SYMBOL", "name": "expression"},
                {"type": "STRING", "value": "+"},
                {"type": "SYMBOL", "name": "expression"}
              ]
            }
          },

          "string": {
            "type": "CHOICE",
            "members": [
              {"type": "SYMBOL", "name": "_percent_string"},
              {
                "type": "SEQ",
                "members": [
                  {"type": "SYMBOL", "name": "_percent_string_start"},
                  {"type": "SYMBOL", "name": "expression"},
                  {"type": "SYMBOL", "name": "_percent_string_end"}
                ]
              }
            ]
          },

          "identifier": {
            "type": "PATTERN",
            "value": "\\a+"
          }
        }
      })JSON";

      TSCompileResult result = ts_compile_grammar(grammar.c_str());
      AssertThat(result.error_message, IsNull());

      ts_document_set_language(document, load_compile_result(
        "external_scanner_example",
        result,
        "spec/fixtures/external_scanners/percent_strings.c"
      ));

      ts_document_set_input_string(document, "x + %(sup (external) scanner?)");
      ts_document_parse(document);
      assert_root_node("(expression (sum (expression (identifier)) (expression (string))))");

      ts_document_set_input_string(document, "%{sup {} #{x + y} {} scanner?}");
      ts_document_parse(document);
      assert_root_node("(expression (string (expression (sum (expression (identifier)) (expression (identifier))))))");
    });

    it("allows external scanners to refer to tokens that are defined internally", [&]() {
      string grammar = R"JSON({
        "name": "shared_external_tokens",

        "externals": [
          "string",
          "line_break"
        ],

        "extras": [
          {"type": "PATTERN", "value": "\\s"}
        ],

        "rules": {
          "statement": {
            "type": "SEQ",
            "members": [
              {"type": "SYMBOL", "name": "_expression"},
              {"type": "SYMBOL", "name": "_expression"},
              {"type": "SYMBOL", "name": "line_break"}
            ]
          },

          "_expression": {
            "type": "CHOICE",
            "members": [
              {"type": "SYMBOL", "name": "string"},
              {"type": "SYMBOL", "name": "variable"},
              {"type": "SYMBOL", "name": "number"}
            ]
          },

          "variable": {"type": "PATTERN", "value": "\\a+"},
          "number": {"type": "PATTERN", "value": "\\d+"},
          "line_break": {"type": "STRING", "value": "\n"}
        }
      })JSON";

      TSCompileResult result = ts_compile_grammar(grammar.c_str());
      AssertThat(result.error_message, IsNull());

      ts_document_set_language(document, load_compile_result(
        "shared_external_tokens",
        result,
        "spec/fixtures/external_scanners/shared_external_tokens.c"
      ));

      ts_document_set_input_string(document, "a b\n");
      ts_document_parse(document);
      assert_root_node("(statement (variable) (variable) (line_break))");

      ts_document_set_input_string(document, "a \nb\n");
      ts_document_parse(document);
      assert_root_node("(statement (variable) (variable) (line_break))");

      ts_document_set_input_string(document, "'hello' 'world'\n");
      ts_document_parse(document);
      assert_root_node("(statement (string) (string) (line_break))");

      ts_document_set_input_string(document, "'hello' \n'world'\n");
      ts_document_parse(document);
      assert_root_node("(statement (string) (string) (line_break))");
    });

    it("allows external tokens to be used as extras", [&]() {
      string grammar = R"JSON({
        "name": "extra_external_tokens",

        "externals": [
          "comment"
        ],

        "extras": [
          {"type": "PATTERN", "value": "\\s"},
          {"type": "SYMBOL", "name": "comment"}
        ],

        "rules": {
          "assignment": {
            "type": "SEQ",
            "members": [
              {"type": "SYMBOL", "name": "variable"},
              {"type": "STRING", "value": "="},
              {"type": "SYMBOL", "name": "variable"}
            ]
          },

          "variable": {"type": "PATTERN", "value": "\\a+"}
        }
      })JSON";

      TSCompileResult result = ts_compile_grammar(grammar.c_str());
      AssertThat(result.error_message, IsNull());

      ts_document_set_language(document, load_compile_result(
        "extra_external_tokens",
        result,
        "spec/fixtures/external_scanners/extra_external_tokens.c"
      ));

      ts_document_set_input_string(document, "x = # a comment\n y");
      ts_document_parse(document);
      assert_root_node("(assignment (variable) (comment) (variable))");
    });
  });

  describe("when the grammar's start symbol is a token", [&]() {
    it("parses the token", [&]() {
      TSCompileResult result = ts_compile_grammar(R"JSON(

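The scanner fixtures these specs load (percent_strings.c and the others under spec/fixtures/external_scanners/) are not shown in this excerpt of the diff. As a rough orientation only, a hand-written scanner for the externals interface is a small set of C functions that create, reset, drive, and serialize some custom lexing state. The sketch below is hypothetical: the entry-point names, the header path, and the overall shape are assumptions for illustration, not taken from this commit.

// Hypothetical sketch of an external scanner fixture -- NOT the actual
// percent_strings.c from this commit. Entry-point names and the header
// path are assumed.
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "tree_sitter/parser.h"

// Indices must match the order of the grammar's "externals" array.
enum TokenType {
  PERCENT_STRING,
  PERCENT_STRING_START,
  PERCENT_STRING_END
};

// Custom lexing state: the delimiter that will close the current string.
typedef struct {
  int32_t open_delimiter;
} Scanner;

void *scanner_create() {
  return calloc(1, sizeof(Scanner));
}

void scanner_destroy(void *payload) {
  free(payload);
}

void scanner_reset(void *payload) {
  ((Scanner *)payload)->open_delimiter = 0;
}

// The parser passes a whitelist of the external tokens that are valid in
// the current parse state; the scanner reads input through the TSLexer.
bool scanner_scan(void *payload, TSLexer *lexer, const bool *symbol_whitelist) {
  Scanner *scanner = (Scanner *)payload;
  if (symbol_whitelist[PERCENT_STRING_START] && lexer->lookahead == '%') {
    lexer->advance(lexer, false);                // consume '%'
    scanner->open_delimiter = lexer->lookahead;  // remember '(' or '{'
    lexer->advance(lexer, false);                // consume the delimiter
    lexer->result_symbol = PERCENT_STRING_START;
    return true;
  }
  return false;
}

// Serialization lets the runtime snapshot and restore scanner state during
// incremental reparsing; the state is a small fixed-size buffer.
bool scanner_serialize(void *payload, TSExternalTokenState state) {
  state[0] = (uint8_t)((Scanner *)payload)->open_delimiter;
  return true;
}

void scanner_deserialize(void *payload, const TSExternalTokenState state) {
  ((Scanner *)payload)->open_delimiter = state[0];
}
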
@@ -84,6 +84,7 @@ describe("The Corpus", []() {
    "json",
    "c",
    "cpp",
    "python",
  });

  for (auto &language_name : test_languages) {

@@ -5,6 +5,7 @@
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include "helpers/spy_logger.h"
#include "helpers/stderr_logger.h"
#include "helpers/spy_input.h"
#include "helpers/load_language.h"

@@ -15,22 +16,22 @@ TSPoint point(size_t row, size_t column) {
START_TEST

describe("Document", [&]() {
  TSDocument *doc;
  TSDocument *document;
  TSNode root;

  before_each([&]() {
    record_alloc::start();
    doc = ts_document_new();
    document = ts_document_new();
  });

  after_each([&]() {
    ts_document_free(doc);
    ts_document_free(document);
    record_alloc::stop();
    AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
  });

  auto assert_node_string_equals = [&](TSNode node, const string &expected) {
    char *str = ts_node_string(node, doc);
    char *str = ts_node_string(node, document);
    string actual(str);
    ts_free(str);
    AssertThat(actual, Equals(expected));

@@ -42,11 +43,11 @@ describe("Document", [&]() {
    before_each([&]() {
      spy_input = new SpyInput("{\"key\": [null, 2]}", 3);

      ts_document_set_language(doc, get_test_language("json"));
      ts_document_set_input_string(doc, "{\"key\": [1, 2]}");
      ts_document_parse(doc);
      ts_document_set_language(document, get_test_language("json"));
      ts_document_set_input_string(document, "{\"key\": [1, 2]}");
      ts_document_parse(document);

      root = ts_document_root_node(doc);
      root = ts_document_root_node(document);
      assert_node_string_equals(
        root,
        "(object (pair (string) (array (number) (number))))");

@@ -61,11 +62,11 @@ describe("Document", [&]() {
        spy_input->content = string((const char *)content, sizeof(content));
        spy_input->encoding = TSInputEncodingUTF16;

        ts_document_set_input(doc, spy_input->input());
        ts_document_invalidate(doc);
        ts_document_parse(doc);
        ts_document_set_input(document, spy_input->input());
        ts_document_invalidate(document);
        ts_document_parse(document);

        root = ts_document_root_node(doc);
        root = ts_document_root_node(document);
        assert_node_string_equals(
          root,
          "(array (true) (false))");

@@ -77,27 +78,27 @@ describe("Document", [&]() {
        spy_input->encoding = TSInputEncodingUTF16;
        // spy_input->measure_columns_in_bytes

        ts_document_set_input(doc, spy_input->input());
        ts_document_invalidate(doc);
        ts_document_parse(doc);
        ts_document_set_input(document, spy_input->input());
        ts_document_invalidate(document);
        ts_document_parse(document);
      });

      it("allows the input to be retrieved later", [&]() {
        ts_document_set_input(doc, spy_input->input());
        AssertThat(ts_document_input(doc).payload, Equals<void *>(spy_input));
        AssertThat(ts_document_input(doc).read, Equals(spy_input->input().read));
        AssertThat(ts_document_input(doc).seek, Equals(spy_input->input().seek));
        ts_document_set_input(document, spy_input->input());
        AssertThat(ts_document_input(document).payload, Equals<void *>(spy_input));
        AssertThat(ts_document_input(document).read, Equals(spy_input->input().read));
        AssertThat(ts_document_input(document).seek, Equals(spy_input->input().seek));
      });

      it("does not assume that the document's text has changed", [&]() {
        ts_document_set_input(doc, spy_input->input());
        AssertThat(ts_document_root_node(doc), Equals<TSNode>(root));
        ts_document_set_input(document, spy_input->input());
        AssertThat(ts_document_root_node(document), Equals<TSNode>(root));
        AssertThat(ts_node_has_changes(root), IsFalse());
        AssertThat(spy_input->strings_read, Equals(vector<string>({ "" })));
      });

      it("reads text from the new input for future parses", [&]() {
        ts_document_set_input(doc, spy_input->input());
        ts_document_set_input(document, spy_input->input());

        // Insert 'null', delete '1'.
        TSInputEdit edit = {};

@@ -105,28 +106,28 @@ describe("Document", [&]() {
        edit.extent_added.column = edit.bytes_added = 4;
        edit.extent_removed.column = edit.bytes_removed = 1;

        ts_document_edit(doc, edit);
        ts_document_parse(doc);
        ts_document_edit(document, edit);
        ts_document_parse(document);

        TSNode new_root = ts_document_root_node(doc);
        TSNode new_root = ts_document_root_node(document);
        assert_node_string_equals(
          new_root,
          "(object (pair (string) (array (null) (number))))");
        AssertThat(spy_input->strings_read, Equals(vector<string>({" [null, 2"})));
        AssertThat(spy_input->strings_read, Equals(vector<string>({ " [null, 2" })));
      });

      it("reads from the new input correctly when the old input was blank", [&]() {
        ts_document_set_input_string(doc, "");
        ts_document_parse(doc);
        TSNode new_root = ts_document_root_node(doc);
        ts_document_set_input_string(document, "");
        ts_document_parse(document);
        TSNode new_root = ts_document_root_node(document);
        AssertThat(ts_node_end_char(new_root), Equals<size_t>(0));
        assert_node_string_equals(
          new_root,
          "(ERROR)");

        ts_document_set_input_string(doc, "1");
        ts_document_parse(doc);
        new_root = ts_document_root_node(doc);
        ts_document_set_input_string(document, "1");
        ts_document_parse(document);
        new_root = ts_document_root_node(document);
        AssertThat(ts_node_end_char(new_root), Equals<size_t>(1));
        assert_node_string_equals(
          new_root,

@@ -136,33 +137,44 @@ describe("Document", [&]() {

  describe("set_language(language)", [&]() {
    before_each([&]() {
      ts_document_set_input_string(doc, "{\"key\": [1, 2]}\n");
      ts_document_set_input_string(document, "{\"key\": [1, 2]}\n");
    });

    it("uses the given language for future parses", [&]() {
      ts_document_set_language(doc, get_test_language("json"));
      ts_document_parse(doc);
      ts_document_set_language(document, get_test_language("json"));
      ts_document_parse(document);

      root = ts_document_root_node(doc);
      root = ts_document_root_node(document);
      assert_node_string_equals(
        root,
        "(object (pair (string) (array (number) (number))))");
    });

    it("clears out any previous tree", [&]() {
      ts_document_set_language(doc, get_test_language("json"));
      ts_document_parse(doc);
      ts_document_set_language(document, get_test_language("json"));
      ts_document_parse(document);

      ts_document_set_language(doc, get_test_language("javascript"));
      AssertThat(ts_document_root_node(doc).data, Equals<void *>(nullptr));
      ts_document_set_language(document, get_test_language("javascript"));
      AssertThat(ts_document_root_node(document).data, Equals<void *>(nullptr));

      ts_document_parse(doc);
      root = ts_document_root_node(doc);
      ts_document_parse(document);
      root = ts_document_root_node(document);
      assert_node_string_equals(
        root,
        "(program (expression_statement "
        "(object (pair (string) (array (number) (number))))))");
    });

    it("does not allow setting a language with a different version number", [&]() {
      TSLanguage language = *get_test_language("json");
      AssertThat(ts_language_version(&language), Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

      language.version++;
      AssertThat(ts_language_version(&language), !Equals<uint32_t>(TREE_SITTER_LANGUAGE_VERSION));

      ts_document_set_language(document, &language);
      AssertThat(ts_document_language(document), IsNull());
    });
  });

  describe("set_logger(TSLogger)", [&]() {

@@ -170,45 +182,39 @@ describe("Document", [&]() {

    before_each([&]() {
      logger = new SpyLogger();
      ts_document_set_language(doc, get_test_language("json"));
      ts_document_set_input_string(doc, "[1, 2]");
      ts_document_set_language(document, get_test_language("json"));
      ts_document_set_input_string(document, "[1, 2]");
    });

    after_each([&]() {
      delete logger;
    });

    it("calls the debugger with a message for each lex action", [&]() {
      ts_document_set_logger(doc, logger->logger());
      ts_document_parse(doc);

      AssertThat(logger->messages, Contains("lookahead char:'1'"));
      AssertThat(logger->messages, Contains("lookahead char:'['"));
    });

    it("calls the debugger with a message for each parse action", [&]() {
      ts_document_set_logger(doc, logger->logger());
      ts_document_parse(doc);
      ts_document_set_logger(document, logger->logger());
      ts_document_parse(document);

      AssertThat(logger->messages, Contains("new_parse"));
      AssertThat(logger->messages, Contains("lookahead char:'['"));
      AssertThat(logger->messages, Contains("skip character:' '"));
      AssertThat(logger->messages, Contains("consume character:'['"));
      AssertThat(logger->messages, Contains("consume character:'1'"));
      AssertThat(logger->messages, Contains("reduce sym:array, child_count:4"));
      AssertThat(logger->messages, Contains("accept"));
    });

    it("allows the debugger to be retrieved later", [&]() {
      ts_document_set_logger(doc, logger->logger());
      AssertThat(ts_document_logger(doc).payload, Equals(logger));
      ts_document_set_logger(document, logger->logger());
      AssertThat(ts_document_logger(document).payload, Equals(logger));
    });

    describe("disabling debugging", [&]() {
      before_each([&]() {
        ts_document_set_logger(doc, logger->logger());
        ts_document_set_logger(doc, {NULL, NULL});
        ts_document_set_logger(document, logger->logger());
        ts_document_set_logger(document, {NULL, NULL});
      });

      it("does not call the debugger any more", [&]() {
        ts_document_parse(doc);
        ts_document_parse(document);
        AssertThat(logger->messages, IsEmpty());
      });
    });

@@ -218,12 +224,12 @@ describe("Document", [&]() {
    SpyInput *input;

    before_each([&]() {
      ts_document_set_language(doc, get_test_language("javascript"));
      ts_document_set_language(document, get_test_language("javascript"));
      input = new SpyInput("{a: null};", 3);
      ts_document_set_input(doc, input->input());
      ts_document_parse(doc);
      ts_document_set_input(document, input->input());
      ts_document_parse(document);
      assert_node_string_equals(
        ts_document_root_node(doc),
        ts_document_root_node(document),
        "(program (expression_statement (object (pair (identifier) (null)))))");
    });

@@ -231,26 +237,25 @@ describe("Document", [&]() {
      delete input;
    });

    auto get_ranges = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
    auto get_invalidated_ranges_for_edit = [&](std::function<TSInputEdit()> callback) -> vector<TSRange> {
      TSInputEdit edit = callback();
      ts_document_edit(doc, edit);
      ts_document_edit(document, edit);

      TSRange *ranges;
      uint32_t range_count = 0;

      ts_document_parse_and_get_changed_ranges(doc, &ranges, &range_count);
      ts_document_parse_and_get_changed_ranges(document, &ranges, &range_count);

      vector<TSRange> result;
      for (size_t i = 0; i < range_count; i++)
      for (size_t i = 0; i < range_count; i++) {
        result.push_back(ranges[i]);
      }
      ts_free(ranges);

      return result;
    };

    it("reports changes when one token has been updated", [&]() {
      // Replace `null` with `nothing`
      auto ranges = get_ranges([&]() {
      auto ranges = get_invalidated_ranges_for_edit([&]() {
        return input->replace(input->content.find("ull"), 1, "othing");
      });

@@ -262,7 +267,7 @@ describe("Document", [&]() {
      })));

      // Replace `nothing` with `null` again
      ranges = get_ranges([&]() {
      ranges = get_invalidated_ranges_for_edit([&]() {
        return input->undo();
      });

@@ -276,7 +281,7 @@ describe("Document", [&]() {

    it("reports changes when tokens have been appended", [&]() {
      // Add a second key-value pair
      auto ranges = get_ranges([&]() {
      auto ranges = get_invalidated_ranges_for_edit([&]() {
        return input->replace(input->content.find("}"), 0, ", b: false");
      });

@@ -288,12 +293,12 @@ describe("Document", [&]() {
      })));

      // Add a third key-value pair in between the first two
      ranges = get_ranges([&]() {
      ranges = get_invalidated_ranges_for_edit([&]() {
        return input->replace(input->content.find(", b"), 0, ", c: 1");
      });

      assert_node_string_equals(
        ts_document_root_node(doc),
        ts_document_root_node(document),
        "(program (expression_statement (object "
        "(pair (identifier) (null)) "
        "(pair (identifier) (number)) "

@@ -307,41 +312,39 @@ describe("Document", [&]() {
      })));

      // Delete the middle pair.
      ranges = get_ranges([&]() {
      ranges = get_invalidated_ranges_for_edit([&]() {
        return input->undo();
      });

      assert_node_string_equals(
        ts_document_root_node(doc),
        ts_document_root_node(document),
        "(program (expression_statement (object "
        "(pair (identifier) (null)) "
        "(pair (identifier) (false)))))");

      AssertThat(ranges, Equals(vector<TSRange>({
      })));
      AssertThat(ranges, IsEmpty());

      // Delete the second pair.
      ranges = get_ranges([&]() {
      ranges = get_invalidated_ranges_for_edit([&]() {
        return input->undo();
      });

      assert_node_string_equals(
        ts_document_root_node(doc),
        ts_document_root_node(document),
        "(program (expression_statement (object "
        "(pair (identifier) (null)))))");

      AssertThat(ranges, Equals(vector<TSRange>({
      })));
      AssertThat(ranges, IsEmpty());
    });

    it("reports changes when trees have been wrapped", [&]() {
      // Wrap the object in an assignment expression.
      auto ranges = get_ranges([&]() {
      auto ranges = get_invalidated_ranges_for_edit([&]() {
        return input->replace(input->content.find("null"), 0, "b === ");
      });

      assert_node_string_equals(
        ts_document_root_node(doc),
        ts_document_root_node(document),
        "(program (expression_statement (object "
        "(pair (identifier) (rel_op (identifier) (null))))))");

@@ -4,11 +4,13 @@
#include "helpers/spy_input.h"
#include "helpers/load_language.h"
#include "helpers/record_alloc.h"
#include "helpers/stderr_logger.h"
#include "helpers/dedent.h"

START_TEST

describe("Parser", [&]() {
  TSDocument *doc;
  TSDocument *document;
  SpyInput *input;
  TSNode root;
  size_t chunk_size;

@@ -18,90 +20,76 @@ describe("Parser", [&]() {

    chunk_size = 3;
    input = nullptr;

    doc = ts_document_new();
    document = ts_document_new();
  });

  after_each([&]() {
    if (doc)
      ts_document_free(doc);

    if (input)
      delete input;
    if (document) ts_document_free(document);
    if (input) delete input;

    record_alloc::stop();
    AssertThat(record_alloc::outstanding_allocation_indices(), IsEmpty());
  });

  auto set_text = [&](const char *text) {
  auto set_text = [&](string text) {
    input = new SpyInput(text, chunk_size);
    ts_document_set_input(doc, input->input());
    ts_document_parse(doc);
    ts_document_set_input(document, input->input());
    ts_document_parse(document);

    root = ts_document_root_node(doc);
    AssertThat(ts_node_end_byte(root), Equals(strlen(text)));
    root = ts_document_root_node(document);
    AssertThat(ts_node_end_byte(root), Equals(text.size()));
    input->clear();
  };

  auto insert_text = [&](size_t position, string text) {
    size_t prev_size = ts_node_end_byte(root);
    ts_document_edit(doc, input->replace(position, 0, text));
    ts_document_parse(doc);

    root = ts_document_root_node(doc);
    size_t new_size = ts_node_end_byte(root);
    AssertThat(new_size, Equals(prev_size + text.size()));
  };

  auto delete_text = [&](size_t position, size_t length) {
    size_t prev_size = ts_node_end_byte(root);
    ts_document_edit(doc, input->replace(position, length, ""));
    ts_document_parse(doc);

    root = ts_document_root_node(doc);
    size_t new_size = ts_node_end_byte(root);
    AssertThat(new_size, Equals(prev_size - length));
  };

  auto replace_text = [&](size_t position, size_t length, string new_text) {
    size_t prev_size = ts_node_end_byte(root);

    ts_document_edit(doc, input->replace(position, length, new_text));
    ts_document_parse(doc);
    ts_document_edit(document, input->replace(position, length, new_text));
    ts_document_parse(document);

    root = ts_document_root_node(doc);
    root = ts_document_root_node(document);
    size_t new_size = ts_node_end_byte(root);
    AssertThat(new_size, Equals(prev_size - length + new_text.size()));
  };

  auto insert_text = [&](size_t position, string text) {
    replace_text(position, 0, text);
  };

  auto delete_text = [&](size_t position, size_t length) {
    replace_text(position, length, "");
  };

  auto undo = [&]() {
    ts_document_edit(document, input->undo());
    ts_document_parse(document);
  };

  auto assert_root_node = [&](const string &expected) {
    TSNode node = ts_document_root_node(doc);
    char *str = ts_node_string(node, doc);
    string actual(str);
    ts_free(str);
    TSNode node = ts_document_root_node(document);
    char *node_string = ts_node_string(node, document);
    string actual(node_string);
    ts_free(node_string);
    AssertThat(actual, Equals(expected));
  };

  auto get_node_text = [&](TSNode node) {
    size_t start = ts_node_start_byte(node);
    size_t end = ts_node_end_byte(node);
    return input->content.substr(start, end - start);
  };

  describe("handling errors", [&]() {
    before_each([&]() {
      ts_document_set_language(doc, get_test_language("json"));
    });

    auto get_node_text = [&](TSNode node) {
      size_t start = ts_node_start_byte(node);
      size_t end = ts_node_end_byte(node);
      return input->content.substr(start, end - start);
    };

    describe("when there is an invalid substring right before a valid token", [&]() {
      it("computes the error node's size and position correctly", [&]() {
        ts_document_set_language(document, get_test_language("json"));
        set_text(" [123, @@@@@, true]");

        assert_root_node(
          "(array (number) (ERROR (UNEXPECTED '@')) (true))");

        TSNode error = ts_node_named_child(root, 1);
        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
        AssertThat(ts_node_type(error, document), Equals("ERROR"));
        AssertThat(get_node_text(error), Equals(", @@@@@"));
        AssertThat(ts_node_child_count(error), Equals<size_t>(2));

@@ -112,56 +100,59 @@ describe("Parser", [&]() {
        AssertThat(get_node_text(garbage), Equals("@@@@@"));

        TSNode node_after_error = ts_node_named_child(root, 2);
        AssertThat(ts_node_type(node_after_error, doc), Equals("true"));
        AssertThat(ts_node_type(node_after_error, document), Equals("true"));
        AssertThat(get_node_text(node_after_error), Equals("true"));
      });
    });

    describe("when there is an unexpected string in the middle of a token", [&]() {
      it("computes the error node's size and position correctly", [&]() {
        ts_document_set_language(document, get_test_language("json"));
        set_text(" [123, faaaaalse, true]");

        assert_root_node(
          "(array (number) (ERROR (UNEXPECTED 'a')) (true))");

        TSNode error = ts_node_named_child(root, 1);
        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
        AssertThat(ts_node_type(error, document), Equals("ERROR"));
        AssertThat(ts_node_child_count(error), Equals<size_t>(2));

        TSNode comma = ts_node_child(error, 0);
        AssertThat(ts_node_type(comma, doc), Equals(","));
        AssertThat(ts_node_type(comma, document), Equals(","));
        AssertThat(get_node_text(comma), Equals(","));

        TSNode garbage = ts_node_child(error, 1);
        AssertThat(ts_node_type(garbage, doc), Equals("ERROR"));
        AssertThat(ts_node_type(garbage, document), Equals("ERROR"));
        AssertThat(get_node_text(garbage), Equals("faaaaalse"));

        TSNode last = ts_node_named_child(root, 2);
        AssertThat(ts_node_type(last, doc), Equals("true"));
        AssertThat(ts_node_type(last, document), Equals("true"));
        AssertThat(ts_node_start_byte(last), Equals(strlen(" [123, faaaaalse, ")));
      });
    });

    describe("when there is one unexpected token between two valid tokens", [&]() {
      it("computes the error node's size and position correctly", [&]() {
        ts_document_set_language(document, get_test_language("json"));
        set_text(" [123, true false, true]");

        assert_root_node(
          "(array (number) (true) (ERROR (false)) (true))");

        TSNode error = ts_node_named_child(root, 2);
        AssertThat(ts_node_type(error, doc), Equals("ERROR"));
        AssertThat(ts_node_type(error, document), Equals("ERROR"));
        AssertThat(get_node_text(error), Equals("false"));
        AssertThat(ts_node_child_count(error), Equals<size_t>(1));

        TSNode last = ts_node_named_child(root, 1);
        AssertThat(ts_node_type(last, doc), Equals("true"));
        AssertThat(ts_node_type(last, document), Equals("true"));
        AssertThat(get_node_text(last), Equals("true"));
      });
    });

    describe("when there is an unexpected string at the end of a token", [&]() {
      it("computes the error's size and position correctly", [&]() {
        ts_document_set_language(document, get_test_language("json"));
        set_text(" [123, \"hi\n, true]");

        assert_root_node(

@@ -171,7 +162,7 @@ describe("Parser", [&]() {

    describe("when there is an unterminated error", [&]() {
      it("maintains a consistent tree", [&]() {
        ts_document_set_language(doc, get_test_language("javascript"));
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("a; /* b");
        assert_root_node(
          "(ERROR (program (expression_statement (identifier))) (UNEXPECTED EOF))");

@@ -180,14 +171,9 @@ describe("Parser", [&]() {
  });

  describe("handling extra tokens", [&]() {
    // In the javascript example grammar, ASI works by using newlines as
    // terminators in statements, but also as extra tokens.
    before_each([&]() {
      ts_document_set_language(doc, get_test_language("javascript"));
    });

    describe("when the token appears as part of a grammar rule", [&]() {
      it("is incorporated into the tree", [&]() {
      it("incorporates it into the tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("fn()\n");

        assert_root_node(

@@ -196,7 +182,8 @@ describe("Parser", [&]() {
    });

    describe("when the token appears somewhere else", [&]() {
      it("is incorporated into the tree", [&]() {
      it("incorporates it into the tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text(
          "fn()\n"
          " .otherFn();");

@@ -211,7 +198,8 @@ describe("Parser", [&]() {
    });

    describe("when several extra tokens appear in a row", [&]() {
      it("is incorporated into the tree", [&]() {
      it("incorporates them into the tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text(
          "fn()\n\n"
          "// This is a comment"

@@ -230,199 +218,219 @@ describe("Parser", [&]() {
    });

    describe("editing", [&]() {
      before_each([&]() {
        ts_document_set_language(doc, get_test_language("javascript"));
    describe("creating new tokens near the end of the input", [&]() {
      it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("x * (100 + abc);");

        assert_root_node(
          "(program (expression_statement (math_op "
          "(identifier) "
          "(math_op (number) (identifier)))))");

        insert_text(strlen("x * (100 + abc"), ".d");

        assert_root_node(
          "(program (expression_statement (math_op "
          "(identifier) "
          "(math_op (number) (member_access (identifier) (identifier))))))");

        AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
      });
    });

    describe("inserting text", [&]() {
      describe("creating new tokens near the end of the input", [&]() {
        it("updates the parse tree and re-reads only the changed portion of the text", [&]() {
          set_text("x * (100 + abc);");
    describe("creating new tokens near the beginning of the input", [&]() {
      it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
        chunk_size = 2;

          assert_root_node(
            "(program (expression_statement (math_op "
            "(identifier) "
            "(math_op (number) (identifier)))))");
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("123 + 456 * (10 + x);");

          insert_text(strlen("x * (100 + abc"), ".d");
        assert_root_node(
          "(program (expression_statement (math_op "
          "(number) "
          "(math_op (number) (math_op (number) (identifier))))))");

          assert_root_node(
            "(program (expression_statement (math_op "
            "(identifier) "
            "(math_op (number) (member_access (identifier) (identifier))))))");
        insert_text(strlen("123"), " || 5");

          AssertThat(input->strings_read, Equals(vector<string>({ " + abc.d)" })));
        });
      });

      describe("creating new tokens near the beginning of the input", [&]() {
        it("updates the parse tree and re-reads only the changed portion of the input", [&]() {
          chunk_size = 2;

          set_text("123 + 456 * (10 + x);");

          assert_root_node(
            "(program (expression_statement (math_op "
        assert_root_node(
          "(program (expression_statement (bool_op "
          "(number) "
          "(math_op "
            "(number) "
            "(math_op (number) (math_op (number) (identifier))))))");
          "(math_op (number) (math_op (number) (identifier)))))))");

          insert_text(strlen("123"), " || 5");

          assert_root_node(
            "(program (expression_statement (bool_op "
            "(number) "
            "(math_op "
            "(number) "
            "(math_op (number) (math_op (number) (identifier)))))))");

          AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
        });
        AssertThat(input->strings_read, Equals(vector<string>({ "123 || 5 +" })));
      });
    });

      describe("introducing an error", [&]() {
        it("gives the error the right size", [&]() {
          ts_document_set_language(doc, get_test_language("javascript"));
    describe("introducing an error", [&]() {
      it("gives the error the right size", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("var x = y;");

          set_text("var x = y;");
        assert_root_node(
          "(program (var_declaration (var_assignment "
          "(identifier) (identifier))))");

          assert_root_node(
            "(program (var_declaration (var_assignment "
            "(identifier) (identifier))))");
        insert_text(strlen("var x = y"), " *");

          insert_text(strlen("var x = y"), " *");
        assert_root_node(
          "(program (var_declaration (var_assignment "
          "(identifier) (identifier)) (ERROR)))");

          assert_root_node(
            "(program (var_declaration (var_assignment "
            "(identifier) (identifier)) (ERROR)))");
        insert_text(strlen("var x = y *"), " z");

          insert_text(strlen("var x = y *"), " z");

          assert_root_node(
            "(program (var_declaration (var_assignment "
            "(identifier) (math_op (identifier) (identifier)))))");
        });
        assert_root_node(
          "(program (var_declaration (var_assignment "
          "(identifier) (math_op (identifier) (identifier)))))");
      });
    });

      describe("into the middle of an existing token", [&]() {
        it("updates the parse tree", [&]() {
          set_text("abc * 123;");
    describe("into the middle of an existing token", [&]() {
      it("updates the parse tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("abc * 123;");

          assert_root_node(
            "(program (expression_statement (math_op (identifier) (number))))");
        assert_root_node(
          "(program (expression_statement (math_op (identifier) (number))))");

          insert_text(strlen("ab"), "XYZ");
        insert_text(strlen("ab"), "XYZ");

          assert_root_node(
            "(program (expression_statement (math_op (identifier) (number))))");
        assert_root_node(
          "(program (expression_statement (math_op (identifier) (number))))");

          TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
          AssertThat(ts_node_type(node, doc), Equals("identifier"));
          AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
        });
        TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
        AssertThat(ts_node_type(node, document), Equals("identifier"));
        AssertThat(ts_node_end_byte(node), Equals(strlen("abXYZc")));
      });
    });

      describe("at the end of an existing token", [&]() {
        it("updates the parse tree", [&]() {
          set_text("abc * 123;");
    describe("at the end of an existing token", [&]() {
      it("updates the parse tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("abc * 123;");

          assert_root_node(
            "(program (expression_statement (math_op (identifier) (number))))");
        assert_root_node(
          "(program (expression_statement (math_op (identifier) (number))))");

          insert_text(strlen("abc"), "XYZ");
        insert_text(strlen("abc"), "XYZ");

          assert_root_node(
            "(program (expression_statement (math_op (identifier) (number))))");
        assert_root_node(
          "(program (expression_statement (math_op (identifier) (number))))");

          TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
          AssertThat(ts_node_type(node, doc), Equals("identifier"));
          AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
        });
        TSNode node = ts_node_named_descendant_for_char_range(root, 1, 1);
        AssertThat(ts_node_type(node, document), Equals("identifier"));
        AssertThat(ts_node_end_byte(node), Equals(strlen("abcXYZ")));
      });
    });

      describe("into a node containing a extra token", [&]() {
        it("updates the parse tree", [&]() {
          set_text("123 *\n"
    describe("inserting text into a node containing a extra token", [&]() {
      it("updates the parse tree", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("123 *\n"
                 "// a-comment\n"
                 "abc;");

        assert_root_node(
          "(program (expression_statement (math_op "
          "(number) "
          "(comment) "
          "(identifier))))");

        insert_text(
          strlen("123 *\n"
                 "// a-comment\n"
                 "abc;");
                 "abc"),
          "XYZ");

          assert_root_node(
            "(program (expression_statement (math_op "
            "(number) "
            "(comment) "
            "(identifier))))");

          insert_text(
            strlen("123 *\n"
                   "// a-comment\n"
                   "abc"),
            "XYZ");

          assert_root_node(
            "(program (expression_statement (math_op "
            "(number) "
            "(comment) "
            "(identifier))))");
        });
        assert_root_node(
          "(program (expression_statement (math_op "
          "(number) "
          "(comment) "
          "(identifier))))");
      });
    });

      describe("deleting text", [&]() {
        describe("when a critical token is removed", [&]() {
          it("updates the parse tree, creating an error", [&]() {
            set_text("123 * 456; 789 * 123;");
    describe("when a critical token is removed", [&]() {
      it("updates the parse tree, creating an error", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("123 * 456; 789 * 123;");

            assert_root_node(
              "(program "
              "(expression_statement (math_op (number) (number))) "
              "(expression_statement (math_op (number) (number))))");
        assert_root_node(
          "(program "
          "(expression_statement (math_op (number) (number))) "
          "(expression_statement (math_op (number) (number))))");

            delete_text(strlen("123 "), 2);
        delete_text(strlen("123 "), 2);

            assert_root_node(
              "(program "
              "(expression_statement (number) (ERROR (number))) "
              "(expression_statement (math_op (number) (number))))");
          });
        assert_root_node(
          "(program "
          "(expression_statement (number) (ERROR (number))) "
          "(expression_statement (math_op (number) (number))))");
      });
    });

    describe("replacing text", [&]() {
      it("does not try to re-use nodes that are within the edited region", [&]() {
        ts_document_set_language(doc, get_test_language("javascript"));
    describe("with external tokens", [&]() {
      it("maintains the external scanner's state during incremental parsing", [&]() {
        ts_document_set_language(document, get_test_language("python"));
        string text = dedent(R"PYTHON(
          if a:
            print b
          return c
        )PYTHON");

        set_text("{ x: (b.c) };");
        set_text(text);
        assert_root_node("(module "
          "(if_statement (identifier) "
          "(print_statement (identifier))) "
          "(return_statement (expression_list (identifier))))");

        assert_root_node(
          "(program (expression_statement (object (pair "
          "(identifier) (member_access (identifier) (identifier))))))");
        replace_text(text.find("return"), 0, "  ");
        assert_root_node("(module "
          "(if_statement (identifier) "
          "(print_statement (identifier)) "
          "(return_statement (expression_list (identifier)))))");

        replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c");

        assert_root_node(
          "(program (expression_statement (object (pair "
          "(identifier) (member_access (identifier) (identifier))))))");
        undo();
        assert_root_node("(module "
          "(if_statement (identifier) "
          "(print_statement (identifier))) "
          "(return_statement (expression_list (identifier))))");
      });
    });

    it("does not try to re-use nodes that are within the edited region", [&]() {
      ts_document_set_language(document, get_test_language("javascript"));
      set_text("{ x: (b.c) };");

      assert_root_node(
        "(program (expression_statement (object (pair "
        "(identifier) (member_access (identifier) (identifier))))))");

      replace_text(strlen("{ x: "), strlen("(b.c)"), "b.c");

      assert_root_node(
        "(program (expression_statement (object (pair "
        "(identifier) (member_access (identifier) (identifier))))))");
    });

    it("updates the document's parse count", [&]() {
      ts_document_set_language(doc, get_test_language("javascript"));
      AssertThat(ts_document_parse_count(doc), Equals<size_t>(0));
      ts_document_set_language(document, get_test_language("javascript"));
      AssertThat(ts_document_parse_count(document), Equals<size_t>(0));

      set_text("{ x: (b.c) };");
      AssertThat(ts_document_parse_count(doc), Equals<size_t>(1));
      AssertThat(ts_document_parse_count(document), Equals<size_t>(1));

      insert_text(strlen("{ x"), "yz");
      AssertThat(ts_document_parse_count(doc), Equals<size_t>(2));
      AssertThat(ts_document_parse_count(document), Equals<size_t>(2));
    });
  });

  describe("lexing", [&]() {
    before_each([&]() {
      ts_document_set_language(doc, get_test_language("javascript"));
    });

    describe("handling tokens containing wildcard patterns (e.g. comments)", [&]() {
      it("terminates them at the end of the document", [&]() {
        ts_document_set_language(document, get_test_language("javascript"));
        set_text("x; // this is a comment");

        assert_root_node(

@@ -437,6 +445,7 @@ describe("Parser", [&]() {

    it("recognizes UTF8 characters as single characters", [&]() {
      // 'ΩΩΩ — ΔΔ';
      ts_document_set_language(document, get_test_language("javascript"));
      set_text("'\u03A9\u03A9\u03A9 \u2014 \u0394\u0394';");

      assert_root_node(

@@ -521,6 +521,31 @@ describe("Stack", [&]() {
      free_slice_array(&pop.slices);
    });
  });

  describe("setting external token state", [&]() {
    TSExternalTokenState external_token_state1, external_token_state2;

    it("allows the state to be retrieved", [&]() {
      AssertThat(ts_stack_external_token_state(stack, 0), Equals(nullptr));

      ts_stack_set_external_token_state(stack, 0, &external_token_state1);
      AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1));

      ts_stack_copy_version(stack, 0);
      AssertThat(ts_stack_external_token_state(stack, 0), Equals(&external_token_state1));
    });

    it("does not merge stack versions with different external token states", [&]() {
      ts_stack_copy_version(stack, 0);
      ts_stack_push(stack, 0, trees[0], false, 5);
      ts_stack_push(stack, 1, trees[0], false, 5);

      ts_stack_set_external_token_state(stack, 0, &external_token_state1);
      ts_stack_set_external_token_state(stack, 1, &external_token_state2);

      AssertThat(ts_stack_merge(stack, 0, 1), IsFalse());
    });
  });
});

END_TEST

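The merge restriction exercised by the stack spec above follows from the representation: external scanner state travels with each stack version as a small fixed-size serialized blob, so two versions can only be merged when those blobs agree. A minimal sketch of one plausible comparison, assuming byte-wise equality semantics (the helper name is illustrative, not taken from this commit):

// Assumed semantics: two stack versions merge only when their serialized
// external token states are byte-for-byte identical.
#include <stdbool.h>
#include <stdint.h>
#include <string.h>

typedef uint8_t TSExternalTokenState[16];

static bool external_token_states_eq(const TSExternalTokenState a,
                                     const TSExternalTokenState b) {
  return memcmp(a, b, sizeof(TSExternalTokenState)) == 0;
}
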
@ -22,47 +22,32 @@ void assert_consistent(const Tree *tree) {
|
|||
|
||||
START_TEST
|
||||
|
||||
enum {
|
||||
cat = 1,
|
||||
dog,
|
||||
eel,
|
||||
fox,
|
||||
goat,
|
||||
hog,
|
||||
};
|
||||
|
||||
describe("Tree", []() {
|
||||
Tree *tree1, *tree2, *parent1;
|
||||
enum {
|
||||
symbol1 = 1,
|
||||
symbol2,
|
||||
symbol3,
|
||||
symbol4,
|
||||
symbol5,
|
||||
symbol6,
|
||||
symbol7,
|
||||
symbol8,
|
||||
symbol9,
|
||||
};
|
||||
|
||||
TSSymbolMetadata visible = {true, true, false, true};
|
||||
TSSymbolMetadata invisible = {false, false, false, true};
|
||||
|
||||
before_each([&]() {
|
||||
tree1 = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
|
||||
tree2 = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
|
||||
|
||||
ts_tree_retain(tree1);
|
||||
ts_tree_retain(tree2);
|
||||
parent1 = ts_tree_make_node(dog, 2, tree_array({
|
||||
tree1,
|
||||
tree2,
|
||||
}), visible);
|
||||
});
|
||||
|
||||
after_each([&]() {
|
||||
ts_tree_release(tree1);
|
||||
ts_tree_release(tree2);
|
||||
ts_tree_release(parent1);
|
||||
});
|
||||
|
||||
describe("make_leaf(sym, size, padding, is_hidden)", [&]() {
|
||||
it("does not record that it is fragile", [&]() {
|
||||
AssertThat(tree1->fragile_left, IsFalse());
|
||||
AssertThat(tree1->fragile_right, IsFalse());
|
||||
describe("make_leaf", [&]() {
|
||||
it("does not mark the tree as fragile", [&]() {
|
||||
Tree *tree = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
|
||||
AssertThat(tree->fragile_left, IsFalse());
|
||||
AssertThat(tree->fragile_right, IsFalse());
|
||||
});
|
||||
});
|
||||
|
||||
describe("make_error(size, padding, lookahead_char)", [&]() {
|
||||
it("records that it is fragile", [&]() {
|
||||
describe("make_error", [&]() {
|
||||
it("marks the tree as fragile", [&]() {
|
||||
Tree *error_tree = ts_tree_make_error(
|
||||
length_zero(),
|
||||
length_zero(),
|
||||
|
|
@ -75,15 +60,33 @@ describe("Tree", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("make_node(symbol, child_count, children, is_hidden)", [&]() {
|
||||
it("computes its size based on its child nodes", [&]() {
|
||||
AssertThat(parent1->size.bytes, Equals<size_t>(
|
||||
tree1->size.bytes + + tree2->padding.bytes + tree2->size.bytes));
|
||||
AssertThat(parent1->size.chars, Equals<size_t>(
|
||||
tree1->size.chars + + tree2->padding.chars + tree2->size.chars));
|
||||
describe("make_node", [&]() {
|
||||
Tree *tree1, *tree2, *parent1;
|
||||
|
||||
before_each([&]() {
|
||||
tree1 = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
|
||||
tree2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
|
||||
|
||||
ts_tree_retain(tree1);
|
||||
ts_tree_retain(tree2);
|
||||
parent1 = ts_tree_make_node(symbol3, 2, tree_array({
|
||||
tree1,
|
||||
tree2,
|
||||
}), visible);
|
||||
});
|
||||
|
||||
it("computes its padding based on its first child", [&]() {
|
||||
after_each([&]() {
|
||||
ts_tree_release(tree1);
|
||||
ts_tree_release(tree2);
|
||||
ts_tree_release(parent1);
|
||||
});
|
||||
|
||||
it("computes its size and padding based on its child nodes", [&]() {
|
||||
AssertThat(parent1->size.bytes, Equals<size_t>(
|
||||
tree1->size.bytes + tree2->padding.bytes + tree2->size.bytes));
|
||||
AssertThat(parent1->size.chars, Equals<size_t>(
|
||||
tree1->size.chars + tree2->padding.chars + tree2->size.chars));
|
||||
|
||||
AssertThat(parent1->padding.bytes, Equals<size_t>(tree1->padding.bytes));
|
||||
AssertThat(parent1->padding.chars, Equals<size_t>(tree1->padding.chars));
|
||||
});
|
||||
|
|
@ -97,7 +100,7 @@ describe("Tree", []() {
|
|||
|
||||
ts_tree_retain(tree1);
|
||||
ts_tree_retain(tree2);
|
||||
parent = ts_tree_make_node(eel, 2, tree_array({
|
||||
parent = ts_tree_make_node(symbol3, 2, tree_array({
|
||||
tree1,
|
||||
tree2,
|
||||
}), visible);
|
||||
|
|
@ -121,7 +124,7 @@ describe("Tree", []() {
|
|||
|
||||
ts_tree_retain(tree1);
|
||||
ts_tree_retain(tree2);
|
||||
parent = ts_tree_make_node(eel, 2, tree_array({
|
||||
parent = ts_tree_make_node(symbol3, 2, tree_array({
|
||||
tree1,
|
||||
tree2,
|
||||
}), visible);
|
||||
|
|
@ -145,7 +148,7 @@ describe("Tree", []() {
|
|||
|
||||
ts_tree_retain(tree1);
|
||||
ts_tree_retain(tree2);
|
||||
parent = ts_tree_make_node(eel, 2, tree_array({
|
||||
parent = ts_tree_make_node(symbol3, 2, tree_array({
|
||||
tree1,
|
||||
tree2,
|
||||
}), visible);
|
||||
|
|
@ -162,14 +165,14 @@ describe("Tree", []() {
|
|||
});
|
||||
});
|
||||
|
||||
describe("edit(InputEdit)", [&]() {
|
||||
describe("edit", [&]() {
|
||||
Tree *tree = nullptr;
|
||||
|
||||
before_each([&]() {
|
||||
tree = ts_tree_make_node(cat, 3, tree_array({
|
||||
ts_tree_make_leaf(dog, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
|
||||
ts_tree_make_leaf(eel, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
|
||||
ts_tree_make_leaf(fox, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
tree = ts_tree_make_node(symbol1, 3, tree_array({
  ts_tree_make_leaf(symbol2, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
  ts_tree_make_leaf(symbol3, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
  ts_tree_make_leaf(symbol4, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
}), visible);

AssertThat(tree->padding, Equals<Length>({2, 2, {0, 2}}));

@@ -180,7 +183,6 @@ describe("Tree", []() {
  ts_tree_release(tree);
});

describe("edits within a tree's padding", [&]() {
  it("resizes the padding of the tree and its leftmost descendants", [&]() {
    TSInputEdit edit;

@@ -312,69 +314,124 @@ describe("Tree", []() {
  });
});

describe("equality", [&]() {
  describe("eq", [&]() {
    Tree *leaf;

    before_each([&]() {
      leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, {5, 4, {0, 4}}, visible);
    });

    after_each([&]() {
      ts_tree_release(leaf);
    });

    it("returns true for identical trees", [&]() {
      Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
      AssertThat(ts_tree_eq(tree1, tree1_copy), IsTrue());
      Tree *leaf_copy = ts_tree_make_leaf(symbol1, {2, 1, {1, 1}}, {5, 4, {1, 4}}, visible);
      AssertThat(ts_tree_eq(leaf, leaf_copy), IsTrue());

      Tree *tree2_copy = ts_tree_make_leaf(cat, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);
      AssertThat(ts_tree_eq(tree2, tree2_copy), IsTrue());

      Tree *parent2 = ts_tree_make_node(dog, 2, tree_array({
        tree1_copy,
        tree2_copy,
      Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
        leaf,
        leaf_copy,
      }), visible);
      ts_tree_retain(leaf);
      ts_tree_retain(leaf_copy);

      AssertThat(ts_tree_eq(parent1, parent2), IsTrue());
      Tree *parent_copy = ts_tree_make_node(symbol2, 2, tree_array({
        leaf,
        leaf_copy,
      }), visible);
      ts_tree_retain(leaf);
      ts_tree_retain(leaf_copy);

      ts_tree_release(parent2);
      AssertThat(ts_tree_eq(parent, parent_copy), IsTrue());

      ts_tree_release(leaf_copy);
      ts_tree_release(parent);
      ts_tree_release(parent_copy);
    });

    it("returns false for trees with different symbols", [&]() {
      Tree *different_tree = ts_tree_make_leaf(
        tree1->symbol + 1,
        tree1->padding,
        tree1->size,
      Tree *different_leaf = ts_tree_make_leaf(
        leaf->symbol + 1,
        leaf->padding,
        leaf->size,
        visible);

      AssertThat(ts_tree_eq(tree1, different_tree), IsFalse());
      ts_tree_release(different_tree);
      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
      ts_tree_release(different_leaf);
    });

    it("returns false for trees with different options", [&]() {
      Tree *tree1_copy = ts_tree_make_leaf(cat, tree1->padding, tree1->size, invisible);
      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
      ts_tree_release(tree1_copy);
      Tree *different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, leaf->size, invisible);
      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
      ts_tree_release(different_leaf);
    });

    it("returns false for trees with different sizes", [&]() {
      Tree *tree1_copy = ts_tree_make_leaf(cat, {2, 1, {0, 1}}, tree1->size, invisible);
      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
      ts_tree_release(tree1_copy);
      Tree *different_leaf = ts_tree_make_leaf(symbol1, {2, 1, {0, 1}}, leaf->size, invisible);
      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
      ts_tree_release(different_leaf);

      tree1_copy = ts_tree_make_leaf(cat, tree1->padding, {5, 4, {1, 10}}, invisible);
      AssertThat(ts_tree_eq(tree1, tree1_copy), IsFalse());
      ts_tree_release(tree1_copy);
      different_leaf = ts_tree_make_leaf(symbol1, leaf->padding, {5, 4, {1, 10}}, invisible);
      AssertThat(ts_tree_eq(leaf, different_leaf), IsFalse());
      ts_tree_release(different_leaf);
    });

    it("returns false for trees with different children", [&]() {
      Tree *different_tree = ts_tree_make_leaf(
        tree1->symbol + 1,
        tree1->padding,
        tree1->size,
        visible);
      Tree *leaf2 = ts_tree_make_leaf(symbol2, {1, 1, {0, 1}}, {3, 3, {0, 3}}, visible);

      ts_tree_retain(different_tree);
      ts_tree_retain(tree2);
      Tree *different_parent = ts_tree_make_node(dog, 2, tree_array({
        different_tree, tree2,
      Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
        leaf,
        leaf2,
      }), visible);
      ts_tree_retain(leaf);
      ts_tree_retain(leaf2);

      Tree *different_parent = ts_tree_make_node(symbol2, 2, tree_array({
        leaf2,
        leaf,
      }), visible);
      ts_tree_retain(leaf2);
      ts_tree_retain(leaf);

      AssertThat(ts_tree_eq(different_parent, parent), IsFalse());
      AssertThat(ts_tree_eq(parent, different_parent), IsFalse());

      ts_tree_release(leaf2);
      ts_tree_release(parent);
      ts_tree_release(different_parent);
    });
  });

  describe("last_external_token_state", [&]() {
    Length padding = {1, 1, {0, 1}};
    Length size = {2, 2, {0, 2}};

    auto make_external = [](Tree *tree) {
      tree->has_external_tokens = true;
      tree->has_external_token_state = true;
      return tree;
    };

    it("returns the last serialized external token state in the given tree", [&]() {
      Tree *tree1, *tree2, *tree3, *tree4, *tree5, *tree6, *tree7, *tree8, *tree9;

      tree1 = ts_tree_make_node(symbol1, 2, tree_array({
        (tree2 = ts_tree_make_node(symbol2, 3, tree_array({
          (tree3 = make_external(ts_tree_make_leaf(symbol3, padding, size, visible))),
          (tree4 = ts_tree_make_leaf(symbol4, padding, size, visible)),
          (tree5 = ts_tree_make_leaf(symbol5, padding, size, visible)),
        }), visible)),
        (tree6 = ts_tree_make_node(symbol6, 2, tree_array({
          (tree7 = ts_tree_make_node(symbol7, 1, tree_array({
            (tree8 = ts_tree_make_leaf(symbol8, padding, size, visible)),
          }), visible)),
          (tree9 = ts_tree_make_leaf(symbol9, padding, size, visible)),
        }), visible)),
      }), visible);

      AssertThat(ts_tree_eq(different_parent, parent1), IsFalse());
      AssertThat(ts_tree_eq(parent1, different_parent), IsFalse());

      ts_tree_release(different_tree);
      ts_tree_release(different_parent);
      auto state = ts_tree_last_external_token_state(tree1);
      AssertThat(state, Equals(&tree3->external_token_state));
    });
  });
});
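The spec above pins down the lookup's contract: the one node marked with `has_external_token_state` (here `tree3`) is the node whose serialized state must be returned, however deeply it is nested. A minimal sketch of such a lookup in C, assuming the `Tree` struct exposes the `child_count`, `children`, `has_external_token_state`, and `external_token_state` fields that the test refers to (the runtime's real field layout and traversal may differ):

// Hypothetical sketch; not the runtime's actual implementation.
static const TSExternalTokenState *last_external_token_state(const Tree *tree) {
  // Visit children right-to-left so that the *last* serialized state
  // in document order wins.
  for (size_t i = tree->child_count; i > 0; i--) {
    const TSExternalTokenState *state = last_external_token_state(tree->children[i - 1]);
    if (state)
      return state;
  }
  return tree->has_external_token_state ? &tree->external_token_state : NULL;
}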
@@ -64,7 +64,7 @@ class LexTableBuilder {
 private:
  void add_lex_state_for_parse_state(ParseState *parse_state) {
    parse_state->lex_state_id =
      add_lex_state(item_set_for_tokens(parse_state->expected_inputs()));
      add_lex_state(item_set_for_terminals(parse_state->terminal_entries));
  }

  LexStateId add_lex_state(const LexItemSet &item_set) {

@@ -112,24 +112,27 @@ class LexTableBuilder {
  void mark_fragile_tokens() {
    for (ParseState &state : parse_table->states) {
      for (auto &entry : state.terminal_entries) {
        auto homonyms = conflict_manager.possible_homonyms.find(entry.first);
        if (homonyms != conflict_manager.possible_homonyms.end())
          for (Symbol::Index homonym : homonyms->second)
            if (state.terminal_entries.count(homonym)) {
              entry.second.reusable = false;
              break;
            }
        Symbol symbol = entry.first;
        if (symbol.is_token()) {
          auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
          if (homonyms != conflict_manager.possible_homonyms.end())
            for (Symbol::Index homonym : homonyms->second)
              if (state.terminal_entries.count(Symbol(homonym, Symbol::Terminal))) {
                entry.second.reusable = false;
                break;
              }

          if (!entry.second.reusable)
            continue;
          if (!entry.second.reusable)
            continue;

          auto extensions = conflict_manager.possible_extensions.find(entry.first);
          if (extensions != conflict_manager.possible_extensions.end())
            for (Symbol::Index extension : extensions->second)
              if (state.terminal_entries.count(extension)) {
                entry.second.depends_on_lookahead = true;
                break;
              }
          auto extensions = conflict_manager.possible_extensions.find(symbol.index);
          if (extensions != conflict_manager.possible_extensions.end())
            for (Symbol::Index extension : extensions->second)
              if (state.terminal_entries.count(Symbol(extension, Symbol::Terminal))) {
                entry.second.depends_on_lookahead = true;
                break;
              }
        }
      }
    }
  }

@@ -150,24 +153,27 @@ class LexTableBuilder {
    }
  }

  LexItemSet item_set_for_tokens(const set<Symbol> &symbols) {
  LexItemSet item_set_for_terminals(const map<Symbol, ParseTableEntry> &terminals) {
    LexItemSet result;
    for (const Symbol &symbol : symbols)
      for (const rule_ptr &rule : rules_for_symbol(symbol))
        for (const rule_ptr &separator_rule : separator_rules)
          result.entries.insert(LexItem(
            symbol,
            Metadata::separator(
              Seq::build({
                separator_rule,
                Metadata::main_token(rule) }))));
    for (const auto &pair : terminals) {
      Symbol symbol = pair.first;
      if (symbol.is_token()) {
        for (const rule_ptr &rule : rules_for_symbol(symbol)) {
          for (const rule_ptr &separator_rule : separator_rules) {
            result.entries.insert(LexItem(
              symbol,
              Metadata::separator(
                Seq::build({
                  separator_rule,
                  Metadata::main_token(rule) }))));
          }
        }
      }
    }
    return result;
  }

  vector<rule_ptr> rules_for_symbol(const rules::Symbol &symbol) {
    if (!symbol.is_token)
      return {};

    if (symbol == rules::END_OF_INPUT())
      return { CharacterSet().include(0).copy() };
@@ -52,7 +52,10 @@ class ParseTableBuilder {
      allow_any_conflict(false) {}

  pair<ParseTable, CompileError> build() {
    Symbol start_symbol = Symbol(0, grammar.variables.empty());
    Symbol start_symbol = grammar.variables.empty() ?
      Symbol(0, Symbol::Terminal) :
      Symbol(0, Symbol::NonTerminal);

    Production start_production({
      ProductionStep(start_symbol, 0, rules::AssociativityNone),
    });

@@ -63,7 +66,7 @@ class ParseTableBuilder {
    add_parse_state(ParseItemSet({
      {
        ParseItem(rules::START(), start_production, 0),
        LookaheadSet({ END_OF_INPUT().index }),
        LookaheadSet({ END_OF_INPUT() }),
      },
    }));

@@ -107,21 +110,25 @@ class ParseTableBuilder {
  void build_error_parse_state() {
    ParseState error_state;

    for (const Symbol::Index index : parse_table.mergeable_symbols) {
      add_out_of_context_parse_state(&error_state, Symbol(index, true));
    for (const Symbol symbol : parse_table.mergeable_symbols) {
      add_out_of_context_parse_state(&error_state, symbol);
    }

    for (const Symbol &symbol : grammar.extra_tokens) {
      if (!error_state.terminal_entries.count(symbol.index)) {
        error_state.terminal_entries[symbol.index].actions.push_back(ParseAction::ShiftExtra());
      if (!error_state.terminal_entries.count(symbol)) {
        error_state.terminal_entries[symbol].actions.push_back(ParseAction::ShiftExtra());
      }
    }

    for (size_t i = 0; i < grammar.variables.size(); i++) {
      add_out_of_context_parse_state(&error_state, Symbol(i, false));
    for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
      add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::External));
    }

    error_state.terminal_entries[END_OF_INPUT().index].actions.push_back(ParseAction::Recover(0));
    for (size_t i = 0; i < grammar.variables.size(); i++) {
      add_out_of_context_parse_state(&error_state, Symbol(i, Symbol::NonTerminal));
    }

    error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
    parse_table.states[0] = error_state;
  }

@@ -130,10 +137,10 @@ class ParseTableBuilder {
    const ParseItemSet &item_set = recovery_states[symbol];
    if (!item_set.entries.empty()) {
      ParseStateId state = add_parse_state(item_set);
      if (symbol.is_token) {
        error_state->terminal_entries[symbol.index].actions.assign({ ParseAction::Recover(state) });
      } else {
      if (symbol.is_non_terminal()) {
        error_state->nonterminal_entries[symbol.index] = state;
      } else {
        error_state->terminal_entries[symbol].actions.assign({ ParseAction::Recover(state) });
      }
    }
  }

@@ -152,9 +159,9 @@ class ParseTableBuilder {
  }

  string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
    map<Symbol::Index, ParseItemSet> terminal_successors;
    map<Symbol, ParseItemSet> terminal_successors;
    map<Symbol::Index, ParseItemSet> nonterminal_successors;
    set<Symbol::Index> lookaheads_with_conflicts;
    set<Symbol> lookaheads_with_conflicts;

    for (const auto &pair : item_set.entries) {
      const ParseItem &item = pair.first;

@@ -168,7 +175,7 @@ class ParseTableBuilder {
        ParseAction::Reduce(item.lhs(), item.step_index, *item.production);

      int precedence = item.precedence();
      for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
      for (Symbol lookahead : *lookahead_symbols.entries) {
        ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];

        // Only add the highest-precedence Reduce actions to the parse table.

@@ -203,10 +210,10 @@ class ParseTableBuilder {
      Symbol symbol = item.production->at(item.step_index).symbol;
      ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);

      if (symbol.is_token) {
        terminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
      } else {
      if (symbol.is_non_terminal()) {
        nonterminal_successors[symbol.index].entries[new_item] = lookahead_symbols;
      } else {
        terminal_successors[symbol].entries[new_item] = lookahead_symbols;
      }
    }

@@ -214,7 +221,7 @@ class ParseTableBuilder {
    // Add a Shift action for each possible successor state. Shift actions for
    // terminal lookaheads can conflict with Reduce actions added previously.
    for (auto &pair : terminal_successors) {
      Symbol::Index lookahead = pair.first;
      Symbol lookahead = pair.first;
      ParseItemSet &next_item_set = pair.second;
      ParseStateId next_state_id = add_parse_state(next_item_set);
      ParseState &state = parse_table.states[state_id];

@@ -223,7 +230,7 @@ class ParseTableBuilder {
      if (!allow_any_conflict) {
        if (had_existing_action)
          lookaheads_with_conflicts.insert(lookahead);
        recovery_states[Symbol(lookahead, true)].add(next_item_set);
        recovery_states[lookahead].add(next_item_set);
      }
    }

@@ -234,10 +241,10 @@ class ParseTableBuilder {
      ParseStateId next_state = add_parse_state(next_item_set);
      parse_table.set_nonterminal_action(state_id, lookahead, next_state);
      if (!allow_any_conflict)
        recovery_states[Symbol(lookahead, false)].add(next_item_set);
        recovery_states[Symbol(lookahead, Symbol::NonTerminal)].add(next_item_set);
    }

    for (Symbol::Index lookahead : lookaheads_with_conflicts) {
    for (Symbol lookahead : lookaheads_with_conflicts) {
      string conflict = handle_conflict(item_set, state_id, lookahead);
      if (!conflict.empty()) return conflict;
    }

@@ -245,9 +252,9 @@ class ParseTableBuilder {
    ParseAction shift_extra = ParseAction::ShiftExtra();
    ParseState &state = parse_table.states[state_id];
    for (const Symbol &extra_symbol : grammar.extra_tokens) {
      if (!state.terminal_entries.count(extra_symbol.index) ||
      if (!state.terminal_entries.count(extra_symbol) ||
          state.has_shift_action() || allow_any_conflict) {
        parse_table.add_terminal_action(state_id, extra_symbol.index, shift_extra);
        parse_table.add_terminal_action(state_id, extra_symbol, shift_extra);
      }
    }

@@ -257,7 +264,6 @@ class ParseTableBuilder {
  void mark_fragile_actions() {
    for (ParseState &state : parse_table.states) {
      for (auto &entry : state.terminal_entries) {
        const Symbol symbol(entry.first, true);
        auto &actions = entry.second.actions;

        for (ParseAction &action : actions) {

@@ -359,7 +365,7 @@ class ParseTableBuilder {
  }

  string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
                         Symbol::Index lookahead) {
                         Symbol lookahead) {
    ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
    int reduction_precedence = entry.actions.front().precedence();
    set<ParseItem> shift_items;

@@ -468,7 +474,7 @@ class ParseTableBuilder {
      description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
    }

    description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
    description += " \u2022 " + symbol_name(lookahead) + " \u2026";
    description += "\n\n";

    description += "Possible interpretations:\n\n";

@@ -487,7 +493,7 @@ class ParseTableBuilder {
        description += " " + symbol_name(step.symbol);
      }
      description += ")";
      description += " \u2022 " + symbol_name(Symbol(lookahead, true)) + " \u2026";
      description += " \u2022 " + symbol_name(lookahead) + " \u2026";
      description += "\n";
    }
  }

@@ -564,14 +570,23 @@ class ParseTableBuilder {
        return "END_OF_INPUT";
      else
        return "";
    } else if (symbol.is_token) {
      const Variable &variable = lexical_grammar.variables[symbol.index];
      if (variable.type == VariableTypeNamed)
        return variable.name;
      else
        return "'" + variable.name + "'";
    } else {
      return grammar.variables[symbol.index].name;
    }

    switch (symbol.type) {
      case Symbol::Terminal: {
        const Variable &variable = lexical_grammar.variables[symbol.index];
        if (variable.type == VariableTypeNamed)
          return variable.name;
        else
          return "'" + variable.name + "'";
      }
      case Symbol::NonTerminal: {
        return grammar.variables[symbol.index].name;
      }
      case Symbol::External:
      default: {
        return grammar.external_tokens[symbol.index].name;
      }
    }
  }
@@ -12,8 +12,8 @@ using rules::Symbol;

LookaheadSet::LookaheadSet() : entries(nullptr) {}

LookaheadSet::LookaheadSet(const set<Symbol::Index> &symbols)
  : entries(make_shared<set<Symbol::Index>>(symbols)) {}
LookaheadSet::LookaheadSet(const set<Symbol> &symbols)
  : entries(make_shared<set<Symbol>>(symbols)) {}

bool LookaheadSet::empty() const {
  return !entries.get() || entries->empty();

@@ -23,7 +23,7 @@ bool LookaheadSet::operator==(const LookaheadSet &other) const {
  return *entries == *other.entries;
}

bool LookaheadSet::contains(const Symbol::Index &symbol) const {
bool LookaheadSet::contains(const Symbol &symbol) const {
  return entries->find(symbol) != entries->end();
}

@@ -31,15 +31,15 @@ bool LookaheadSet::insert_all(const LookaheadSet &other) {
  if (!other.entries.get())
    return false;
  if (!entries.get())
    entries = make_shared<set<Symbol::Index>>();
    entries = make_shared<set<Symbol>>();
  size_t previous_size = entries->size();
  entries->insert(other.entries->begin(), other.entries->end());
  return entries->size() > previous_size;
}

bool LookaheadSet::insert(const Symbol::Index &symbol) {
bool LookaheadSet::insert(const Symbol &symbol) {
  if (!entries.get())
    entries = make_shared<set<Symbol::Index>>();
    entries = make_shared<set<Symbol>>();
  return entries->insert(symbol).second;
}

@@ -11,15 +11,15 @@ namespace build_tables {
class LookaheadSet {
 public:
  LookaheadSet();
  explicit LookaheadSet(const std::set<rules::Symbol::Index> &);
  explicit LookaheadSet(const std::set<rules::Symbol> &);

  bool empty() const;
  bool operator==(const LookaheadSet &) const;
  bool contains(const rules::Symbol::Index &) const;
  bool contains(const rules::Symbol &) const;
  bool insert_all(const LookaheadSet &);
  bool insert(const rules::Symbol::Index &);
  bool insert(const rules::Symbol &);

  std::shared_ptr<std::set<rules::Symbol::Index>> entries;
  std::shared_ptr<std::set<rules::Symbol>> entries;
};

} // namespace build_tables
@@ -41,7 +41,7 @@ bool ParseItem::operator<(const ParseItem &other) const {
}

Symbol ParseItem::lhs() const {
  return Symbol(variable_index);
  return Symbol(variable_index, Symbol::NonTerminal);
}

bool ParseItem::is_done() const {

@@ -105,38 +105,6 @@ size_t ParseItemSet::unfinished_item_signature() const {
  return result;
}

ParseItemSet::ActionMap ParseItemSet::actions() const {
  ParseItemSet::ActionMap result;

  for (const auto &pair : entries) {
    const ParseItem &item = pair.first;
    const LookaheadSet &lookahead_symbols = pair.second;

    if (item.step_index == item.production->size()) {
      int precedence = item.precedence();
      for (const Symbol::Index lookahead : *lookahead_symbols.entries) {
        Action &action = result.terminal_actions[lookahead];
        if (precedence > action.completion_precedence) {
          action.completions.assign({ &item });
        } else if (precedence == action.completion_precedence) {
          action.completions.push_back({ &item });
        }
      }
    } else {
      Symbol symbol = item.production->at(item.step_index).symbol;
      ParseItem new_item(item.lhs(), *item.production, item.step_index + 1);

      if (symbol.is_token) {
        result.terminal_actions[symbol.index].continuation.entries[new_item] = lookahead_symbols;
      } else {
        result.nonterminal_continuations[symbol.index].entries[new_item] = lookahead_symbols;
      }
    }
  }

  return result;
}

void ParseItemSet::add(const ParseItemSet &other) {
  for (const auto &pair : other.entries)
    entries[pair.first].insert_all(pair.second);

@@ -41,16 +41,6 @@ class ParseItemSet {
  ParseItemSet();
  explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);

  struct Completion;
  struct Action;

  struct ActionMap {
    std::map<rules::Symbol::Index, Action> terminal_actions;
    std::map<rules::Symbol::Index, ParseItemSet> nonterminal_continuations;
  };

  ActionMap actions() const;

  bool operator==(const ParseItemSet &) const;
  void add(const ParseItemSet &);
  size_t unfinished_item_signature() const;

@@ -58,22 +48,6 @@ class ParseItemSet {
  std::map<ParseItem, LookaheadSet> entries;
};

struct ParseItemSet::Completion {
  const ParseItem *item;
  int precedence;
  rules::Associativity associativity;

  bool operator<(const ParseItemSet::Completion &other) {
    return precedence < other.precedence;
  }
};

struct ParseItemSet::Action {
  ParseItemSet continuation;
  std::vector<const ParseItem *> completions;
  int completion_precedence;
};

} // namespace build_tables
} // namespace tree_sitter
@@ -27,12 +27,17 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
  set<Symbol::Index> processed_non_terminals;

  for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
    Symbol symbol(i, true);
    first_sets.insert({symbol, LookaheadSet({ static_cast<Symbol::Index>(i) })});
    Symbol symbol(i, Symbol::Terminal);
    first_sets.insert({symbol, LookaheadSet({ symbol })});
  }

  for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
    Symbol symbol(i, Symbol::External);
    first_sets.insert({symbol, LookaheadSet({ symbol })});
  }

  for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
    Symbol symbol(i);
    Symbol symbol(i, Symbol::NonTerminal);
    LookaheadSet first_set;

    processed_non_terminals.clear();

@@ -42,10 +47,10 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
      Symbol current_symbol = symbols_to_process.back();
      symbols_to_process.pop_back();

      if (current_symbol.is_token) {
        first_set.insert(current_symbol.index);
      if (!current_symbol.is_non_terminal()) {
        first_set.insert(current_symbol);
      } else if (processed_non_terminals.insert(current_symbol.index).second) {
        for (const Production &production : grammar.productions(current_symbol)) {
        for (const Production &production : grammar.variables[current_symbol.index].productions) {
          if (!production.empty()) {
            symbols_to_process.push_back(production[0].symbol);
          }

@@ -59,11 +64,11 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
  vector<ParseItemSetComponent> components_to_process;

  for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
    Symbol symbol(i);
    Symbol symbol(i, Symbol::NonTerminal);
    map<ParseItem, pair<LookaheadSet, bool>> cache_entry;

    components_to_process.clear();
    for (const Production &production : grammar.productions(symbol)) {
    for (const Production &production : grammar.variables[i].productions) {
      components_to_process.push_back(ParseItemSetComponent{
        ParseItem(symbol, production, 0),
        LookaheadSet(),

@@ -87,7 +92,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,

      if (component_is_new) {
        Symbol next_symbol = item.next_symbol();
        if (next_symbol.is_built_in() || next_symbol.is_token)
        if (!next_symbol.is_non_terminal() || next_symbol.is_built_in())
          continue;

        LookaheadSet next_lookaheads;

@@ -102,7 +107,7 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
          propagates_lookaheads = false;
        }

        for (const Production &production : grammar.productions(next_symbol)) {
        for (const Production &production : grammar.variables[next_symbol.index].productions) {
          components_to_process.push_back(ParseItemSetComponent{
            ParseItem(next_symbol, production, 0),
            next_lookaheads,

@@ -130,7 +135,7 @@ void ParseItemSetBuilder::apply_transitive_closure(ParseItemSet *item_set) {
    const LookaheadSet &lookaheads = pair.second;

    const Symbol &next_symbol = item.next_symbol();
    if (!next_symbol.is_token && !next_symbol.is_built_in()) {
    if (next_symbol.is_non_terminal() && !next_symbol.is_built_in()) {
      LookaheadSet next_lookaheads;
      size_t next_step = item.step_index + 1;
      if (next_step == item.production->size()) {
@@ -47,8 +47,8 @@ class FirstCharacters : public CharacterAggregator<true, false> {};
class LastCharacters : public CharacterAggregator<false, true> {};
class AllCharacters : public CharacterAggregator<true, true> {};

set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
  set<Symbol::Index> result;
set<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
  set<Symbol> result;

  AllCharacters all_separator_characters;
  for (const rule_ptr &separator : grammar.separators)

@@ -79,7 +79,7 @@ set<Symbol::Index> recovery_tokens(const LexicalGrammar &grammar) {
      !all_characters.result.intersects(all_separator_characters.result);

    if ((has_distinct_start && has_distinct_end) || has_no_separators)
      result.insert(i);
      result.insert(Symbol(i, Symbol::Terminal));
  }

  return result;

@@ -11,7 +11,7 @@ struct LexicalGrammar;

namespace build_tables {

std::set<rules::Symbol::Index> recovery_tokens(const LexicalGrammar &);
std::set<rules::Symbol> recovery_tokens(const LexicalGrammar &);

} // namespace build_tables
} // namespace tree_sitter
@@ -11,9 +11,11 @@
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/util/string_helpers.h"
#include "tree_sitter/runtime.h"

namespace tree_sitter {
namespace generate_code {

using std::function;
using std::map;
using std::pair;

@@ -22,6 +24,7 @@ using std::string;
using std::to_string;
using std::vector;
using util::escape_char;
using rules::Symbol;

static Variable EOF_ENTRY("end", VariableTypeNamed, rule_ptr());

@@ -73,9 +76,8 @@ class CCodeGenerator {
  const LexicalGrammar lexical_grammar;
  map<string, string> sanitized_names;
  vector<pair<size_t, ParseTableEntry>> parse_table_entries;
  vector<pair<size_t, set<rules::Symbol>>> in_progress_symbols;
  vector<set<Symbol::Index>> external_scanner_states;
  size_t next_parse_action_list_index;
  size_t next_in_progress_symbol_list_index;

 public:
  CCodeGenerator(string name, const ParseTable &parse_table,

@@ -87,19 +89,26 @@ class CCodeGenerator {
      lex_table(lex_table),
      syntax_grammar(syntax_grammar),
      lexical_grammar(lexical_grammar),
      next_parse_action_list_index(0),
      next_in_progress_symbol_list_index(0) {}
      next_parse_action_list_index(0) {}

  string code() {
    buffer = "";

    add_includes();
    add_state_and_symbol_counts();
    add_warning_pragma();
    add_stats();
    add_symbol_enum();
    add_symbol_names_list();
    add_symbol_node_types_list();
    add_symbol_metadata_list();
    add_lex_function();
    add_lex_states_list();
    add_lex_modes_list();

    if (!syntax_grammar.external_tokens.empty()) {
      add_external_token_enum();
      add_external_scanner_symbol_map();
      add_external_scanner_states_list();
    }

    add_parse_table();
    add_parser_export();

@@ -112,10 +121,25 @@ class CCodeGenerator {
    line();
  }

  void add_state_and_symbol_counts() {
  void add_warning_pragma() {
    line("#pragma GCC diagnostic push");
    line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
    line();
  }

  void add_stats() {
    size_t token_count = 1 + lexical_grammar.variables.size();
    for (const ExternalToken &external_token : syntax_grammar.external_tokens) {
      if (external_token.corresponding_internal_token == rules::NONE()) {
        token_count++;
      }
    }

    line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
    line("#define STATE_COUNT " + to_string(parse_table.states.size()));
    line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
    line("#define TOKEN_COUNT " + to_string(lexical_grammar.variables.size() + 1));
    line("#define TOKEN_COUNT " + to_string(token_count));
    line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
    line();
  }

@@ -124,7 +148,7 @@ class CCodeGenerator {
    indent([&]() {
      size_t i = 1;
      for (const auto &entry : parse_table.symbols) {
        const rules::Symbol &symbol = entry.first;
        const Symbol &symbol = entry.first;
        if (!symbol.is_built_in()) {
          line(symbol_id(symbol) + " = " + to_string(i) + ",");
          i++;

@@ -146,11 +170,11 @@ class CCodeGenerator {
    line();
  }

  void add_symbol_node_types_list() {
  void add_symbol_metadata_list() {
    line("static const TSSymbolMetadata ts_symbol_metadata[SYMBOL_COUNT] = {");
    indent([&]() {
      for (const auto &entry : parse_table.symbols) {
        const rules::Symbol &symbol = entry.first;
        const Symbol &symbol = entry.first;
        line("[" + symbol_id(symbol) + "] = {");
        indent([&]() {
          switch (symbol_type(symbol)) {

@@ -198,13 +222,102 @@ class CCodeGenerator {
    line();
  }

  void add_lex_states_list() {
    line("static TSStateId ts_lex_states[STATE_COUNT] = {");
  void add_lex_modes_list() {
    add_external_scanner_state({});

    map<Symbol::Index, Symbol::Index> external_tokens_by_corresponding_internal_token;
    for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
      for (size_t j = 0; j < syntax_grammar.external_tokens.size(); j++) {
        const ExternalToken &external_token = syntax_grammar.external_tokens[j];
        if (external_token.corresponding_internal_token.index == Symbol::Index(i)) {
          external_tokens_by_corresponding_internal_token.insert({i, j});
          break;
        }
      }
    }

    line("static TSLexMode ts_lex_modes[STATE_COUNT] = {");
    indent([&]() {
      size_t state_id = 0;
      for (const auto &state : parse_table.states)
        line("[" + to_string(state_id++) + "] = " +
             to_string(state.lex_state_id) + ",");

      for (const auto &state : parse_table.states) {
        line("[" + to_string(state_id++) + "] = {.lex_state = ");
        add(to_string(state.lex_state_id));

        bool needs_external_scanner = false;
        set<Symbol::Index> external_token_indices;
        for (const auto &pair : state.terminal_entries) {
          Symbol symbol = pair.first;
          if (symbol.is_external()) {
            needs_external_scanner = true;
            external_token_indices.insert(symbol.index);
          } else if (symbol.is_token()) {
            auto corresponding_external_token =
              external_tokens_by_corresponding_internal_token.find(symbol.index);
            if (corresponding_external_token != external_tokens_by_corresponding_internal_token.end()) {
              external_token_indices.insert(corresponding_external_token->second);
            }
          }
        }

        if (needs_external_scanner) {
          add(", .external_lex_state = " + add_external_scanner_state(external_token_indices));
        }

        add("},");
      }
    });
    line("};");
    line();
  }

  string add_external_scanner_state(set<Symbol::Index> external_token_ids) {
    for (size_t i = 0, n = external_scanner_states.size(); i < n; i++)
      if (external_scanner_states[i] == external_token_ids)
        return to_string(i);
    external_scanner_states.push_back(external_token_ids);
    return to_string(external_scanner_states.size() - 1);
  }

  void add_external_token_enum() {
    line("enum {");
    indent([&]() {
      for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++)
        line(external_token_id(i) + ",");
    });
    line("};");
    line();
  }

  void add_external_scanner_symbol_map() {
    line("TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {");
    indent([&]() {
      for (size_t i = 0; i < syntax_grammar.external_tokens.size(); i++) {
        line("[" + external_token_id(i) + "] = " + symbol_id(Symbol(i, Symbol::External)) + ",");
      }
    });
    line("};");
    line();
  }

  void add_external_scanner_states_list() {
    line("static bool ts_external_scanner_states[");
    add(to_string(external_scanner_states.size()));
    add("][EXTERNAL_TOKEN_COUNT] = {");
    indent([&]() {
      size_t i = 0;
      for (const auto &valid_external_lookaheads : external_scanner_states) {
        if (!valid_external_lookaheads.empty()) {
          line("[" + to_string(i) + "] = {");
          indent([&]() {
            for (Symbol::Index id : valid_external_lookaheads) {
              line("[" + external_token_id(id) + "] = true,");
            }
          });
          line("},");
        }
        i++;
      }
    });
    line("};");
    line();

@@ -214,9 +327,6 @@ class CCodeGenerator {
    add_parse_action_list_id(ParseTableEntry{ {}, false, false });

    size_t state_id = 0;
    line("#pragma GCC diagnostic push");
    line("#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\"");
    line();
    line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");

    indent([&]() {

@@ -224,12 +334,12 @@ class CCodeGenerator {
        line("[" + to_string(state_id++) + "] = {");
        indent([&]() {
          for (const auto &entry : state.nonterminal_entries) {
            line("[" + symbol_id(rules::Symbol(entry.first)) + "] = STATE(");
            line("[" + symbol_id(Symbol(entry.first, Symbol::NonTerminal)) + "] = STATE(");
            add(to_string(entry.second));
            add("),");
          }
          for (const auto &entry : state.terminal_entries) {
            line("[" + symbol_id(rules::Symbol(entry.first, true)) + "] = ACTIONS(");
            line("[" + symbol_id(entry.first) + "] = ACTIONS(");
            add(to_string(add_parse_action_list_id(entry.second)));
            add("),");
          }

@@ -242,12 +352,42 @@ class CCodeGenerator {
    line();
    add_parse_action_list();
    line();
    line("#pragma GCC diagnostic pop");
    line();
  }

  void add_parser_export() {
    line("EXPORT_LANGUAGE(ts_language_" + name + ");");
    string language_function_name = "tree_sitter_" + name;
    string external_scanner_name = language_function_name + "_external_scanner";

    if (!syntax_grammar.external_tokens.empty()) {
      line("void *" + external_scanner_name + "_create();");
      line("void " + external_scanner_name + "_destroy();");
      line("void " + external_scanner_name + "_reset(void *);");
      line("bool " + external_scanner_name + "_scan(void *, TSLexer *, const bool *);");
      line("bool " + external_scanner_name + "_serialize(void *, TSExternalTokenState);");
      line("void " + external_scanner_name + "_deserialize(void *, const TSExternalTokenState);");
      line();
    }

    line("const TSLanguage *" + language_function_name + "() {");
    indent([&]() {
      line("GET_LANGUAGE(");
      if (syntax_grammar.external_tokens.empty()) {
        add(");");
      } else {
        indent([&]() {
          line("(const bool *)ts_external_scanner_states,");
          line("ts_external_scanner_symbol_map,");
          line(external_scanner_name + "_create,");
          line(external_scanner_name + "_destroy,");
          line(external_scanner_name + "_reset,");
          line(external_scanner_name + "_scan,");
          line(external_scanner_name + "_serialize,");
          line(external_scanner_name + "_deserialize,");
        });
        line(");");
      }
    });
    line("}");
    line();
  }
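The `add_parser_export` routine above spells out the full contract between a generated parser and a hand-written external scanner: six C functions named after the language. A sketch of what a grammar author would supply for a hypothetical grammar named "mylang" with one external `indent` token; the function names and signatures mirror the declarations emitted above, while the indentation logic, the header path, and the enum value are illustrative assumptions, not part of this commit:

#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include "tree_sitter/parser.h"  // assumed location of TSLexer / TSExternalTokenState

enum { ts_external_token_indent };  // mirrors the generated external token enum

void *tree_sitter_mylang_external_scanner_create() {
  return calloc(1, sizeof(int));  // scanner payload: current indent level
}

void tree_sitter_mylang_external_scanner_destroy(void *payload) {
  free(payload);
}

void tree_sitter_mylang_external_scanner_reset(void *payload) {
  *(int *)payload = 0;
}

bool tree_sitter_mylang_external_scanner_scan(void *payload, TSLexer *lexer,
                                              const bool *symbol_whitelist) {
  // Only produce tokens the parser can actually accept in this state.
  if (!symbol_whitelist[ts_external_token_indent])
    return false;

  int column = 0;
  while (lexer->lookahead == ' ') {
    lexer->advance(lexer, true);  // second argument presumably marks skipped whitespace
    column++;
  }

  int *indent_level = (int *)payload;
  if (column > *indent_level) {
    *indent_level = column;
    lexer->result_symbol = ts_external_token_indent;
    return true;
  }
  return false;
}

bool tree_sitter_mylang_external_scanner_serialize(void *payload, TSExternalTokenState state) {
  memcpy(state, payload, sizeof(int));  // state is a small fixed-size byte buffer
  return true;
}

void tree_sitter_mylang_external_scanner_deserialize(void *payload, const TSExternalTokenState state) {
  memcpy(payload, state, sizeof(int));
}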
@@ -379,22 +519,13 @@ class CCodeGenerator {
    return result;
  }

  size_t add_in_progress_symbol_list_id(const set<rules::Symbol> &symbols) {
    for (const auto &pair : in_progress_symbols) {
      if (pair.second == symbols) {
        return pair.first;
      }
    }

    size_t result = next_in_progress_symbol_list_index;
    in_progress_symbols.push_back({ result, symbols });
    next_in_progress_symbol_list_index += 1 + symbols.size();
    return result;
  }

  // Helper functions

  string symbol_id(const rules::Symbol &symbol) {
  string external_token_id(Symbol::Index index) {
    return "ts_external_token_" + syntax_grammar.external_tokens[index].name;
  }

  string symbol_id(const Symbol &symbol) {
    if (symbol == rules::END_OF_INPUT())
      return "ts_builtin_sym_end";

@@ -411,25 +542,33 @@ class CCodeGenerator {
    }
  }

  string symbol_name(const rules::Symbol &symbol) {
  string symbol_name(const Symbol &symbol) {
    if (symbol == rules::END_OF_INPUT())
      return "END";
    return entry_for_symbol(symbol).first;
  }

  VariableType symbol_type(const rules::Symbol &symbol) {
  VariableType symbol_type(const Symbol &symbol) {
    if (symbol == rules::END_OF_INPUT())
      return VariableTypeHidden;
    return entry_for_symbol(symbol).second;
  }

  pair<string, VariableType> entry_for_symbol(const rules::Symbol &symbol) {
    if (symbol.is_token) {
      const Variable &variable = lexical_grammar.variables[symbol.index];
      return { variable.name, variable.type };
    } else {
      const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
      return { variable.name, variable.type };
  pair<string, VariableType> entry_for_symbol(const Symbol &symbol) {
    switch (symbol.type) {
      case Symbol::NonTerminal: {
        const SyntaxVariable &variable = syntax_grammar.variables[symbol.index];
        return { variable.name, variable.type };
      }
      case Symbol::Terminal: {
        const Variable &variable = lexical_grammar.variables[symbol.index];
        return { variable.name, variable.type };
      }
      case Symbol::External:
      default: {
        const ExternalToken &token = syntax_grammar.external_tokens[symbol.index];
        return { token.name, token.type };
      }
    }
  }
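Taken together, the generated tables divide the work like this: `ts_lex_modes` pairs every parse state with a lex state and, optionally, an external lex state, while `ts_external_scanner_states` maps each external lex state to the set of external tokens valid there (state 0 is the empty set, since `add_lex_modes_list` registers `{}` first). A hypothetical sketch of how a runtime might consult them before invoking the scanner; the real parser's control flow is more involved, but the field names follow the TSLanguage `external_scanner` struct:

// Hypothetical glue; not the actual runtime source.
static bool try_external_scan(const TSLanguage *language, void *scanner_payload,
                              TSLexer *lexer, TSStateId parse_state) {
  TSLexMode mode = language->lex_modes[parse_state];
  if (mode.external_lex_state == 0)
    return false;  // no external tokens are valid in this parse state

  // Each row of the validity table is one whitelist of external token ids.
  const bool *whitelist = language->external_scanner.states +
                          mode.external_lex_state * language->external_token_count;
  if (!language->external_scanner.scan(scanner_payload, lexer, whitelist))
    return false;

  // The scanner reports its own token id; map it back to a grammar symbol.
  lexer->result_symbol = language->external_scanner.symbol_map[lexer->result_symbol];
  return true;
}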
@@ -12,6 +12,7 @@ struct Grammar {
  std::vector<std::pair<std::string, rule_ptr>> rules;
  std::vector<rule_ptr> extra_tokens;
  std::vector<std::vector<std::string>> expected_conflicts;
  std::vector<std::string> external_tokens;
};

} // namespace tree_sitter

@@ -210,7 +210,7 @@ ParseGrammarResult parse_grammar(const string &input) {
  string error_message;
  string name;
  Grammar grammar;
  json_value name_json, rules_json, extras_json, conflicts_json;
  json_value name_json, rules_json, extras_json, conflicts_json, external_tokens_json;

  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  char parse_error[json_error_max];

@@ -302,6 +302,25 @@ ParseGrammarResult parse_grammar(const string &input) {
    }
  }

  external_tokens_json = grammar_json->operator[]("externals");
  if (external_tokens_json.type != json_none) {
    if (external_tokens_json.type != json_array) {
      error_message = "External tokens must be an array";
      goto error;
    }

    for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
      json_value *token_name_json = external_tokens_json.u.array.values[i];
      if (token_name_json->type != json_string) {
        error_message = "External token values must be strings";
        goto error;
      }

      string token_name = token_name_json->u.string.ptr;
      grammar.external_tokens.push_back(token_name);
    }
  }

  json_value_free(grammar_json);
  return { name, grammar, "" };
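For reference, the "externals" field that this parsing code accepts is just a flat array of rule names in the grammar JSON. A minimal, hypothetical fragment (abridged, rule bodies omitted); per the interning logic later in this commit, a leading underscore makes an external token hidden, and an external whose name matches an internal rule is linked to that rule as its corresponding internal token:

{
  "name": "mylang",
  "externals": ["indent", "dedent", "_newline"],
  "rules": { ... }
}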
@@ -1,6 +1,7 @@
#include "compiler/parse_table.h"
#include <string>
#include "compiler/precedence_range.h"
#include "compiler/rules/built_in_symbols.h"

namespace tree_sitter {

@@ -28,7 +29,7 @@ ParseAction::ParseAction()
    extra(false),
    fragile(false),
    state_index(-1),
    symbol(Symbol(-1)),
    symbol(rules::NONE()),
    consumed_symbol_count(0),
    production(nullptr) {}

@@ -43,11 +44,11 @@ ParseAction ParseAction::Accept() {
}

ParseAction ParseAction::Shift(ParseStateId state_index) {
  return ParseAction(ParseActionTypeShift, state_index, Symbol(-1), 0, nullptr);
  return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
}

ParseAction ParseAction::Recover(ParseStateId state_index) {
  return ParseAction(ParseActionTypeRecover, state_index, Symbol(-1), 0,
  return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
                     nullptr);
}

@@ -150,9 +151,7 @@ bool ParseState::has_shift_action() const {
set<Symbol> ParseState::expected_inputs() const {
  set<Symbol> result;
  for (auto &entry : terminal_entries)
    result.insert(Symbol(entry.first, true));
  for (auto &entry : nonterminal_entries)
    result.insert(Symbol(entry.first, false));
    result.insert(entry.first);
  return result;
}

@@ -182,33 +181,24 @@ ParseStateId ParseTable::add_state() {
  return states.size() - 1;
}

ParseAction &ParseTable::set_terminal_action(ParseStateId state_id,
                                             Symbol::Index index,
                                             ParseAction action) {
  states[state_id].terminal_entries[index].actions.clear();
  return add_terminal_action(state_id, index, action);
}

ParseAction &ParseTable::add_terminal_action(ParseStateId state_id,
                                             Symbol::Index index,
                                             Symbol lookahead,
                                             ParseAction action) {
  Symbol symbol(index, true);
  if (action.type == ParseActionTypeShift && action.extra)
    symbols[symbol].extra = true;
    symbols[lookahead].extra = true;
  else
    symbols[symbol].structural = true;
    symbols[lookahead].structural = true;

  ParseTableEntry &entry = states[state_id].terminal_entries[index];
  ParseTableEntry &entry = states[state_id].terminal_entries[lookahead];
  entry.actions.push_back(action);
  return *entry.actions.rbegin();
}

void ParseTable::set_nonterminal_action(ParseStateId state_id,
                                        Symbol::Index index,
                                        Symbol::Index lookahead,
                                        ParseStateId next_state_id) {
  Symbol symbol(index, false);
  symbols[symbol].structural = true;
  states[state_id].nonterminal_entries[index] = next_state_id;
  symbols[Symbol(lookahead, Symbol::NonTerminal)].structural = true;
  states[state_id].nonterminal_entries[lookahead] = next_state_id;
}

static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {

@@ -226,12 +216,12 @@ bool ParseTable::merge_state(size_t i, size_t j) {
    return false;

  for (auto &entry : state.terminal_entries) {
    Symbol::Index index = entry.first;
    Symbol lookahead = entry.first;
    const vector<ParseAction> &actions = entry.second.actions;

    const auto &other_entry = other.terminal_entries.find(index);
    const auto &other_entry = other.terminal_entries.find(lookahead);
    if (other_entry == other.terminal_entries.end()) {
      if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
      if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
        return false;
      if (actions.back().type != ParseActionTypeReduce)
        return false;

@@ -242,25 +232,25 @@ bool ParseTable::merge_state(size_t i, size_t j) {
    }
  }

  set<Symbol::Index> symbols_to_merge;
  set<Symbol> symbols_to_merge;

  for (auto &entry : other.terminal_entries) {
    Symbol::Index index = entry.first;
    Symbol lookahead = entry.first;
    const vector<ParseAction> &actions = entry.second.actions;

    if (!state.terminal_entries.count(index)) {
      if (mergeable_symbols.count(index) == 0 && !Symbol::is_built_in(index))
    if (!state.terminal_entries.count(lookahead)) {
      if (mergeable_symbols.count(lookahead) == 0 && !lookahead.is_built_in())
        return false;
      if (actions.back().type != ParseActionTypeReduce)
        return false;
      if (!has_entry(state, entry.second))
        return false;
      symbols_to_merge.insert(index);
      symbols_to_merge.insert(lookahead);
    }
  }

  for (const Symbol::Index &index : symbols_to_merge)
    state.terminal_entries[index] = other.terminal_entries.find(index)->second;
  for (const Symbol &lookahead : symbols_to_merge)
    state.terminal_entries[lookahead] = other.terminal_entries.find(lookahead)->second;

  return true;
}
@@ -76,7 +76,7 @@ class ParseState {
  void each_referenced_state(std::function<void(ParseStateId *)>);
  bool has_shift_action() const;

  std::map<rules::Symbol::Index, ParseTableEntry> terminal_entries;
  std::map<rules::Symbol, ParseTableEntry> terminal_entries;
  std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
  LexStateId lex_state_id;
  size_t shift_actions_signature;

@@ -91,15 +91,14 @@ class ParseTable {
 public:
  std::set<rules::Symbol> all_symbols() const;
  ParseStateId add_state();
  ParseAction &add_terminal_action(ParseStateId state_id, int, ParseAction);
  ParseAction &set_terminal_action(ParseStateId state_id, int index, ParseAction);
  void set_nonterminal_action(ParseStateId state_id, int index, ParseStateId);
  ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
  void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
  bool merge_state(size_t i, size_t j);

  std::vector<ParseState> states;
  std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;

  std::set<rules::Symbol::Index> mergeable_symbols;
  std::set<rules::Symbol> mergeable_symbols;
};

} // namespace tree_sitter
@@ -39,7 +39,7 @@ class ExpandRepeats : public rules::IdentityRuleFn {
    rule_ptr inner_rule = apply(rule->content);
    size_t index = aux_rules.size();
    string helper_rule_name = rule_name + "_repeat" + to_string(++repeat_count);
    Symbol repeat_symbol(offset + index);
    Symbol repeat_symbol(offset + index, Symbol::NonTerminal);
    existing_repeats.push_back({ rule->copy(), repeat_symbol });
    aux_rules.push_back(
      Variable(helper_rule_name, VariableTypeAuxiliary,

@@ -65,6 +65,7 @@ InitialSyntaxGrammar expand_repeats(const InitialSyntaxGrammar &grammar) {
  result.variables = grammar.variables;
  result.extra_tokens = grammar.extra_tokens;
  result.expected_conflicts = grammar.expected_conflicts;
  result.external_tokens = grammar.external_tokens;

  ExpandRepeats expander(result.variables.size());
  for (auto &variable : result.variables)
@@ -38,7 +38,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
  map<Symbol, Symbol> replacements;

  Symbol replace_symbol(const Symbol &symbol) {
    if (symbol.is_built_in() || symbol.is_token)
    if (!symbol.is_non_terminal())
      return symbol;

    auto replacement_pair = replacements.find(symbol);

@@ -49,7 +49,7 @@ class SymbolReplacer : public rules::IdentityRuleFn {
    for (const auto &pair : replacements)
      if (pair.first.index < symbol.index)
        new_index--;
    return Symbol(new_index);
    return Symbol(new_index, Symbol::NonTerminal);
  }
};
@@ -60,14 +60,14 @@ class TokenExtractor : public rules::IdentityRuleFn {
    for (size_t i = 0; i < tokens.size(); i++)
      if (tokens[i].rule->operator==(*input)) {
        token_usage_counts[i]++;
        return make_shared<Symbol>(i, true);
        return make_shared<Symbol>(i, Symbol::Terminal);
      }

    rule_ptr rule = input->copy();
    size_t index = tokens.size();
    tokens.push_back(Variable(token_description(rule), entry_type, rule));
    token_usage_counts.push_back(1);
    return make_shared<Symbol>(index, true);
    return make_shared<Symbol>(index, Symbol::Terminal);
  }

  rule_ptr apply_to(const rules::String *rule) {

@@ -90,9 +90,8 @@ class TokenExtractor : public rules::IdentityRuleFn {
  vector<Variable> tokens;
};

static CompileError ubiq_token_err(const string &message) {
  return CompileError(TSCompileErrorTypeInvalidUbiquitousToken,
                      "Not a token: " + message);
static CompileError extra_token_error(const string &message) {
  return CompileError(TSCompileErrorTypeInvalidExtraToken, "Not a token: " + message);
}

tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(

@@ -122,11 +121,10 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
  size_t i = 0;
  for (const Variable &variable : processed_variables) {
    auto symbol = variable.rule->as<Symbol>();
    if (symbol && symbol->is_token && !symbol->is_built_in() &&
        extractor.token_usage_counts[symbol->index] == 1) {
    if (symbol && symbol->is_token() && extractor.token_usage_counts[symbol->index] == 1) {
      lexical_grammar.variables[symbol->index].type = variable.type;
      lexical_grammar.variables[symbol->index].name = variable.name;
      symbol_replacer.replacements.insert({ Symbol(i), *symbol });
      symbol_replacer.replacements.insert({ Symbol(i, Symbol::NonTerminal), *symbol });
    } else {
      syntax_grammar.variables.push_back(variable);
    }

@@ -158,7 +156,7 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
    bool used_elsewhere_in_grammar = false;
    for (const Variable &variable : lexical_grammar.variables) {
      if (variable.rule->operator==(*rule)) {
        syntax_grammar.extra_tokens.insert(Symbol(i, true));
        syntax_grammar.extra_tokens.insert(Symbol(i, Symbol::Terminal));
        used_elsewhere_in_grammar = true;
      }
      i++;

@@ -175,17 +173,39 @@ tuple<InitialSyntaxGrammar, LexicalGrammar, CompileError> extract_tokens(
    auto symbol = rule->as<Symbol>();
    if (!symbol)
      return make_tuple(syntax_grammar, lexical_grammar,
                        ubiq_token_err(rule->to_string()));
                        extra_token_error(rule->to_string()));

    Symbol new_symbol = symbol_replacer.replace_symbol(*symbol);
    if (!new_symbol.is_token)
    if (new_symbol.is_non_terminal()) {
      return make_tuple(
        syntax_grammar, lexical_grammar,
        ubiq_token_err(syntax_grammar.variables[new_symbol.index].name));
        extra_token_error(syntax_grammar.variables[new_symbol.index].name));
    }

    syntax_grammar.extra_tokens.insert(new_symbol);
  }

  for (const ExternalToken &external_token : grammar.external_tokens) {
    Symbol internal_token = symbol_replacer.replace_symbol(external_token.corresponding_internal_token);

    if (internal_token.is_non_terminal()) {
      return make_tuple(
        syntax_grammar,
        lexical_grammar,
        CompileError(
          TSCompileErrorTypeInvalidExternalToken,
          "Name '" + external_token.name + "' cannot be used for both an external token and a non-terminal rule"
        )
      );
    }

    syntax_grammar.external_tokens.push_back({
      external_token.name,
      external_token.type,
      internal_token
    });
  }

  return make_tuple(syntax_grammar, lexical_grammar, CompileError::none());
}
@@ -92,6 +92,7 @@ pair<SyntaxGrammar, CompileError> flatten_grammar(const InitialSyntaxGrammar &gr
  SyntaxGrammar result;
  result.expected_conflicts = grammar.expected_conflicts;
  result.extra_tokens = grammar.extra_tokens;
  result.external_tokens = grammar.external_tokens;

  bool is_start = true;
  for (const Variable &variable : grammar.variables) {

@@ -1,13 +1,12 @@
#ifndef COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_
#define COMPILER_PREPARE_GRAMMAR_INITIAL_SYNTAX_GRAMMAR_H_

#include <vector>
#include <string>
#include <set>
#include <vector>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
#include "compiler/variable.h"
#include "compiler/syntax_grammar.h"
#include "compiler/variable.h"

namespace tree_sitter {
namespace prepare_grammar {

@@ -16,6 +15,7 @@ struct InitialSyntaxGrammar {
  std::vector<Variable> variables;
  std::set<rules::Symbol> extra_tokens;
  std::set<ConflictSet> expected_conflicts;
  std::vector<ExternalToken> external_tokens;
};

} // namespace prepare_grammar
@@ -8,6 +8,7 @@
#include "compiler/rules/blank.h"
#include "compiler/rules/named_symbol.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"

namespace tree_sitter {
namespace prepare_grammar {

@@ -17,8 +18,9 @@ using std::vector;
using std::set;
using std::pair;
using std::make_shared;
using rules::Symbol;

class InternSymbols : public rules::IdentityRuleFn {
class SymbolInterner : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;

rule_ptr apply_to(const rules::NamedSymbol *rule) {

@@ -34,11 +36,14 @@ class InternSymbols : public rules::IdentityRuleFn {
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
for (size_t i = 0; i < grammar.rules.size(); i++)
if (grammar.rules[i].first == rule_name)
return make_shared<rules::Symbol>(i);
return make_shared<Symbol>(i, Symbol::NonTerminal);
for (size_t i = 0; i < grammar.external_tokens.size(); i++)
if (grammar.external_tokens[i] == rule_name)
return make_shared<rules::Symbol>(i, Symbol::External);
return nullptr;
}

explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
explicit SymbolInterner(const Grammar &grammar) : grammar(grammar) {}
const Grammar grammar;
string missing_rule_name;
};

@@ -50,16 +55,35 @@ CompileError missing_rule_error(string rule_name) {

pair<InternedGrammar, CompileError> intern_symbols(const Grammar &grammar) {
InternedGrammar result;
InternSymbols interner(grammar);

for (auto &external_token_name : grammar.external_tokens) {
Symbol corresponding_internal_token = rules::NONE();
for (size_t i = 0, n = grammar.rules.size(); i < n; i++) {
if (grammar.rules[i].first == external_token_name) {
corresponding_internal_token = Symbol(i, Symbol::NonTerminal);
break;
}
}

result.external_tokens.push_back(ExternalToken{
external_token_name,
external_token_name[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
corresponding_internal_token
});
}

SymbolInterner interner(grammar);

for (auto &pair : grammar.rules) {
auto new_rule = interner.apply(pair.second);
if (!interner.missing_rule_name.empty())
return { result, missing_rule_error(interner.missing_rule_name) };

result.variables.push_back(Variable(
pair.first, pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule));
result.variables.push_back(Variable{
pair.first,
pair.first[0] == '_' ? VariableTypeHidden : VariableTypeNamed,
new_rule
});
}

for (auto &rule : grammar.extra_tokens) {

@@ -15,6 +15,7 @@ struct InternedGrammar {
std::vector<Variable> variables;
std::vector<rule_ptr> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};

} // namespace prepare_grammar

@@ -4,15 +4,15 @@ namespace tree_sitter {
namespace rules {

Symbol END_OF_INPUT() {
return Symbol(-1, true);
return Symbol(-1, Symbol::Terminal);
}

Symbol START() {
return Symbol(-2);
return Symbol(-2, Symbol::NonTerminal);
}

Symbol NONE() {
return Symbol(-3);
return Symbol(-3, Symbol::Type(-1));
}

} // namespace rules

@@ -11,12 +11,10 @@ using std::string;
using std::to_string;
using util::hash_combine;

Symbol::Symbol(Symbol::Index index) : index(index), is_token(false) {}

Symbol::Symbol(Symbol::Index index, bool is_token) : index(index), is_token(is_token) {}
Symbol::Symbol(Symbol::Index index, Symbol::Type type) : index(index), type(type) {}

bool Symbol::operator==(const Symbol &other) const {
return (other.index == index) && (other.is_token == is_token);
return (other.index == index) && (other.type == type);
}

bool Symbol::operator==(const Rule &rule) const {

@@ -27,7 +25,7 @@ bool Symbol::operator==(const Rule &rule) const {
size_t Symbol::hash_code() const {
size_t result = 0;
hash_combine(&result, index);
hash_combine(&result, is_token);
hash_combine<int>(&result, type);
return result;
}

@@ -36,14 +34,22 @@ rule_ptr Symbol::copy() const {
}

string Symbol::to_string() const {
string name = is_token ? "token" : "sym";
return "(" + name + " " + std::to_string(index) + ")";
switch (type) {
case Symbol::Terminal:
return "(terminal " + std::to_string(index) + ")";
case Symbol::NonTerminal:
return "(non-terminal " + std::to_string(index) + ")";
case Symbol::External:
return "(external " + std::to_string(index) + ")";
default:
return "(none)";
}
}

bool Symbol::operator<(const Symbol &other) const {
if (is_token && !other.is_token)
if (type < other.type)
return true;
if (!is_token && other.is_token)
if (other.type < type)
return false;
return (index < other.index);
}

@@ -56,6 +62,18 @@ bool Symbol::is_built_in() const {
return is_built_in(index);
}

bool Symbol::is_token() const {
return type == Symbol::Terminal;
}

bool Symbol::is_external() const {
return type == Symbol::External;
}

bool Symbol::is_non_terminal() const {
return type == Symbol::NonTerminal;
}

void Symbol::accept(Visitor *visitor) const {
visitor->visit(this);
}

@@ -11,9 +11,13 @@ class Symbol : public Rule {
public:
typedef int Index;

typedef enum {
External,
Terminal,
NonTerminal,
} Type;

explicit Symbol(Index index);
Symbol(Index index, bool is_token);
Symbol(Index index, Type type);

bool operator==(const Symbol &other) const;
bool operator==(const Rule &other) const;

@@ -26,9 +30,12 @@ class Symbol : public Rule {
bool operator<(const Symbol &other) const;
static bool is_built_in(Index);
bool is_built_in() const;
bool is_token() const;
bool is_external() const;
bool is_non_terminal() const;

Index index;
bool is_token;
Type type;
};

} // namespace rules

@@ -16,6 +16,7 @@ class String;
class Symbol;
class Pattern;
class Metadata;
class ExternalToken;

class Visitor {
public:

@@ -29,6 +30,7 @@ class Visitor {
virtual void visit(const String *rule) = 0;
virtual void visit(const NamedSymbol *rule) = 0;
virtual void visit(const Symbol *rule) = 0;
virtual void visit(const ExternalToken *rule) = 0;
virtual ~Visitor();
};

@@ -86,6 +88,10 @@ class RuleFn : private Visitor {
return default_apply((const Rule *)rule);
}

virtual T apply_to(const ExternalToken *rule) {
return default_apply((const Rule *)rule);
}

void visit(const Blank *rule) {
value_ = apply_to(rule);
}

@@ -126,6 +132,10 @@ class RuleFn : private Visitor {
value_ = apply_to(rule);
}

void visit(const ExternalToken *rule) {
value_ = apply_to(rule);
}

private:
T value_;
};

@@ -170,6 +180,9 @@ class RuleFn<void> : private Visitor {
virtual void apply_to(const Symbol *rule) {
return default_apply((const Rule *)rule);
}
virtual void apply_to(const ExternalToken *rule) {
return default_apply((const Rule *)rule);
}

void visit(const Blank *rule) {
apply_to(rule);

@@ -201,6 +214,9 @@ class RuleFn<void> : private Visitor {
void visit(const Symbol *rule) {
apply_to(rule);
}
void visit(const ExternalToken *rule) {
apply_to(rule);
}
};

class IdentityRuleFn : public RuleFn<rule_ptr> {

@@ -13,8 +13,6 @@ using std::pair;
using std::vector;
using std::set;

static const vector<Production> NO_PRODUCTIONS;

SyntaxVariable::SyntaxVariable(const string &name, VariableType type,
const vector<Production> &productions)
: name(name), productions(productions), type(type) {}

@@ -23,18 +21,14 @@ ProductionStep::ProductionStep(const rules::Symbol &symbol, int precedence,
rules::Associativity associativity)
: symbol(symbol), precedence(precedence), associativity(associativity) {}

bool ExternalToken::operator==(const ExternalToken &other) const {
return name == other.name && type == other.type &&
corresponding_internal_token == other.corresponding_internal_token;
}

bool ProductionStep::operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
associativity == other.associativity;
}

const vector<Production> &SyntaxGrammar::productions(
const rules::Symbol &symbol) const {
if (symbol.is_built_in() || symbol.is_token) {
return NO_PRODUCTIONS;
} else {
return variables[symbol.index].productions;
}
}

} // namespace tree_sitter

@@ -10,6 +10,14 @@

namespace tree_sitter {

struct ExternalToken {
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;

bool operator==(const ExternalToken &) const;
};

struct ProductionStep {
ProductionStep(const rules::Symbol &, int, rules::Associativity);
bool operator==(const ProductionStep &) const;

@@ -33,11 +41,10 @@ struct SyntaxVariable {
typedef std::set<rules::Symbol> ConflictSet;

struct SyntaxGrammar {
const std::vector<Production> &productions(const rules::Symbol &) const;

std::vector<SyntaxVariable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::vector<ExternalToken> external_tokens;
};

} // namespace tree_sitter

@@ -36,8 +36,9 @@ const TSLanguage *ts_document_language(TSDocument *self) {
}

void ts_document_set_language(TSDocument *self, const TSLanguage *language) {
if (language->version != TREE_SITTER_LANGUAGE_VERSION) return;
ts_document_invalidate(self);
self->parser.language = language;
parser_set_language(&self->parser, language);
if (self->tree) {
ts_tree_release(self->tree);
self->tree = NULL;

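Since `ts_document_set_language` now silently ignores a language whose `version` field does not match `TREE_SITTER_LANGUAGE_VERSION`, a caller that wants to surface the mismatch can check first. A minimal sketch, assuming the version macro and the `ts_language_version` accessor (added below) are visible to the caller; `tree_sitter_mylang` is a hypothetical generated language function:

```c
#include <stdio.h>
#include "tree_sitter/runtime.h"

// Hypothetical generated language function; the name is illustrative only.
const TSLanguage *tree_sitter_mylang(void);

static void set_language_checked(TSDocument *document) {
  const TSLanguage *language = tree_sitter_mylang();
  // ts_document_set_language returns without effect on a version mismatch,
  // so report the problem explicitly instead of failing silently.
  if (ts_language_version(language) != TREE_SITTER_LANGUAGE_VERSION) {
    fprintf(stderr, "language version %u does not match runtime\n",
            ts_language_version(language));
    return;
  }
  ts_document_set_language(document, language);
}
```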
@@ -34,6 +34,10 @@ uint32_t ts_language_symbol_count(const TSLanguage *language) {
return language->symbol_count;
}

uint32_t ts_language_version(const TSLanguage *language) {
return language->version;
}

TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *language,
TSSymbol symbol) {
if (symbol == ts_builtin_sym_error)

@@ -19,6 +19,10 @@ void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry

TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol);

static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) {
return 0 < symbol && symbol < self->external_token_count + 1;
}

static inline const TSParseAction *ts_language_actions(const TSLanguage *self,
TSStateId state,
TSSymbol symbol,

@@ -49,6 +53,16 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
}
}

static inline const bool *
ts_language_enabled_external_tokens(const TSLanguage *self,
unsigned external_scanner_state) {
if (external_scanner_state == 0) {
return NULL;
} else {
return self->external_scanner.states + self->external_token_count * external_scanner_state;
}
}

#ifdef __cplusplus
}
#endif

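`ts_language_enabled_external_tokens` treats `external_scanner.states` as a row-major table: one row of booleans per external lex state, one column per external token, with row 0 reserved to mean "no external tokens valid here". A hedged, hand-written illustration of that layout (not actual generated output) for a language with two external tokens and three external lex states:

```c
#include <stdbool.h>

// Illustrative only. Row i is the validity whitelist handed to the external
// scanner when the parser is in external lex state i; row 0 is never
// dereferenced because ts_language_enabled_external_tokens returns NULL for
// state 0.
static const bool external_scanner_states[3][2] = {
  { false, false },  // state 0: unused
  { true,  false },  // state 1: only the first external token is valid
  { true,  true  },  // state 2: both external tokens are valid
};
```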
@@ -21,12 +21,11 @@ static inline void length_set_unknown_chars(Length *self) {
}

static inline Length length_min(Length len1, Length len2) {
return (len1.chars < len2.chars) ? len1 : len2;
return (len1.bytes < len2.bytes) ? len1 : len2;
}

static inline Length length_add(Length len1, Length len2) {
Length result;
result.chars = len1.chars + len2.chars;
result.bytes = len1.bytes + len2.bytes;
result.extent = point_add(len1.extent, len2.extent);

@@ -57,10 +56,4 @@ static inline Length length_zero() {
return (Length){ 0, 0, {0, 0} };
}

static inline bool length_eq(Length self, Length other) {
return self.bytes == other.bytes && self.chars == other.chars &&
self.extent.row == other.extent.row &&
self.extent.column == other.extent.column;
}

#endif

@@ -11,11 +11,8 @@
self->logger.log(self->logger.payload, TSLogTypeLex, self->debug_buffer); \
}

#define LOG_LOOKAHEAD() \
LOG((0 < self->data.lookahead && self->data.lookahead < 256) \
? "lookahead char:'%c'" \
: "lookahead char:%d", \
self->data.lookahead);
#define LOG_CHARACTER(message, character) \
LOG(character < 255 ? message " character:'%c'" : message " character:%d", character)

static const char empty_chunk[2] = { 0, 0 };

@@ -42,11 +39,9 @@ static void ts_lexer__get_lookahead(Lexer *self) {
utf8proc_iterate(chunk, size, &self->data.lookahead);
else
self->lookahead_size = utf16_iterate(chunk, size, &self->data.lookahead);

LOG_LOOKAHEAD();
}

static void ts_lexer__advance(void *payload, TSStateId state, bool skip) {
static void ts_lexer__advance(void *payload, bool skip) {
Lexer *self = (Lexer *)payload;
if (self->chunk == empty_chunk)
return;

@@ -63,10 +58,10 @@ static void ts_lexer__advance(void *payload, TSStateId state, bool skip) {
}

if (skip) {
LOG("skip_separator state:%d", state);
LOG_CHARACTER("skip", self->data.lookahead);
self->token_start_position = self->current_position;
} else {
LOG("advance state:%d", state);
LOG_CHARACTER("consume", self->data.lookahead);
}

if (self->current_position.bytes >= self->chunk_start + self->chunk_size)

@@ -93,6 +88,7 @@ void ts_lexer_init(Lexer *self) {
.payload = NULL,
.log = NULL
},
.last_external_token_state = NULL,
};
ts_lexer_reset(self, length_zero());
}

@@ -115,17 +111,16 @@ static inline void ts_lexer__reset(Lexer *self, Length position) {
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
ts_lexer__reset(self, length_zero());
self->last_external_token_state = NULL;
}

void ts_lexer_reset(Lexer *self, Length position) {
if (!length_eq(position, self->current_position))
if (position.bytes != self->current_position.bytes) {
ts_lexer__reset(self, position);
return;
}
}

void ts_lexer_start(Lexer *self, TSStateId lex_state) {
LOG("start_lex state:%d, pos:%u", lex_state, self->current_position.chars);

void ts_lexer_start(Lexer *self) {
self->token_start_position = self->current_position;
self->data.result_symbol = 0;

@@ -25,12 +25,13 @@ typedef struct {
TSInput input;
TSLogger logger;
char debug_buffer[TS_DEBUG_BUFFER_SIZE];
const TSExternalTokenState *last_external_token_state;
} Lexer;

void ts_lexer_init(Lexer *);
void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
void ts_lexer_start(Lexer *, TSStateId);
void ts_lexer_start(Lexer *);

#ifdef __cplusplus
}

@@ -39,7 +39,15 @@ static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) {
static inline uint32_t ts_node__relevant_child_count(TSNode self,
bool include_anonymous) {
const Tree *tree = ts_node__tree(self);
return include_anonymous ? tree->visible_child_count : tree->named_child_count;
if (tree->child_count > 0) {
if (include_anonymous) {
return tree->visible_child_count;
} else {
return tree->named_child_count;
}
} else {
return 0;
}
}

static inline TSNode ts_node__direct_parent(TSNode self, uint32_t *index) {

@@ -324,11 +332,21 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
}

uint32_t ts_node_child_count(TSNode self) {
return ts_node__tree(self)->visible_child_count;
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
return tree->visible_child_count;
} else {
return 0;
}
}

uint32_t ts_node_named_child_count(TSNode self) {
return ts_node__tree(self)->named_child_count;
const Tree *tree = ts_node__tree(self);
if (tree->child_count > 0) {
return tree->named_child_count;
} else {
return 0;
}
}

TSNode ts_node_next_sibling(TSNode self) {

@@ -109,28 +109,6 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
return did_break_down;
}

static void parser__pop_reusable_node(ReusableNode *reusable_node) {
reusable_node->byte_index += ts_tree_total_bytes(reusable_node->tree);
while (reusable_node->tree) {
Tree *parent = reusable_node->tree->context.parent;
uint32_t next_index = reusable_node->tree->context.index + 1;
if (parent && parent->child_count > next_index) {
reusable_node->tree = parent->children[next_index];
return;
}
reusable_node->tree = parent;
}
}

static bool parser__breakdown_reusable_node(ReusableNode *reusable_node) {
if (reusable_node->tree->child_count == 0) {
return false;
} else {
reusable_node->tree = reusable_node->tree->children[0];
return true;
}
}

static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
TSStateId state,
ReusableNode *reusable_node) {

@@ -140,12 +118,11 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
reusable_node->tree->fragile_left ||
reusable_node->tree->fragile_right)) {
LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
parser__breakdown_reusable_node(reusable_node);
reusable_node_breakdown(reusable_node);
result = true;
}

if (result) {
LOG("lookahead sym:%s", SYM_NAME(reusable_node->tree->symbol));
ts_tree_release(*lookahead);
ts_tree_retain(*lookahead = reusable_node->tree);
}

@@ -153,16 +130,20 @@ static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
return result;
}

static void parser__pop_reusable_node_leaf(ReusableNode *reusable_node) {
while (reusable_node->tree->child_count > 0)
reusable_node->tree = reusable_node->tree->children[0];
parser__pop_reusable_node(reusable_node);
static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) {
return self.lex_state == other.lex_state &&
self.external_lex_state == other.external_lex_state;
}

static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
TableEntry *table_entry) {
if (tree->first_leaf.lex_state == self->language->lex_states[state])
TSLexMode current_lex_mode = self->language->lex_modes[state];
if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode))
return true;
if (current_lex_mode.external_lex_state != 0)
return false;
if (tree->size.bytes == 0)
return false;
if (!table_entry->is_reusable)
return false;
if (!table_entry->depends_on_lookahead)

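Each parse state now selects a `TSLexMode` pair rather than a single lex state. A hedged illustration of what a generated `lex_modes` table might look like (the values here are invented for illustration, not generated output); an `external_lex_state` of 0 means no external tokens are valid in that parse state:

```c
// Illustrative only: parse state 1 permits external tokens (row 1 of the
// external scanner's state table); the other states lex internally only.
static const TSLexMode lex_modes[] = {
  [0] = { .lex_state = 0,  .external_lex_state = 0 },
  [1] = { .lex_state = 12, .external_lex_state = 1 },
  [2] = { .lex_state = 7,  .external_lex_state = 0 },
};
```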
@@ -208,28 +189,76 @@ static bool parser__condense_stack(Parser *self) {
return result;
}

static Tree *parser__lex(Parser *self, TSStateId parse_state) {
TSStateId start_state = self->language->lex_states[parse_state];
TSStateId current_state = start_state;
Length start_position = self->lexer.current_position;
LOG("lex state:%d", start_state);
static void parser__restore_external_scanner(Parser *self, StackVersion version) {
const TSExternalTokenState *state = ts_stack_external_token_state(self->stack, version);
if (self->lexer.last_external_token_state != state) {
LOG("restore_external_scanner");
self->lexer.last_external_token_state = state;
if (state) {
self->language->external_scanner.deserialize(
self->external_scanner_payload,
*state
);
} else {
self->language->external_scanner.reset(self->external_scanner_payload);
}
}
}

static Tree *parser__lex(Parser *self, StackVersion version) {
TSStateId parse_state = ts_stack_top_state(self->stack, version);
Length start_position = ts_stack_top_position(self->stack, version);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
);

bool found_external_token = false;
bool found_error = false;
bool skipped_error = false;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
ts_lexer_reset(&self->lexer, start_position);

ts_lexer_start(&self->lexer, start_state);
for (;;) {
Length current_position = self->lexer.current_position;

while (!self->language->lex_fn(&self->lexer.data, current_state)) {
if (current_state != ERROR_STATE) {
if (valid_external_tokens) {
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state,
current_position.extent.row, current_position.extent.column);
parser__restore_external_scanner(self, version);
ts_lexer_start(&self->lexer);
if (self->language->external_scanner.scan(self->external_scanner_payload,
&self->lexer.data, valid_external_tokens)) {
found_external_token = true;
break;
}
ts_lexer_reset(&self->lexer, current_position);
}

LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state,
current_position.extent.row, current_position.extent.column);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
break;
}

if (!found_error) {
LOG("retry_in_error_mode");
current_state = ERROR_STATE;
found_error = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
);
ts_lexer_reset(&self->lexer, start_position);
ts_lexer_start(&self->lexer, current_state);
continue;
}

if (!skipped_error) {
LOG("skip_unrecognized_character");
skipped_error = true;
error_start_position = self->lexer.token_start_position;
first_error_character = self->lexer.data.lookahead;
}

@@ -239,15 +268,13 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
self->lexer.data.result_symbol = ts_builtin_sym_error;
break;
}
self->lexer.data.advance(&self->lexer, ERROR_STATE, false);
self->lexer.data.advance(&self->lexer, false);
}

skipped_error = true;
error_end_position = self->lexer.current_position;
}

Tree *result;

if (skipped_error) {
Length padding = length_sub(error_start_position, start_position);
Length size = length_sub(error_end_position, error_start_position);

@@ -255,20 +282,28 @@ static Tree *parser__lex(Parser *self, TSStateId parse_state) {
result = ts_tree_make_error(size, padding, first_error_character);
} else {
TSSymbol symbol = self->lexer.data.result_symbol;
Length padding =
length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position,
self->lexer.token_start_position);
result =
ts_tree_make_leaf(symbol, padding, size,
ts_language_symbol_metadata(self->language, symbol));
if (found_external_token) {
symbol = self->language->external_scanner.symbol_map[symbol];
}

Length padding = length_sub(self->lexer.token_start_position, start_position);
Length size = length_sub(self->lexer.current_position, self->lexer.token_start_position);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
result = ts_tree_make_leaf(symbol, padding, size, metadata);

if (found_external_token) {
result->has_external_tokens = true;
result->has_external_token_state = true;
memset(result->external_token_state, 0, sizeof(TSExternalTokenState));
self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state);
self->lexer.last_external_token_state = &result->external_token_state;
}
}

if (!result)
return NULL;

result->parse_state = parse_state;
result->first_leaf.lex_state = start_state;
result->first_leaf.lex_mode = lex_mode;

LOG("lexed_lookahead sym:%s, size:%u", SYM_NAME(result->symbol), result->size.bytes);
return result;
}

@@ -277,21 +312,31 @@ static void parser__clear_cached_token(Parser *self) {
self->cached_token = NULL;
}

static inline bool ts_external_token_state_eq(const TSExternalTokenState *self,
const TSExternalTokenState *other) {
if (self == other) {
return true;
} else if (!self || !other) {
return false;
} else {
return memcmp(self, other, sizeof(TSExternalTokenState)) == 0;
}
}

static Tree *parser__get_lookahead(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
ReusableNode *reusable_node,
bool *is_fresh) {
Length position = ts_stack_top_position(self->stack, version);

while (reusable_node->tree) {
if (reusable_node->byte_index > position.bytes) {
LOG("before_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol));
break;
}

if (reusable_node->byte_index < position.bytes) {
LOG("past_reusable sym:%s, pos:%u",
SYM_NAME(reusable_node->tree->symbol), reusable_node->byte_index);
parser__pop_reusable_node(reusable_node);
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
reusable_node_pop(reusable_node);
continue;
}

@@ -299,8 +344,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
LOG("cant_reuse_changed tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!parser__breakdown_reusable_node(reusable_node)) {
parser__pop_reusable_node(reusable_node);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;

@@ -310,8 +355,21 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
LOG("cant_reuse_error tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!parser__breakdown_reusable_node(reusable_node)) {
parser__pop_reusable_node(reusable_node);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
}

if (!ts_external_token_state_eq(
reusable_node->preceding_external_token_state,
ts_stack_external_token_state(self->stack, version))) {
LOG("cant_reuse_external_tokens tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;

@@ -327,9 +385,8 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
return self->cached_token;
}

ts_lexer_reset(&self->lexer, position);
TSStateId parse_state = ts_stack_top_state(self->stack, version);
return parser__lex(self, parse_state);
*is_fresh = true;
return parser__lex(self, version);
}

static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {

@@ -407,6 +464,10 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state,

bool is_pending = lookahead->child_count > 0;
ts_stack_push(self->stack, version, lookahead, is_pending, state);
if (lookahead->has_external_token_state) {
ts_stack_set_external_token_state(
self->stack, version, ts_tree_last_external_token_state(lookahead));
}
ts_tree_release(lookahead);
}

@@ -729,9 +790,13 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
LOG("new_parse");
}

if (self->language->external_scanner.reset) {
self->language->external_scanner.reset(self->external_scanner_payload);
}

ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
self->reusable_node = (ReusableNode){ previous_tree, 0 };
self->reusable_node = reusable_node_new(previous_tree);
self->cached_token = NULL;
self->finished_tree = NULL;
}

@@ -950,30 +1015,29 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
static void parser__advance(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
Tree *lookahead = parser__get_lookahead(self, version, reusable_node);
Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);

for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);

TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol,
&table_entry);
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);

if (!validated_lookahead) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree)
parser__pop_reusable_node_leaf(reusable_node);
else
if (lookahead == reusable_node->tree) {
reusable_node_pop_leaf(reusable_node);
} else {
parser__clear_cached_token(self);
}

ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
continue;
}

validated_lookahead = true;
LOG("lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol),
lookahead->size.bytes);
LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
}

bool reduction_stopped_at_error = false;

@@ -996,12 +1060,11 @@ static void parser__advance(Parser *self, StackVersion version,
}

if (lookahead->child_count > 0) {
if (parser__breakdown_lookahead(self, &lookahead, state,
reusable_node)) {
if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
}
}

@@ -1011,7 +1074,7 @@ static void parser__advance(Parser *self, StackVersion version,
parser__shift(self, version, next_state, lookahead, extra);

if (lookahead == reusable_node->tree)
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);

ts_tree_release(lookahead);
return;

@@ -1053,7 +1116,7 @@ static void parser__advance(Parser *self, StackVersion version,

case TSParseActionTypeRecover: {
while (lookahead->child_count > 0) {
parser__breakdown_reusable_node(reusable_node);
reusable_node_breakdown(reusable_node);
ts_tree_release(lookahead);
lookahead = reusable_node->tree;
ts_tree_retain(lookahead);

@@ -1061,7 +1124,7 @@ static void parser__advance(Parser *self, StackVersion version,

parser__recover(self, version, action.params.to_state, lookahead);
if (lookahead == reusable_node->tree)
parser__pop_reusable_node(reusable_node);
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
return;
}

@@ -1103,6 +1166,18 @@ bool parser_init(Parser *self) {
return true;
}

void parser_set_language(Parser *self, const TSLanguage *language) {
if (self->external_scanner_payload && self->language->external_scanner.destroy)
self->language->external_scanner.destroy(self->external_scanner_payload);

if (language && language->external_scanner.create)
self->external_scanner_payload = language->external_scanner.create();
else
self->external_scanner_payload = NULL;

self->language = language;
}

void parser_destroy(Parser *self) {
if (self->stack)
ts_stack_delete(self->stack);

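The `external_scanner` function pointers that `parser_set_language` and `parser__lex` invoke above (`create`, `destroy`, `reset`, `scan`, `serialize`, `deserialize`) are supplied per language. A minimal hand-written sketch of what such a scanner could look like — the header name, token enum, and "indent level" state are all assumptions for illustration, not part of this diff:

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include "tree_sitter/parser.h"  // assumed header; exposes TSLexer, TSExternalTokenState

enum { INDENT };  // hypothetical external token; indexes the validity whitelist

typedef struct {
  uint8_t indent_level;  // state that serialize/deserialize must round-trip
} Scanner;

static void *scanner_create() { return calloc(1, sizeof(Scanner)); }
static void scanner_destroy(void *payload) { free(payload); }
static void scanner_reset(void *payload) { ((Scanner *)payload)->indent_level = 0; }

// Called by parser__lex with the whitelist row selected by the current lex mode.
static bool scanner_scan(void *payload, TSLexer *lexer, const bool *whitelist) {
  Scanner *scanner = payload;
  if (whitelist[INDENT] && lexer->lookahead == '\t') {
    lexer->advance(lexer, false);  // consume one character (skip = false)
    scanner->indent_level++;
    lexer->result_symbol = INDENT;
    return true;
  }
  return false;
}

// Pack the scanner's state into the fixed 16-byte TSExternalTokenState buffer
// so the parser can attach it to the token tree and restore it later.
static bool scanner_serialize(void *payload, TSExternalTokenState state) {
  state[0] = ((Scanner *)payload)->indent_level;
  return true;
}

static void scanner_deserialize(void *payload, const TSExternalTokenState state) {
  ((Scanner *)payload)->indent_level = state[0];
}
```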
@@ -1112,6 +1187,7 @@ void parser_destroy(Parser *self) {
array_delete(&self->tree_path1);
if (self->tree_path2.contents)
array_delete(&self->tree_path2);
parser_set_language(self, NULL);
}

Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {

@@ -1128,15 +1204,14 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree) {

while (!ts_stack_is_halted(self->stack, version)) {
position = ts_stack_top_position(self->stack, version).chars;
if (position > last_position ||
(version > 0 && position == last_position))
if (position > last_position || (version > 0 && position == last_position))
break;

LOG("process version:%d, version_count:%u, state:%d, row:%u, col:%u",
version, ts_stack_version_count(self->stack),
ts_stack_top_state(self->stack, version),
ts_stack_top_position(self->stack, version).extent.row + 1,
ts_stack_top_position(self->stack, version).extent.column + 1);
ts_stack_top_position(self->stack, version).extent.row,
ts_stack_top_position(self->stack, version).extent.column);

parser__advance(self, version, &reusable_node);
LOG_STACK();

@@ -8,13 +8,9 @@ extern "C" {
#include "runtime/stack.h"
#include "runtime/array.h"
#include "runtime/lexer.h"
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"

typedef struct {
Tree *tree;
uint32_t byte_index;
} ReusableNode;

typedef struct {
Lexer lexer;
Stack *stack;

@@ -29,11 +25,14 @@ typedef struct {
ReusableNode reusable_node;
TreePath tree_path1;
TreePath tree_path2;
void *external_scanner_payload;
Tree *last_external_token;
} Parser;

bool parser_init(Parser *);
void parser_destroy(Parser *);
Tree *parser_parse(Parser *, TSInput, Tree *);
void parser_set_language(Parser *, const TSLanguage *);

#ifdef __cplusplus
}

src/runtime/reusable_node.h (new file, +50 lines)

@@ -0,0 +1,50 @@
#include "runtime/tree.h"

typedef struct {
Tree *tree;
uint32_t byte_index;
bool has_preceding_external_token;
const TSExternalTokenState *preceding_external_token_state;
} ReusableNode;

static inline ReusableNode reusable_node_new(Tree *tree) {
return (ReusableNode){
.tree = tree,
.byte_index = 0,
.has_preceding_external_token = false,
.preceding_external_token_state = NULL,
};
}

static inline void reusable_node_pop(ReusableNode *self) {
self->byte_index += ts_tree_total_bytes(self->tree);
if (self->tree->has_external_tokens) {
self->has_preceding_external_token = true;
self->preceding_external_token_state = ts_tree_last_external_token_state(self->tree);
}

while (self->tree) {
Tree *parent = self->tree->context.parent;
uint32_t next_index = self->tree->context.index + 1;
if (parent && parent->child_count > next_index) {
self->tree = parent->children[next_index];
return;
}
self->tree = parent;
}
}

static inline void reusable_node_pop_leaf(ReusableNode *self) {
while (self->tree->child_count > 0)
self->tree = self->tree->children[0];
reusable_node_pop(self);
}

static inline bool reusable_node_breakdown(ReusableNode *self) {
if (self->tree->child_count == 0) {
return false;
} else {
self->tree = self->tree->children[0];
return true;
}
}

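Taken together, `reusable_node_breakdown` and `reusable_node_pop` give the parser a cursor over the previous tree. A hedged usage sketch (assuming a `Tree *old_tree` from an earlier parse) that visits every leaf in document order:

```c
// Descend to the first child whenever possible; otherwise advance past the
// current leaf. byte_index tracks the byte offset of the subtree under the
// cursor, which parser__get_lookahead compares against the stack position.
ReusableNode node = reusable_node_new(old_tree);
while (node.tree) {
  if (node.tree->child_count > 0) {
    reusable_node_breakdown(&node);
  } else {
    // node.tree is a leaf beginning at byte offset node.byte_index
    reusable_node_pop(&node);
  }
}
```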
@@ -50,6 +50,7 @@ typedef struct {
StackNode *node;
bool is_halted;
unsigned push_count;
const TSExternalTokenState *external_token_state;
} StackHead;

struct Stack {

@@ -168,11 +169,13 @@ static void stack_node_add_link(StackNode *self, StackLink link) {
}

static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
unsigned push_count) {
unsigned push_count,
const TSExternalTokenState *external_token_state) {
StackHead head = {
.node = node,
.is_halted = false,
.push_count = push_count,
.external_token_state = external_token_state,
};
array_push(&self->heads, head);
stack_node_retain(node);

@@ -180,7 +183,8 @@ static StackVersion ts_stack__add_version(Stack *self, StackNode *node,
}

static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
unsigned push_count) {
unsigned push_count,
const TSExternalTokenState *external_token_state) {
for (uint32_t i = self->slices.size - 1; i + 1 > 0; i--) {
StackVersion version = self->slices.contents[i].version;
if (self->heads.contents[version].node == node) {

@@ -190,7 +194,7 @@ static void ts_stack__add_slice(Stack *self, StackNode *node, TreeArray *trees,
}
}

StackVersion version = ts_stack__add_version(self, node, push_count);
StackVersion version = ts_stack__add_version(self, node, push_count, external_token_state);
StackSlice slice = { *trees, version };
array_push(&self->slices, slice);
}

@@ -202,6 +206,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,

StackHead *head = array_get(&self->heads, version);
unsigned push_count = head->push_count;
const TSExternalTokenState *external_token_state = head->external_token_state;
Iterator iterator = {
.node = head->node,
.trees = array_new(),

@@ -229,7 +234,8 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version,
if (!should_stop)
ts_tree_array_copy(trees, &trees);
array_reverse(&trees);
ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count);
ts_stack__add_slice(self, node, &trees, push_count + iterator->push_count,
external_token_state);
}

if (should_stop) {

@@ -288,7 +294,12 @@ Stack *ts_stack_new() {
self->base_node =
stack_node_new(NULL, NULL, false, 1, length_zero(), &self->node_pool);
stack_node_retain(self->base_node);
array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
array_push(&self->heads, ((StackHead){
self->base_node,
false,
0,
NULL
}));

return self;
}

@@ -327,11 +338,19 @@ unsigned ts_stack_push_count(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->push_count;
}

void ts_stack_decrease_push_count(const Stack *self, StackVersion version,
void ts_stack_decrease_push_count(Stack *self, StackVersion version,
unsigned decrement) {
array_get(&self->heads, version)->push_count -= decrement;
}

const TSExternalTokenState *ts_stack_external_token_state(const Stack *self, StackVersion version) {
return array_get(&self->heads, version)->external_token_state;
}

void ts_stack_set_external_token_state(Stack *self, StackVersion version, const TSExternalTokenState *state) {
array_get(&self->heads, version)->external_token_state = state;
}

ErrorStatus ts_stack_error_status(const Stack *self, StackVersion version) {
StackHead *head = array_get(&self->heads, version);
return (ErrorStatus){

@@ -480,7 +499,8 @@ bool ts_stack_merge(Stack *self, StackVersion version, StackVersion new_version)
if (new_node->state == node->state &&
new_node->position.chars == node->position.chars &&
new_node->error_count == node->error_count &&
new_node->error_cost == node->error_cost) {
new_node->error_cost == node->error_cost &&
new_head->external_token_state == head->external_token_state) {
for (uint32_t j = 0; j < new_node->link_count; j++)
stack_node_add_link(node, new_node->links[j]);
if (new_head->push_count > head->push_count)

@@ -505,7 +525,12 @@ void ts_stack_clear(Stack *self) {
for (uint32_t i = 0; i < self->heads.size; i++)
stack_node_release(self->heads.contents[i].node, &self->node_pool);
array_clear(&self->heads);
array_push(&self->heads, ((StackHead){ self->base_node, false, 0 }));
array_push(&self->heads, ((StackHead){
self->base_node,
false,
0,
NULL
}));
}

bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {

@@ -528,8 +553,20 @@ bool ts_stack_print_dot_graph(Stack *self, const char **symbol_names, FILE *f) {
fprintf(
f,
"node_head_%u -> node_%p [label=%u, fontcolor=blue, weight=10000, "
"labeltooltip=\"push_count: %u\"]\n",
"labeltooltip=\"push_count: %u",
i, head->node, i, head->push_count);

if (head->external_token_state) {
const TSExternalTokenState *s = head->external_token_state;
fprintf(f,
"\nexternal_token_state: "
"%2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X %2X",
(*s)[0], (*s)[1], (*s)[2], (*s)[3], (*s)[4], (*s)[5], (*s)[6], (*s)[7],
(*s)[8], (*s)[9], (*s)[10], (*s)[11], (*s)[12], (*s)[13], (*s)[14], (*s)[15]
);
}

fprintf(f, "\"]\n");
array_push(&self->iterators, ((Iterator){.node = head->node }));
}

@@ -65,7 +65,11 @@ TSStateId ts_stack_top_state(const Stack *, StackVersion);

unsigned ts_stack_push_count(const Stack *, StackVersion);

void ts_stack_decrease_push_count(const Stack *, StackVersion, unsigned);
void ts_stack_decrease_push_count(Stack *, StackVersion, unsigned);

const TSExternalTokenState *ts_stack_external_token_state(const Stack *, StackVersion);

void ts_stack_set_external_token_state(Stack *, StackVersion, const TSExternalTokenState *);

/*
* Get the position at the top of the given version of the stack. If the stack

@@ -25,10 +25,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
.first_leaf = {
.symbol = sym,
.lex_state = 0
}
.first_leaf.symbol = sym,
};
return result;
}

@@ -111,6 +108,8 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->named_child_count = 0;
self->visible_child_count = 0;
self->error_cost = 0;
self->has_external_tokens = false;
self->has_external_token_state = false;

for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];

@@ -128,11 +127,14 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->visible_child_count++;
if (child->named)
self->named_child_count++;
} else {
} else if (child->child_count > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
}

if (child->has_external_tokens) self->has_external_tokens = true;
if (child->has_external_token_state) self->has_external_token_state = true;

if (child->symbol == ts_builtin_sym_error) {
self->fragile_left = self->fragile_right = true;
self->parse_state = TS_TREE_STATE_NONE;

@@ -377,6 +379,21 @@ void ts_tree_edit(Tree *self, const TSInputEdit *edit) {
}
}

const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *tree) {
while (tree->child_count > 0) {
for (uint32_t i = tree->child_count - 1; i + 1 > 0; i--) {
Tree *child = tree->children[i];
if (child->has_external_token_state) {
tree = child;
break;
} else if (child->has_external_tokens) {
return NULL;
}
}
}
return &tree->external_token_state;
}

static size_t ts_tree__write_char_to_string(char *s, size_t n, int32_t c) {
if (c == 0)
return snprintf(s, n, "EOF");

@@ -22,10 +22,13 @@ typedef struct Tree {
} context;

uint32_t child_count;
uint32_t visible_child_count;
uint32_t named_child_count;
union {
struct Tree **children;
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
struct Tree **children;
};
TSExternalTokenState external_token_state;
int32_t lookahead_char;
};

@@ -38,7 +41,7 @@ typedef struct Tree {

struct {
TSSymbol symbol;
TSStateId lex_state;
TSLexMode lex_mode;
} first_leaf;

unsigned short ref_count;

@@ -48,6 +51,8 @@ typedef struct Tree {
bool fragile_left : 1;
bool fragile_right : 1;
bool has_changes : 1;
bool has_external_tokens : 1;
bool has_external_token_state : 1;
} Tree;

typedef struct {

@@ -81,6 +86,7 @@ void ts_tree_assign_parents(Tree *, TreePath *);
void ts_tree_edit(Tree *, const TSInputEdit *edit);
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);
const TSExternalTokenState *ts_tree_last_external_token_state(const Tree *);

static inline uint32_t ts_tree_total_bytes(const Tree *self) {
return self->padding.bytes + self->size.bytes;

@@ -21,20 +21,20 @@ static void range_array_add(RangeArray *results, TSPoint start, TSPoint end) {
}
}

static bool tree_path_descend(TreePath *path, TSPoint position) {
static bool tree_path_descend(TreePath *path, Length position) {
uint32_t original_size = path->size;

bool did_descend;
do {
did_descend = false;
TreePathEntry entry = *array_back(path);
Length child_position = entry.position;
Length child_left = entry.position;
for (uint32_t i = 0; i < entry.tree->child_count; i++) {
Tree *child = entry.tree->children[i];
Length child_right_position =
length_add(child_position, ts_tree_total_size(child));
if (point_lt(position, child_right_position.extent)) {
TreePathEntry child_entry = { child, child_position, i };
if (child->visible) {
Length child_right = length_add(child_left, ts_tree_total_size(child));
if (position.bytes < child_right.bytes) {
TreePathEntry child_entry = { child, child_left, i };
if (child->visible || child->child_count == 0) {
array_push(path, child_entry);
return true;
} else if (child->visible_child_count > 0) {

@@ -43,39 +43,44 @@ static bool tree_path_descend(TreePath *path, TSPoint position) {
break;
}
}
child_position = child_right_position;
child_left = child_right;
}
} while (did_descend);

path->size = original_size;
return false;
}

static uint32_t tree_path_advance(TreePath *path) {
uint32_t ascend_count = 0;

while (path->size > 0) {
TreePathEntry entry = array_pop(path);
if (path->size == 0)
break;
if (path->size == 0) break;
TreePathEntry parent_entry = *array_back(path);
if (parent_entry.tree->visible) ascend_count++;
Length position =
length_add(entry.position, ts_tree_total_size(entry.tree));

Length position = length_add(entry.position, ts_tree_total_size(entry.tree));
for (uint32_t i = entry.child_index + 1; i < parent_entry.tree->child_count; i++) {
Tree *next_child = parent_entry.tree->children[i];
if (next_child->visible || next_child->visible_child_count > 0) {
if (next_child->visible ||
next_child->child_count == 0 ||
next_child->visible_child_count > 0) {
if (parent_entry.tree->visible) ascend_count--;
array_push(path, ((TreePathEntry){
.tree = next_child,
.child_index = i,
.position = position,
}));
if (!next_child->visible)
tree_path_descend(path, (TSPoint){ 0, 0 });
if (!next_child->visible) {
tree_path_descend(path, length_zero());
}
return ascend_count;
}
position = length_add(position, ts_tree_total_size(next_child));
}
}

return ascend_count;
}

@@ -94,8 +99,27 @@ static void tree_path_init(TreePath *path, Tree *tree) {
.position = { 0, 0, { 0, 0 } },
.child_index = 0,
}));
if (!tree->visible)
tree_path_descend(path, (TSPoint){ 0, 0 });
if (!tree->visible) {
tree_path_descend(path, length_zero());
}
}

Tree *tree_path_visible_tree(TreePath *self) {
for (uint32_t i = self->size - 1; i + 1 > 0; i--) {
Tree *tree = self->contents[i].tree;
if (tree->visible) return tree;
}
return NULL;
}

Length tree_path_start_position(TreePath *self) {
TreePathEntry entry = *array_back(self);
return length_add(entry.position, entry.tree->padding);
}

Length tree_path_end_position(TreePath *self) {
TreePathEntry entry = *array_back(self);
return length_add(length_add(entry.position, entry.tree->padding), entry.tree->size);
}

static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {

@@ -112,67 +136,59 @@ static bool tree_must_eq(Tree *old_tree, Tree *new_tree) {

static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
TSRange **ranges, uint32_t *range_count) {
TSPoint position = { 0, 0 };
Length position = length_zero();
RangeArray results = array_new();

while (old_path->size && new_path->size) {
bool is_changed = false;
TSPoint next_position = position;
Length next_position = position;

TreePathEntry old_entry = *array_back(old_path);
TreePathEntry new_entry = *array_back(new_path);
Tree *old_tree = old_entry.tree;
Tree *new_tree = new_entry.tree;
uint32_t old_start_byte = old_entry.position.bytes + old_tree->padding.bytes;
uint32_t new_start_byte = new_entry.position.bytes + new_tree->padding.bytes;
TSPoint old_start_point =
point_add(old_entry.position.extent, old_tree->padding.extent);
TSPoint new_start_point =
point_add(new_entry.position.extent, new_tree->padding.extent);
TSPoint old_end_point = point_add(old_start_point, old_tree->size.extent);
TSPoint new_end_point = point_add(new_start_point, new_tree->size.extent);
Tree *old_tree = tree_path_visible_tree(old_path);
Tree *new_tree = tree_path_visible_tree(new_path);
Length old_start = tree_path_start_position(old_path);
Length new_start = tree_path_start_position(new_path);
Length old_end = tree_path_end_position(old_path);
Length new_end = tree_path_end_position(new_path);

// #define NAME(t) (ts_language_symbol_name(language, ((Tree *)(t))->symbol))
// printf("At [%-2lu, %-2lu] Compare (%-20s\t [%-2lu, %-2lu] - [%lu, %lu])\tvs\t(%-20s\t [%lu, %lu] - [%lu, %lu])\n",
// position.row, position.column, NAME(old_tree), old_start_point.row,
// old_start_point.column, old_end_point.row, old_end_point.column,
// NAME(new_tree), new_start_point.row, new_start_point.column,
// new_end_point.row, new_end_point.column);
// printf("At [%-2u, %-2u] Compare (%-20s\t [%-2u, %-2u] - [%u, %u])\tvs\t(%-20s\t [%u, %u] - [%u, %u])\n",
// position.extent.row, position.extent.column,
// NAME(old_tree), old_start.extent.row, old_start.extent.column, old_end.extent.row, old_end.extent.column,
// NAME(new_tree), new_start.extent.row, new_start.extent.column, new_end.extent.row, new_end.extent.column);

if (point_lt(position, old_start_point)) {
if (point_lt(position, new_start_point)) {
next_position = point_min(old_start_point, new_start_point);
if (position.bytes < old_start.bytes) {
if (position.bytes < new_start.bytes) {
next_position = length_min(old_start, new_start);
} else {
is_changed = true;
next_position = old_start_point;
next_position = old_start;
}
} else if (point_lt(position, new_start_point)) {
} else if (position.bytes < new_start.bytes) {
is_changed = true;
next_position = new_start_point;
} else if (old_start_byte == new_start_byte &&
tree_must_eq(old_tree, new_tree)) {
next_position = old_end_point;
next_position = new_start;
} else if (old_start.bytes == new_start.bytes && tree_must_eq(old_tree, new_tree)) {
next_position = old_end;
} else if (old_tree->symbol == new_tree->symbol) {
if (tree_path_descend(old_path, position)) {
if (!tree_path_descend(new_path, position)) {
tree_path_ascend(old_path, 1);
is_changed = true;
next_position = new_end_point;
next_position = new_end;
}
} else if (tree_path_descend(new_path, position)) {
tree_path_ascend(new_path, 1);
is_changed = true;
next_position = old_end_point;
next_position = old_end;
} else {
next_position = point_min(old_end_point, new_end_point);
next_position = length_min(old_end, new_end);
}
} else {
is_changed = true;
next_position = point_min(old_end_point, new_end_point);
next_position = length_min(old_end, new_end);
}

bool at_old_end = point_lte(old_end_point, next_position);
bool at_new_end = point_lte(new_end_point, next_position);
bool at_old_end = old_end.bytes <= next_position.bytes;
bool at_new_end = new_end.bytes <= next_position.bytes;

if (at_new_end && at_old_end) {
uint32_t old_ascend_count = tree_path_advance(old_path);

@@ -190,7 +206,7 @@ static void tree_path_get_changes(TreePath *old_path, TreePath *new_path,
tree_path_ascend(new_path, ascend_count);
}

if (is_changed) range_array_add(&results, position, next_position);
if (is_changed) range_array_add(&results, position.extent, next_position.extent);
position = next_position;
}

todo.md (deleted, 32 lines)

@@ -1,32 +0,0 @@
TODO
====

### Handling ambiguity (GLR)
* Add a simple way to specify syntactic ambiguity resolutions in the Grammar (e.g. 'prefer declarations to statements' in C), similar to bison's `dprec` construct.

### Runtime System
* Refactoring: make separate symbol for unexpected characters than for interior error nodes.

### Testing / Quality
* Start running the clang-analyzer on the codebase on Travis-CI.
* Use the Valgrind leak checker to fix the memory leaks in the runtime library.
* Randomize the editing in the language tests, using a seed that can be specified in order to reproduce failures.

### Ubiquitous token handling
* Fix the unintuitive tree that results when ubiquitous tokens are last child of their parent node.

### Error handling
* Use information about nesting depth of tokens like '(' and ')' to make error recovery more accurate.

### Grammar Features
* Regexp assertions
  - [ ] '^'
  - [ ] '$'
  - [ ] '\b'
* Composing languages
  - [ ] Rule for referencing named grammar
  - [ ] Grammar registry object in runtime
  - [ ] Parsing returns control to parent language
* Indentation tokens