Merge pull request #100 from tree-sitter/fix-randomized-test-failures

Fix some bugs found using randomized tests
This commit is contained in:
Max Brunsfeld 2017-08-31 12:51:15 -07:00 committed by GitHub
commit 7d7e5f2c2c
13 changed files with 293 additions and 303 deletions

View file

@ -11,7 +11,6 @@
'externals/json-parser',
],
'sources': [
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_item_transitions.cc',
@ -20,6 +19,7 @@
'src/compiler/build_tables/lookahead_set.cc',
'src/compiler/build_tables/parse_item.cc',
'src/compiler/build_tables/parse_item_set_builder.cc',
'src/compiler/build_tables/parse_table_builder.cc',
'src/compiler/build_tables/rule_can_be_blank.cc',
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',

View file

@ -1,22 +0,0 @@
// Declares build_parse_table(): constructs an LR parse table from a grammar's
// syntactic and lexical rules.
#ifndef COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
#define COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_
#include <utility>
#include <vector>
#include "compiler/parse_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
// Forward declarations — full definitions are not needed for this signature.
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
// Returns the generated ParseTable paired with a CompileError describing any
// failure encountered during table construction (NOTE(review): presumably a
// "no error" value on success — confirm the convention in compile_error.h).
std::pair<ParseTable, CompileError> build_parse_table(const SyntaxGrammar &,
const LexicalGrammar &);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_BUILD_PARSE_TABLE_H_

View file

@ -1,7 +1,7 @@
#include "compiler/build_tables/build_tables.h"
#include <tuple>
#include "compiler/build_tables/lex_table_builder.h"
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/syntax_grammar.h"
#include "compiler/lexical_grammar.h"
#include "compiler/compile_error.h"
@ -9,19 +9,25 @@
namespace tree_sitter {
namespace build_tables {
using std::string;
using std::tuple;
using std::vector;
using std::make_tuple;
tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &grammar,
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar
) {
auto parse_table_result = build_parse_table(grammar, lexical_grammar);
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
auto parse_table_builder = ParseTableBuilder::create(
syntax_grammar,
lexical_grammar,
lex_table_builder.get()
);
auto parse_table_result = parse_table_builder->build();
ParseTable parse_table = parse_table_result.first;
const CompileError error = parse_table_result.second;
LexTable lex_table = LexTableBuilder::create(lexical_grammar)->build(&parse_table);
LexTable lex_table = lex_table_builder->build(&parse_table);
return make_tuple(parse_table, lex_table, error);
}

View file

@ -1,9 +1,7 @@
#ifndef COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#define COMPILER_BUILD_TABLES_BUILD_TABLES_H_
#include <string>
#include <tuple>
#include <vector>
#include "compiler/parse_table.h"
#include "compiler/lex_table.h"
#include "compiler/compile_error.h"
@ -16,7 +14,9 @@ struct LexicalGrammar;
namespace build_tables {
std::tuple<ParseTable, LexTable, CompileError> build_tables(
const SyntaxGrammar &, const LexicalGrammar &);
const SyntaxGrammar &,
const LexicalGrammar &
);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -1,16 +0,0 @@
// Defines ConflictType: the possible outcomes of a conflict check performed
// while building parse tables.
#ifndef COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
#define COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_
namespace tree_sitter {
namespace build_tables {
enum ConflictType {
ConflictTypeNone,        // no conflict detected
ConflictTypeResolved,    // a conflict existed but was resolved — TODO confirm resolution mechanism at call sites
ConflictTypeUnresolved   // a conflict remains unresolved
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_CONFLICT_TYPE_H_

View file

@ -90,6 +90,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
separator_rules.push_back(Blank{});
separator_start_characters = separator_character_aggregator.result;
clear();
}
LexTable build(ParseTable *parse_table) {
@ -105,8 +106,6 @@ class LexTableBuilderImpl : public LexTableBuilder {
bool detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals_by_terminal_index) {
clear();
StartingCharacterAggregator left_starting_characters;
StartingCharacterAggregator right_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
@ -144,7 +143,9 @@ class LexTableBuilderImpl : public LexTableBuilder {
current_conflict_detection_token_index = right;
current_conflict_detection_following_characters = following_characters_entry->second;
add_lex_state(item_set_for_terminals({{Symbol::terminal(left), {}}, {Symbol::terminal(right), {}}}));
return current_conflict_value;
bool result = current_conflict_value;
clear();
return result;
}
bool is_keyword(const LexicalVariable &variable) {

View file

@ -1,4 +1,4 @@
#include "compiler/build_tables/build_parse_table.h"
#include "compiler/build_tables/parse_table_builder.h"
#include <algorithm>
#include <map>
#include <set>
@ -26,6 +26,7 @@ using std::map;
using std::move;
using std::string;
using std::to_string;
using std::unique_ptr;
using std::unordered_map;
using rules::Associativity;
using rules::Symbol;
@ -39,7 +40,7 @@ struct ParseStateQueueEntry {
ParseStateId state_id;
};
class ParseTableBuilder {
class ParseTableBuilderImpl : public ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
unordered_map<Symbol, ParseItemSet> recovery_item_sets_by_lookahead;
@ -48,16 +49,21 @@ class ParseTableBuilder {
deque<ParseStateQueueEntry> parse_state_queue;
ParseTable parse_table;
ParseItemSetBuilder item_set_builder;
LexTableBuilder *lex_table_builder;
set<ParseAction> fragile_reductions;
vector<set<Symbol>> incompatible_tokens_by_token_index;
vector<set<Symbol::Index>> following_tokens_by_token_index;
bool processing_recovery_states;
public:
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar)
: grammar(grammar),
lexical_grammar(lex_grammar),
item_set_builder(grammar, lex_grammar),
ParseTableBuilderImpl(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
) : grammar(syntax_grammar),
lexical_grammar(lexical_grammar),
item_set_builder(syntax_grammar, lexical_grammar),
lex_table_builder(lex_table_builder),
incompatible_tokens_by_token_index(lexical_grammar.variables.size()),
following_tokens_by_token_index(lexical_grammar.variables.size()),
processing_recovery_states(false) {}
@ -350,7 +356,6 @@ class ParseTableBuilder {
}
void compute_unmergable_token_pairs() {
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
auto &incompatible_indices = incompatible_tokens_by_token_index[i];
@ -798,9 +803,18 @@ class ParseTableBuilder {
}
};
pair<ParseTable, CompileError> build_parse_table(
const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) {
return ParseTableBuilder(grammar, lex_grammar).build();
unique_ptr<ParseTableBuilder> ParseTableBuilder::create(
const SyntaxGrammar &syntax_grammar,
const LexicalGrammar &lexical_grammar,
LexTableBuilder *lex_table_builder
) {
return unique_ptr<ParseTableBuilder>(
new ParseTableBuilderImpl(syntax_grammar, lexical_grammar, lex_table_builder)
);
}
pair<ParseTable, CompileError> ParseTableBuilder::build() {
return static_cast<ParseTableBuilderImpl *>(this)->build();
}
} // namespace build_tables

View file

@ -0,0 +1,33 @@
// Declares ParseTableBuilder — the public interface for constructing a
// ParseTable. The concrete implementation is hidden behind the create()
// factory (defined in the corresponding .cc file).
#ifndef COMPILER_BUILD_TABLES_PARSE_TABLE_BUILDER_H_
#define COMPILER_BUILD_TABLES_PARSE_TABLE_BUILDER_H_
#include <memory>
#include "compiler/parse_table.h"
#include "compiler/compile_error.h"
namespace tree_sitter {
// Forward declarations — only pointers/references to these appear here.
struct ParseTable;
struct SyntaxGrammar;
struct LexicalGrammar;
namespace build_tables {
class LexTableBuilder;
class ParseTableBuilder {
public:
// Factory returning the concrete builder. The LexTableBuilder is passed as
// a raw pointer, i.e. borrowed rather than owned — NOTE(review): the caller
// must keep it alive for the builder's lifetime; confirm usage in the
// implementation.
static std::unique_ptr<ParseTableBuilder> create(
const SyntaxGrammar &,
const LexicalGrammar &,
LexTableBuilder *
);
// Builds the parse table, returning it together with any CompileError.
std::pair<ParseTable, CompileError> build();
protected:
// Not directly constructible — instances are obtained via create().
ParseTableBuilder() = default;
};
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_PARSE_TABLE_BUILDER_H_

View file

@ -110,7 +110,6 @@ void ts_lexer_init(Lexer *self) {
.payload = NULL,
.log = NULL
},
.last_external_token = NULL,
};
ts_lexer_reset(self, length_zero());
}
@ -134,7 +133,6 @@ static inline void ts_lexer__reset(Lexer *self, Length position) {
void ts_lexer_set_input(Lexer *self, TSInput input) {
self->input = input;
ts_lexer__reset(self, length_zero());
ts_lexer_set_last_external_token(self, NULL);
}
void ts_lexer_reset(Lexer *self, Length position) {
@ -157,9 +155,3 @@ void ts_lexer_start(Lexer *self) {
void ts_lexer_advance_to_end(Lexer *self) {
while (self->data.lookahead != 0) ts_lexer__advance(self, false);
}
void ts_lexer_set_last_external_token(Lexer *self, Tree *token) {
if (token) ts_tree_retain(token);
if (self->last_external_token) ts_tree_release(self->last_external_token);
self->last_external_token = token;
}

View file

@ -24,7 +24,6 @@ typedef struct {
TSInput input;
TSLogger logger;
char debug_buffer[TREE_SITTER_SERIALIZATION_BUFFER_SIZE];
Tree *last_external_token;
} Lexer;
void ts_lexer_init(Lexer *);
@ -32,7 +31,6 @@ void ts_lexer_set_input(Lexer *, TSInput);
void ts_lexer_reset(Lexer *, Length);
void ts_lexer_start(Lexer *);
void ts_lexer_advance_to_end(Lexer *);
void ts_lexer_set_last_external_token(Lexer *, Tree *);
#ifdef __cplusplus
}

View file

@ -14,16 +14,10 @@
#include "runtime/error_costs.h"
#define LOG(...) \
if (self->lexer.logger.log) { \
if (self->lexer.logger.log || self->print_debugging_graphs) { \
snprintf(self->lexer.debug_buffer, TREE_SITTER_SERIALIZATION_BUFFER_SIZE, __VA_ARGS__); \
self->lexer.logger.log(self->lexer.logger.payload, TSLogTypeParse, \
self->lexer.debug_buffer); \
parser__log(self); \
} \
if (self->print_debugging_graphs) { \
fprintf(stderr, "graph {\nlabel=\""); \
fprintf(stderr, __VA_ARGS__); \
fprintf(stderr, "\"\n}\n\n"); \
}
#define LOG_STACK() \
if (self->print_debugging_graphs) { \
@ -59,9 +53,23 @@ typedef struct {
TSSymbol lookahead_symbol;
} SkipPrecedingTreesSession;
static void parser__push(Parser *self, StackVersion version, Tree *tree, TSStateId state) {
ts_stack_push(self->stack, version, tree, false, state);
ts_tree_release(tree);
static void parser__log(Parser *self) {
if (self->lexer.logger.log) {
self->lexer.logger.log(
self->lexer.logger.payload,
TSLogTypeParse,
self->lexer.debug_buffer
);
}
if (self->print_debugging_graphs) {
fprintf(stderr, "graph {\nlabel=\"");
for (char *c = &self->lexer.debug_buffer[0]; *c != 0; c++) {
if (*c == '"') fputc('\\', stderr);
fputc(*c, stderr);
}
fprintf(stderr, "\"\n}\n\n");
}
}
static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
@ -95,7 +103,8 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
for (uint32_t j = 1; j < slice.trees.size; j++) {
Tree *tree = slice.trees.contents[j];
parser__push(self, slice.version, tree, state);
ts_stack_push(self->stack, slice.version, tree, false, state);
ts_tree_release(tree);
}
LOG("breakdown_top_of_stack tree:%s", SYM_NAME(parent->symbol));
@ -111,66 +120,35 @@ static bool parser__breakdown_top_of_stack(Parser *self, StackVersion version) {
return did_break_down;
}
static bool parser__breakdown_lookahead(Parser *self, Tree **lookahead,
static void parser__breakdown_lookahead(Parser *self, Tree **lookahead,
TSStateId state,
ReusableNode *reusable_node) {
bool result = false;
while (reusable_node->tree->child_count > 0 &&
(self->is_split || reusable_node->tree->parse_state != state ||
reusable_node->tree->fragile_left ||
reusable_node->tree->fragile_right)) {
bool did_break_down = false;
while (reusable_node->tree->child_count > 0 && reusable_node->tree->parse_state != state) {
LOG("state_mismatch sym:%s", SYM_NAME(reusable_node->tree->symbol));
reusable_node_breakdown(reusable_node);
result = true;
did_break_down = true;
}
if (result) {
if (did_break_down) {
ts_tree_release(*lookahead);
ts_tree_retain(*lookahead = reusable_node->tree);
}
return result;
}
static inline bool ts_lex_mode_eq(TSLexMode self, TSLexMode other) {
return self.lex_state == other.lex_state &&
self.external_lex_state == other.external_lex_state;
}
static bool parser__can_reuse(Parser *self, TSStateId state, Tree *tree,
TableEntry *table_entry) {
TSLexMode current_lex_mode = self->language->lex_modes[state];
if (ts_lex_mode_eq(tree->first_leaf.lex_mode, current_lex_mode))
return true;
if (current_lex_mode.external_lex_state != 0)
return false;
if (tree->size.bytes == 0)
return false;
if (!table_entry->is_reusable)
return false;
if (!table_entry->depends_on_lookahead)
return true;
return tree->child_count > 1 && tree->error_cost == 0;
}
typedef int CondenseResult;
static int CondenseResultMadeChange = 1;
static int CondenseResultAllVersionsHadError = 2;
static CondenseResult parser__condense_stack(Parser *self) {
CondenseResult result = 0;
bool has_version_without_errors = false;
static bool parser__condense_stack(Parser *self) {
bool all_versions_have_error = true;
unsigned old_version_count = ts_stack_version_count(self->stack);
for (StackVersion i = 0; i < ts_stack_version_count(self->stack); i++) {
if (ts_stack_is_halted(self->stack, i)) {
ts_stack_remove_version(self->stack, i);
result |= CondenseResultMadeChange;
i--;
continue;
}
ErrorStatus right_error_status = ts_stack_error_status(self->stack, i);
if (right_error_status.count == 0) has_version_without_errors = true;
if (right_error_status.count == 0) all_versions_have_error = false;
for (StackVersion j = 0; j < i; j++) {
bool can_merge = ts_stack_can_merge(self->stack, i, j);
@ -179,14 +157,12 @@ static CondenseResult parser__condense_stack(Parser *self) {
switch (error_status_compare(left_error_status, right_error_status, can_merge)) {
case ErrorComparisonTakeLeft:
ts_stack_remove_version(self->stack, i);
result |= CondenseResultMadeChange;
i--;
j = i;
break;
case ErrorComparisonTakeRight:
ts_stack_remove_version(self->stack, j);
result |= CondenseResultMadeChange;
i--;
j--;
break;
@ -194,7 +170,6 @@ static CondenseResult parser__condense_stack(Parser *self) {
case ErrorComparisonPreferLeft:
if (can_merge) {
ts_stack_force_merge(self->stack, j, i);
result |= CondenseResultMadeChange;
i--;
j = i;
}
@ -203,42 +178,39 @@ static CondenseResult parser__condense_stack(Parser *self) {
case ErrorComparisonPreferRight:
if (can_merge) {
ts_stack_remove_version(self->stack, j);
result |= CondenseResultMadeChange;
i--;
j--;
} else {
ts_stack_swap_versions(self->stack, i, j);
j = i;
result |= CondenseResultMadeChange;
}
break;
case ErrorComparisonNone:
if (can_merge) {
ts_stack_force_merge(self->stack, j, i);
result |= CondenseResultMadeChange;
i--;
break;
}
break;
}
}
}
while (ts_stack_version_count(self->stack) > MAX_VERSION_COUNT) {
ts_stack_remove_version(self->stack, MAX_VERSION_COUNT);
result |= CondenseResultMadeChange;
}
if (!has_version_without_errors && ts_stack_version_count(self->stack) > 0) {
result |= CondenseResultAllVersionsHadError;
unsigned new_version_count = ts_stack_version_count(self->stack);
if (new_version_count != old_version_count) {
LOG("condense");
LOG_STACK();
}
return result;
return all_versions_have_error && new_version_count > 0;
}
static void parser__restore_external_scanner(Parser *self, Tree *external_token) {
LOG("restore_external_scanner");
ts_lexer_set_last_external_token(&self->lexer, external_token);
if (external_token) {
self->language->external_scanner.deserialize(
self->external_scanner_payload,
@ -253,6 +225,7 @@ static void parser__restore_external_scanner(Parser *self, Tree *external_token)
static Tree *parser__lex(Parser *self, StackVersion version) {
TSStateId parse_state = ts_stack_top_state(self->stack, version);
Length start_position = ts_stack_top_position(self->stack, version);
Tree *external_token = ts_stack_last_external_token(self->stack, version);
TSLexMode lex_mode = self->language->lex_modes[parse_state];
const bool *valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
@ -260,50 +233,54 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
);
bool found_external_token = false;
bool found_error = false;
bool skipped_error = false;
int32_t first_error_character = 0;
Length error_start_position, error_end_position;
uint32_t last_byte_scanned = start_position.bytes;
ts_lexer_reset(&self->lexer, start_position);
for (;;) {
Length current_position = self->lexer.current_position;
if (valid_external_tokens) {
LOG("lex_external state:%d, row:%u, column:%u", lex_mode.external_lex_state,
current_position.extent.row, current_position.extent.column);
Tree *external_token = ts_stack_last_external_token(self->stack, version);
if (!ts_tree_external_token_state_eq(self->lexer.last_external_token, external_token)) {
parser__restore_external_scanner(self, external_token);
}
LOG(
"lex_external state:%d, row:%u, column:%u",
lex_mode.external_lex_state,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
found_external_token = self->language->external_scanner.scan(
parser__restore_external_scanner(self, external_token);
if (self->language->external_scanner.scan(
self->external_scanner_payload,
&self->lexer.data,
valid_external_tokens
);
if (found_external_token) {
)) {
if (length_has_unknown_chars(self->lexer.token_end_position)) {
self->lexer.token_end_position = self->lexer.current_position;
}
// Don't allow zero-length external tokens during error recovery.
if (lex_mode.lex_state == 0 && self->lexer.token_end_position.bytes <= current_position.bytes) {
parser__restore_external_scanner(self, external_token);
found_external_token = false;
if (lex_mode.lex_state == ERROR_STATE &&
self->lexer.token_end_position.bytes <= current_position.bytes) {
LOG("disregard_empty_token");
} else {
found_external_token = true;
break;
}
}
if (self->lexer.current_position.bytes > last_byte_scanned) {
last_byte_scanned = self->lexer.current_position.bytes;
}
ts_lexer_reset(&self->lexer, current_position);
}
LOG("lex_internal state:%d, row:%u, column:%u", lex_mode.lex_state,
current_position.extent.row, current_position.extent.column);
LOG(
"lex_internal state:%d, row:%u, column:%u",
lex_mode.lex_state,
current_position.extent.row,
current_position.extent.column
);
ts_lexer_start(&self->lexer);
if (self->language->lex_fn(&self->lexer.data, lex_mode.lex_state)) {
if (length_has_unknown_chars(self->lexer.token_end_position)) {
@ -312,14 +289,16 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
break;
}
if (!found_error) {
if (lex_mode.lex_state != self->language->lex_modes[ERROR_STATE].lex_state) {
LOG("retry_in_error_mode");
found_error = true;
lex_mode = self->language->lex_modes[ERROR_STATE];
valid_external_tokens = ts_language_enabled_external_tokens(
self->language,
lex_mode.external_lex_state
);
if (self->lexer.current_position.bytes > last_byte_scanned) {
last_byte_scanned = self->lexer.current_position.bytes;
}
ts_lexer_reset(&self->lexer, start_position);
continue;
}
@ -365,11 +344,13 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
self->lexer.debug_buffer
);
ts_external_token_state_init(&result->external_token_state, self->lexer.debug_buffer, length);
ts_lexer_set_last_external_token(&self->lexer, result);
}
}
result->bytes_scanned = self->lexer.current_position.bytes - start_position.bytes + 1;
if (self->lexer.current_position.bytes > last_byte_scanned) {
last_byte_scanned = self->lexer.current_position.bytes;
}
result->bytes_scanned = last_byte_scanned - start_position.bytes + 1;
result->parse_state = parse_state;
result->first_leaf.lex_mode = lex_mode;
@ -377,75 +358,114 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
return result;
}
static void parser__clear_cached_token(Parser *self) {
if (self->cached_token) ts_tree_release(self->cached_token);
self->cached_token = NULL;
static Tree *parser__get_cached_token(Parser *self, size_t byte_index, Tree *last_external_token) {
TokenCache *cache = &self->token_cache;
if (cache->token &&
cache->byte_index == byte_index &&
ts_tree_external_token_state_eq(cache->last_external_token, last_external_token)) {
return cache->token;
} else {
return NULL;
}
}
static Tree *parser__get_lookahead(Parser *self, StackVersion version,
ReusableNode *reusable_node,
bool *is_fresh) {
Length position = ts_stack_top_position(self->stack, version);
static void parser__set_cached_token(Parser *self, size_t byte_index, Tree *last_external_token,
Tree *token) {
TokenCache *cache = &self->token_cache;
if (token) ts_tree_retain(token);
if (last_external_token) ts_tree_retain(last_external_token);
if (cache->token) ts_tree_release(cache->token);
if (cache->last_external_token) ts_tree_release(cache->last_external_token);
cache->token = token;
cache->byte_index = byte_index;
cache->last_external_token = last_external_token;
}
while (reusable_node->tree) {
static bool parser__can_reuse_first_leaf(Parser *self, TSStateId state, Tree *tree,
TableEntry *table_entry) {
TSLexMode current_lex_mode = self->language->lex_modes[state];
return
(tree->first_leaf.lex_mode.lex_state == current_lex_mode.lex_state &&
tree->first_leaf.lex_mode.external_lex_state == current_lex_mode.external_lex_state) ||
(current_lex_mode.external_lex_state == 0 &&
tree->size.bytes > 0 &&
table_entry->is_reusable &&
(!table_entry->depends_on_lookahead || (tree->child_count > 1 && tree->error_cost == 0)));
}
static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId *state,
ReusableNode *reusable_node, TableEntry *table_entry) {
Length position = ts_stack_top_position(self->stack, version);
Tree *last_external_token = ts_stack_last_external_token(self->stack, version);
Tree *result;
while ((result = reusable_node->tree)) {
if (reusable_node->byte_index > position.bytes) {
LOG("before_reusable_node sym:%s", SYM_NAME(reusable_node->tree->symbol));
LOG("before_reusable_node symbol:%s", SYM_NAME(result->symbol));
break;
}
if (reusable_node->byte_index < position.bytes) {
LOG("past_reusable sym:%s", SYM_NAME(reusable_node->tree->symbol));
LOG("past_reusable_node symbol:%s", SYM_NAME(result->symbol));
reusable_node_pop(reusable_node);
continue;
}
if (reusable_node->tree->has_changes) {
LOG("cant_reuse_changed tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!ts_tree_external_token_state_eq(reusable_node->last_external_token, last_external_token)) {
LOG("reusable_node_has_different_external_scanner_state symbol:%s", SYM_NAME(result->symbol));
reusable_node_pop(reusable_node);
continue;
}
const char *reason = NULL;
if (result->has_changes) {
reason = "has_changes";
} else if (result->symbol == ts_builtin_sym_error) {
reason = "is_error";
} else if (result->fragile_left || result->fragile_right) {
reason = "is_fragile";
} else if (self->in_ambiguity && result->child_count) {
reason = "in_ambiguity";
}
if (reason) {
LOG("cant_reuse_node_%s tree:%s", reason, SYM_NAME(result->symbol));
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
*state = ts_stack_top_state(self->stack, version);
}
continue;
}
if (reusable_node->tree->symbol == ts_builtin_sym_error) {
LOG("cant_reuse_error tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
ts_language_table_entry(self->language, *state, result->first_leaf.symbol, table_entry);
if (!parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
LOG(
"cant_reuse_node symbol:%s, first_leaf_symbol:%s",
SYM_NAME(result->symbol),
SYM_NAME(result->first_leaf.symbol)
);
reusable_node_pop_leaf(reusable_node);
break;
}
if (!ts_tree_external_token_state_eq(
reusable_node->preceding_external_token,
ts_stack_last_external_token(self->stack, version))) {
LOG("cant_reuse_external_tokens tree:%s, size:%u",
SYM_NAME(reusable_node->tree->symbol),
reusable_node->tree->size.bytes);
if (!reusable_node_breakdown(reusable_node)) {
reusable_node_pop(reusable_node);
parser__breakdown_top_of_stack(self, version);
}
continue;
}
Tree *result = reusable_node->tree;
LOG("reuse_node symbol:%s", SYM_NAME(result->symbol));
ts_tree_retain(result);
return result;
}
if (self->cached_token && position.bytes == self->cached_token_byte_index) {
ts_tree_retain(self->cached_token);
return self->cached_token;
if ((result = parser__get_cached_token(self, position.bytes, last_external_token))) {
ts_language_table_entry(self->language, *state, result->first_leaf.symbol, table_entry);
if (parser__can_reuse_first_leaf(self, *state, result, table_entry)) {
ts_tree_retain(result);
return result;
}
}
*is_fresh = true;
return parser__lex(self, version);
result = parser__lex(self, version);
parser__set_cached_token(self, position.bytes, last_external_token, result);
ts_language_table_entry(self->language, *state, result->symbol, table_entry);
return result;
}
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
@ -500,9 +520,7 @@ static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
static bool parser__better_version_exists(Parser *self, StackVersion version,
ErrorStatus my_error_status) {
if (self->finished_tree &&
self->finished_tree->error_cost <= my_error_status.cost)
return true;
if (self->finished_tree && self->finished_tree->error_cost <= my_error_status.cost) return true;
for (StackVersion i = 0, n = ts_stack_version_count(self->stack); i < n; i++) {
if (i == version || ts_stack_is_halted(self->stack, i)) continue;
@ -612,7 +630,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
if (fragile || self->in_ambiguity || pop.slices.size > 1 || initial_version_count > 1) {
parent->fragile_left = true;
parent->fragile_right = true;
parent->parse_state = TS_TREE_STATE_NONE;
@ -640,10 +658,12 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
// Push the parent node onto the stack, along with any extra tokens that
// were previously on top of the stack.
parser__push(self, slice.version, parent, next_state);
ts_stack_push(self->stack, slice.version, parent, false, next_state);
ts_tree_release(parent);
for (uint32_t j = parent->child_count; j < slice.trees.size; j++) {
Tree *tree = slice.trees.contents[j];
parser__push(self, slice.version, tree, next_state);
ts_stack_push(self->stack, slice.version, tree, false, next_state);
ts_tree_release(tree);
}
}
@ -659,15 +679,13 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
return pop;
}
static const TSParseAction *parser__reductions_after_sequence(
Parser *self,
TSStateId start_state,
const TreeArray *trees_below,
uint32_t tree_count_below,
const TreeArray *trees_above,
TSSymbol lookahead_symbol,
uint32_t *count
) {
static const TSParseAction *parser__reductions_after_sequence(Parser *self,
TSStateId start_state,
const TreeArray *trees_below,
uint32_t tree_count_below,
const TreeArray *trees_above,
TSSymbol lookahead_symbol,
uint32_t *count) {
TSStateId state = start_state;
uint32_t child_count = 0;
*count = 0;
@ -850,7 +868,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
repair.symbol, children.size, children.contents,
repair.alias_sequence_id, self->language
);
parser__push(self, slice.version, parent, next_state);
ts_stack_push(self->stack, slice.version, parent, false, next_state);
ts_tree_release(parent);
ts_stack_decrease_push_count(self->stack, slice.version, error->child_count);
ErrorStatus error_status = ts_stack_error_status(self->stack, slice.version);
@ -879,7 +898,6 @@ static void parser__start(Parser *self, TSInput input, Tree *previous_tree) {
ts_lexer_set_input(&self->lexer, input);
ts_stack_clear(self->stack);
self->reusable_node = reusable_node_new(previous_tree);
parser__clear_cached_token(self);
self->finished_tree = NULL;
}
@ -1033,7 +1051,8 @@ static bool parser__skip_preceding_trees(Parser *self, StackVersion version,
Tree *error = ts_tree_make_error_node(&slice.trees, self->language);
error->extra = true;
TSStateId state = ts_stack_top_state(self->stack, slice.version);
parser__push(self, slice.version, error, state);
ts_stack_push(self->stack, slice.version, error, false, state);
ts_tree_release(error);
}
return pop.slices.size > 0;
@ -1097,11 +1116,13 @@ static void parser__halt_parse(Parser *self) {
Tree *filler_node = ts_tree_make_error(remaining_length, length_zero(), 0, self->language);
filler_node->visible = false;
parser__push(self, 0, filler_node, 0);
ts_stack_push(self->stack, 0, filler_node, false, 0);
ts_tree_release(filler_node);
TreeArray children = array_new();
Tree *root_error = ts_tree_make_error_node(&children, self->language);
parser__push(self, 0, root_error, 0);
ts_stack_push(self->stack, 0, root_error, false, 0);
ts_tree_release(root_error);
Tree *eof = ts_tree_make_leaf(ts_builtin_sym_end, length_zero(), length_zero(), self->language);
parser__accept(self, 0, eof);
@ -1114,7 +1135,8 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
LOG("recover_eof");
TreeArray children = array_new();
Tree *parent = ts_tree_make_error_node(&children, self->language);
parser__push(self, version, parent, 1);
ts_stack_push(self->stack, version, parent, false, 1);
ts_tree_release(parent);
parser__accept(self, version, lookahead);
return;
}
@ -1135,34 +1157,12 @@ static void parser__recover(Parser *self, StackVersion version, TSStateId state,
parser__shift(self, version, state, lookahead, false);
}
static void parser__advance(Parser *self, StackVersion version,
ReusableNode *reusable_node) {
bool validated_lookahead = false;
Tree *lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
static void parser__advance(Parser *self, StackVersion version, ReusableNode *reusable_node) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry table_entry;
Tree *lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry);
for (;;) {
TSStateId state = ts_stack_top_state(self->stack, version);
TableEntry table_entry;
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
if (!validated_lookahead) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
if (lookahead == reusable_node->tree) {
reusable_node_pop_leaf(reusable_node);
} else {
parser__clear_cached_token(self);
}
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
continue;
}
validated_lookahead = true;
LOG("reused_lookahead sym:%s, size:%u", SYM_NAME(lookahead->symbol), lookahead->size.bytes);
}
bool reduction_stopped_at_error = false;
StackVersion last_reduction_version = STACK_VERSION_NONE;
@ -1181,30 +1181,18 @@ static void parser__advance(Parser *self, StackVersion version,
}
if (lookahead->child_count > 0) {
if (parser__breakdown_lookahead(self, &lookahead, state, reusable_node)) {
if (!parser__can_reuse(self, state, lookahead, &table_entry)) {
reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
lookahead = parser__get_lookahead(self, version, reusable_node, &validated_lookahead);
}
}
parser__breakdown_lookahead(self, &lookahead, state, reusable_node);
next_state = ts_language_next_state(self->language, state, lookahead->symbol);
}
parser__shift(self, version, next_state, lookahead, action.params.extra);
if (lookahead == reusable_node->tree) {
reusable_node_pop(reusable_node);
}
if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
return;
}
case TSParseActionTypeReduce: {
if (reduction_stopped_at_error) continue;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
StackPopResult reduction = parser__reduce(
self, version, action.params.symbol, action.params.child_count,
@ -1214,19 +1202,16 @@ static void parser__advance(Parser *self, StackVersion version,
StackSlice slice = *array_front(&reduction.slices);
if (reduction.stopped_at_error) {
reduction_stopped_at_error = true;
if (!parser__repair_error(self, slice, lookahead->first_leaf.symbol,
table_entry))
if (!parser__repair_error(self, slice, lookahead->first_leaf.symbol, table_entry)) {
break;
}
}
last_reduction_version = slice.version;
break;
}
case TSParseActionTypeAccept: {
if (ts_stack_error_status(self->stack, version).count > 0)
continue;
if (ts_stack_error_status(self->stack, version).count > 0) continue;
LOG("accept");
parser__accept(self, version, lookahead);
ts_tree_release(lookahead);
@ -1242,9 +1227,7 @@ static void parser__advance(Parser *self, StackVersion version,
}
parser__recover(self, version, action.params.state, lookahead);
if (lookahead == reusable_node->tree) {
reusable_node_pop(reusable_node);
}
if (lookahead == reusable_node->tree) reusable_node_pop(reusable_node);
ts_tree_release(lookahead);
return;
}
@ -1254,24 +1237,26 @@ static void parser__advance(Parser *self, StackVersion version,
if (last_reduction_version != STACK_VERSION_NONE) {
ts_stack_renumber_version(self->stack, last_reduction_version, version);
LOG_STACK();
continue;
} else if (!parser__breakdown_top_of_stack(self, version)) {
if (state == ERROR_STATE) {
ts_stack_push(self->stack, version, lookahead, false, ERROR_STATE);
ts_tree_release(lookahead);
return;
}
parser__handle_error(self, version, lookahead->first_leaf.symbol);
if (ts_stack_is_halted(self->stack, version)) {
ts_tree_release(lookahead);
return;
} else if (lookahead->size.bytes == 0) {
ts_tree_release(lookahead);
state = ts_stack_top_state(self->stack, version);
lookahead = parser__get_lookahead(self, version, &state, reusable_node, &table_entry);
}
}
if (parser__breakdown_top_of_stack(self, version)) {
continue;
}
if (state == ERROR_STATE) {
parser__push(self, version, lookahead, ERROR_STATE);
return;
}
parser__handle_error(self, version, lookahead->first_leaf.symbol);
if (ts_stack_is_halted(self->stack, version)) {
ts_tree_release(lookahead);
return;
}
state = ts_stack_top_state(self->stack, version);
ts_language_table_entry(self->language, state, lookahead->first_leaf.symbol, &table_entry);
}
}
@ -1283,6 +1268,7 @@ bool parser_init(Parser *self) {
array_grow(&self->reduce_actions, 4);
self->stack = ts_stack_new();
self->finished_tree = NULL;
parser__set_cached_token(self, 0, NULL, NULL);
return true;
}
@ -1307,7 +1293,6 @@ void parser_destroy(Parser *self) {
array_delete(&self->tree_path1);
if (self->tree_path2.contents)
array_delete(&self->tree_path2);
ts_lexer_set_last_external_token(&self->lexer, NULL);
parser_set_language(self, NULL);
}
@ -1341,24 +1326,19 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
self->reusable_node = reusable_node;
CondenseResult condense_result = parser__condense_stack(self);
if (halt_on_error && (condense_result & CondenseResultAllVersionsHadError)) {
bool all_versions_have_error = parser__condense_stack(self);
if (halt_on_error && all_versions_have_error) {
parser__halt_parse(self);
break;
}
if (condense_result & CondenseResultMadeChange) {
LOG("condense");
LOG_STACK();
}
self->is_split = (version > 1);
self->in_ambiguity = version > 1;
} while (version != 0);
LOG("done");
LOG_TREE();
ts_stack_clear(self->stack);
parser__clear_cached_token(self);
parser__set_cached_token(self, 0, NULL, NULL);
ts_tree_assign_parents(self->finished_tree, &self->tree_path1, self->language);
return self->finished_tree;
}

View file

@ -11,22 +11,26 @@ extern "C" {
#include "runtime/reusable_node.h"
#include "runtime/reduce_action.h"
typedef struct {
Tree *token;
Tree *last_external_token;
uint32_t byte_index;
} TokenCache;
typedef struct {
Lexer lexer;
Stack *stack;
const TSLanguage *language;
ReduceActionSet reduce_actions;
Tree *finished_tree;
bool is_split;
bool print_debugging_graphs;
Tree scratch_tree;
Tree *cached_token;
uint32_t cached_token_byte_index;
TokenCache token_cache;
ReusableNode reusable_node;
TreePath tree_path1;
TreePath tree_path2;
void *external_scanner_payload;
Tree *last_external_token;
bool in_ambiguity;
bool print_debugging_graphs;
} Parser;
bool parser_init(Parser *);

View file

@ -3,7 +3,7 @@
typedef struct {
Tree *tree;
uint32_t byte_index;
Tree *preceding_external_token;
Tree *last_external_token;
} ReusableNode;
static inline ReusableNode reusable_node_new(Tree *tree) {
@ -14,7 +14,7 @@ static inline ReusableNode reusable_node_new(Tree *tree) {
static inline void reusable_node_pop(ReusableNode *self) {
self->byte_index += ts_tree_total_bytes(self->tree);
if (self->tree->has_external_tokens) {
self->preceding_external_token = ts_tree_last_external_token(self->tree);
self->last_external_token = ts_tree_last_external_token(self->tree);
}
while (self->tree) {