Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Amaan Qureshi
1a99bfd9ff
wow 2024-09-26 17:51:46 -04:00
6 changed files with 891 additions and 710 deletions

View file

@ -272,6 +272,7 @@ static inline void ts_language_write_symbol_as_dot_string(
TSSymbol symbol TSSymbol symbol
) { ) {
const char *name = ts_language_symbol_name(self, symbol); const char *name = ts_language_symbol_name(self, symbol);
printf("name: %s\n", name);
for (const char *chr = name; *chr; chr++) { for (const char *chr = name; *chr; chr++) {
switch (*chr) { switch (*chr) {
case '"': case '"':

View file

@ -206,12 +206,18 @@ static bool ts_parser__breakdown_top_of_stack(
} }
ts_subtree_retain(child); ts_subtree_retain(child);
ts_stack_push(self->stack, slice.version, child, pending, state); ts_stack_push(self->stack, slice.version, child, pending, state, self->language);
LOG("push 1");
printf("push 1\n");
LOG_STACK();
} }
for (uint32_t j = 1; j < slice.subtrees.size; j++) { for (uint32_t j = 1; j < slice.subtrees.size; j++) {
Subtree tree = slice.subtrees.contents[j]; Subtree tree = slice.subtrees.contents[j];
ts_stack_push(self->stack, slice.version, tree, false, state); ts_stack_push(self->stack, slice.version, tree, false, state, self->language);
LOG("push 2");
printf("push 2\n");
LOG_STACK();
} }
ts_subtree_release(&self->tree_pool, parent); ts_subtree_release(&self->tree_pool, parent);
@ -913,7 +919,10 @@ static void ts_parser__shift(
subtree_to_push = ts_subtree_from_mut(result); subtree_to_push = ts_subtree_from_mut(result);
} }
ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state); ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state, self->language);
LOG("push 3");
printf("push 3\n");
LOG_STACK();
if (ts_subtree_has_external_tokens(subtree_to_push)) { if (ts_subtree_has_external_tokens(subtree_to_push)) {
ts_stack_set_last_external_token( ts_stack_set_last_external_token(
self->stack, version, ts_subtree_last_external_token(subtree_to_push) self->stack, version, ts_subtree_last_external_token(subtree_to_push)
@ -1016,9 +1025,15 @@ static StackVersion ts_parser__reduce(
// Push the parent node onto the stack, along with any extra tokens that // Push the parent node onto the stack, along with any extra tokens that
// were previously on top of the stack. // were previously on top of the stack.
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state); ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state, self->language);
LOG("push 4");
printf("push 4\n");
LOG_STACK();
for (uint32_t j = 0; j < self->trailing_extras.size; j++) { for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state); ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state, self->language);
LOG("push 5");
printf("push 5\n");
LOG_STACK();
} }
for (StackVersion j = 0; j < slice_version; j++) { for (StackVersion j = 0; j < slice_version; j++) {
@ -1042,9 +1057,15 @@ static void ts_parser__accept(
Subtree lookahead Subtree lookahead
) { ) {
assert(ts_subtree_is_eof(lookahead)); assert(ts_subtree_is_eof(lookahead));
ts_stack_push(self->stack, version, lookahead, false, 1); ts_stack_push(self->stack, version, lookahead, false, 1, self->language);
LOG("push 6");
printf("push 6\n");
LOG_STACK();
StackSliceArray pop = ts_stack_pop_all(self->stack, version); LOG("POP ALL")
printf("POP ALL\n");
StackSliceArray pop = ts_stack_pop_all(self->stack, version, self->dot_graph_file);
LOG_STACK();
for (uint32_t i = 0; i < pop.size; i++) { for (uint32_t i = 0; i < pop.size; i++) {
SubtreeArray trees = pop.contents[i].subtrees; SubtreeArray trees = pop.contents[i].subtrees;
@ -1222,14 +1243,20 @@ static bool ts_parser__recover_to_state(
if (slice.subtrees.size > 0) { if (slice.subtrees.size > 0) {
Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language); Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
ts_stack_push(self->stack, slice.version, error, false, goal_state); ts_stack_push(self->stack, slice.version, error, false, goal_state, self->language);
LOG("push 7");
printf("push 7\n");
LOG_STACK();
} else { } else {
array_delete(&slice.subtrees); array_delete(&slice.subtrees);
} }
for (unsigned j = 0; j < self->trailing_extras.size; j++) { for (unsigned j = 0; j < self->trailing_extras.size; j++) {
Subtree tree = self->trailing_extras.contents[j]; Subtree tree = self->trailing_extras.contents[j];
ts_stack_push(self->stack, slice.version, tree, false, goal_state); ts_stack_push(self->stack, slice.version, tree, false, goal_state, self->language);
LOG("push 8");
printf("push 8\n");
LOG_STACK();
} }
previous_version = slice.version; previous_version = slice.version;
@ -1339,7 +1366,10 @@ static void ts_parser__recover(
LOG("recover_eof"); LOG("recover_eof");
SubtreeArray children = array_new(); SubtreeArray children = array_new();
Subtree parent = ts_subtree_new_error_node(&children, false, self->language); Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
ts_stack_push(self->stack, version, parent, false, 1); ts_stack_push(self->stack, version, parent, false, 1, self->language);
LOG("push 9");
printf("push 9\n");
LOG_STACK();
ts_parser__accept(self, version, lookahead); ts_parser__accept(self, version, lookahead);
return; return;
} }
@ -1407,7 +1437,10 @@ static void ts_parser__recover(
} }
// Push the new ERROR onto the stack. // Push the new ERROR onto the stack.
ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE); ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE, self->language);
LOG("push 10");
printf("push 10\n");
LOG_STACK();
if (ts_subtree_has_external_tokens(lookahead)) { if (ts_subtree_has_external_tokens(lookahead)) {
ts_stack_set_last_external_token( ts_stack_set_last_external_token(
self->stack, version, ts_subtree_last_external_token(lookahead) self->stack, version, ts_subtree_last_external_token(lookahead)
@ -1469,8 +1502,11 @@ static void ts_parser__handle_error(
ts_stack_push( ts_stack_push(
self->stack, version_with_missing_tree, self->stack, version_with_missing_tree,
missing_tree, false, missing_tree, false,
state_after_missing_symbol state_after_missing_symbol, self->language
); );
LOG("push 11");
printf("push 11\n");
LOG_STACK();
if (ts_parser__do_all_potential_reductions( if (ts_parser__do_all_potential_reductions(
self, version_with_missing_tree, self, version_with_missing_tree,
@ -1488,7 +1524,10 @@ static void ts_parser__handle_error(
} }
} }
ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE); ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE, self->language);
LOG("push 12");
printf("push 12\n");
LOG_STACK();
v = (v == version) ? previous_version_count : v + 1; v = (v == version) ? previous_version_count : v + 1;
} }
@ -1847,7 +1886,7 @@ TSParser *ts_parser_new(void) {
array_init(&self->reduce_actions); array_init(&self->reduce_actions);
array_reserve(&self->reduce_actions, 4); array_reserve(&self->reduce_actions, 4);
self->tree_pool = ts_subtree_pool_new(32); self->tree_pool = ts_subtree_pool_new(32);
self->stack = ts_stack_new(&self->tree_pool); self->stack = ts_stack_new(&self->tree_pool, self->language);
self->finished_tree = NULL_SUBTREE; self->finished_tree = NULL_SUBTREE;
self->reusable_node = reusable_node_new(); self->reusable_node = reusable_node_new();
self->dot_graph_file = NULL; self->dot_graph_file = NULL;
@ -1915,6 +1954,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
} }
self->language = ts_language_copy(language); self->language = ts_language_copy(language);
ts_stack_set_language(self->stack, ts_language_copy(language));
ts_stack_set_lexer(self->stack, &self->lexer);
return true; return true;
} }

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@ extern "C" {
#include "./array.h" #include "./array.h"
#include "./subtree.h" #include "./subtree.h"
#include "./lexer.h"
#include "./error_costs.h" #include "./error_costs.h"
#include <stdio.h> #include <stdio.h>
@ -29,7 +30,7 @@ typedef struct {
typedef Array(StackSummaryEntry) StackSummary; typedef Array(StackSummaryEntry) StackSummary;
// Create a stack. // Create a stack.
Stack *ts_stack_new(SubtreePool *); Stack *ts_stack_new(SubtreePool *, const TSLanguage*);
// Release the memory reserved for a given stack. // Release the memory reserved for a given stack.
void ts_stack_delete(Stack *); void ts_stack_delete(Stack *);
@ -47,6 +48,10 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion);
// Set the last external token associated with a given version of the stack. // Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree ); void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
void ts_stack_set_language(Stack *self, const TSLanguage *language);
void ts_stack_set_lexer(Stack *self, Lexer *lexer);
// Get the position of the given version of the stack within the document. // Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion); Length ts_stack_position(const Stack *, StackVersion);
@ -55,7 +60,7 @@ Length ts_stack_position(const Stack *, StackVersion);
// This transfers ownership of the tree to the Stack. Callers that // This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should // need to retain ownership of the tree for their own purposes should
// first retain the tree. // first retain the tree.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId); void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId, const TSLanguage*);
// Pop the given number of entries from the given version of the stack. This // Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple // operation can increase the number of stack versions by revealing multiple
@ -71,7 +76,7 @@ SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion); StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
// Remove all trees from the given version of the stack. // Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion); StackSliceArray ts_stack_pop_all(Stack *, StackVersion, FILE*);
// Get the maximum number of tree nodes reachable from this version of the stack // Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected. // since the last error was detected.

View file

@ -22,6 +22,7 @@ typedef struct {
#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX #define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
#define TS_MAX_TREE_POOL_SIZE 32 #define TS_MAX_TREE_POOL_SIZE 32
// ExternalScannerState // ExternalScannerState
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) { void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
@ -196,8 +197,10 @@ Subtree ts_subtree_new_leaf(
}}; }};
} else { } else {
SubtreeHeapData *data = ts_subtree_pool_allocate(pool); SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
printf("[1] ALLOCATED %p\n", (void *)data);
*data = (SubtreeHeapData) { *data = (SubtreeHeapData) {
.ref_count = 1, .ref_count = 1,
.og_ptr = (size_t)data,
.padding = padding, .padding = padding,
.size = size, .size = size,
.lookahead_bytes = lookahead_bytes, .lookahead_bytes = lookahead_bytes,
@ -272,6 +275,7 @@ MutableSubtree ts_subtree_clone(Subtree self) {
); );
} }
result->ref_count = 1; result->ref_count = 1;
printf("NEW SUBTREE: %p\n", result);
return (MutableSubtree) {.ptr = result}; return (MutableSubtree) {.ptr = result};
} }
@ -375,7 +379,10 @@ void ts_subtree_summarize_children(
self.ptr->named_child_count = 0; self.ptr->named_child_count = 0;
self.ptr->visible_child_count = 0; self.ptr->visible_child_count = 0;
self.ptr->error_cost = 0; if (!ts_subtree_is_error(ts_subtree_from_mut(self))) {
printf("SET 0: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
self.ptr->error_cost = 0;
}
self.ptr->repeat_depth = 0; self.ptr->repeat_depth = 0;
self.ptr->visible_descendant_count = 0; self.ptr->visible_descendant_count = 0;
self.ptr->has_external_tokens = false; self.ptr->has_external_tokens = false;
@ -422,16 +429,27 @@ void ts_subtree_summarize_children(
} }
uint32_t grandchild_count = ts_subtree_child_count(child); uint32_t grandchild_count = ts_subtree_child_count(child);
printf("HI: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
if ( if (
self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error ||
self.ptr->symbol == ts_builtin_sym_error_repeat self.ptr->symbol == ts_builtin_sym_error_repeat
) { ) {
printf("THE FIRST BLOCK\n");
if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) { if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
if (ts_subtree_visible(child)) { if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE; self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) { } else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count; self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
} }
} else {
printf("FALSE BLOCK\n");
}
if (ts_subtree_is_error(child) && grandchild_count == 0) {
MutableSubtree child_mut = ts_subtree_to_mut_unsafe(child);
child_mut.ptr->error_cost = ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * child.ptr->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * child.ptr->size.extent.row;
} }
} }
@ -465,10 +483,13 @@ void ts_subtree_summarize_children(
self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes; self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
printf("HI 2: %d\n", self.ptr->symbol);
printf("HI 2: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
if ( if (
self.ptr->symbol == ts_builtin_sym_error || self.ptr->symbol == ts_builtin_sym_error ||
self.ptr->symbol == ts_builtin_sym_error_repeat self.ptr->symbol == ts_builtin_sym_error_repeat
) { ) {
printf("HI 3\n");
self.ptr->error_cost += self.ptr->error_cost +=
ERROR_COST_PER_RECOVERY + ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes + ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
@ -519,9 +540,10 @@ MutableSubtree ts_subtree_new_node(
children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree)); children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
} }
SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size]; SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
printf("[0]ALLOCATED %p\n", (void *)data);
*data = (SubtreeHeapData) { *data = (SubtreeHeapData) {
.ref_count = 1, .ref_count = 1,
.og_ptr = (size_t)data,
.symbol = symbol, .symbol = symbol,
.child_count = children->size, .child_count = children->size,
.visible = metadata.visible, .visible = metadata.visible,
@ -538,6 +560,11 @@ MutableSubtree ts_subtree_new_node(
}} }}
}; };
MutableSubtree result = {.ptr = data}; MutableSubtree result = {.ptr = data};
if (symbol == ts_builtin_sym_error_repeat) {
printf("ptr: %p\n", (void *)result.ptr);
} else if (symbol == ts_builtin_sym_error) {
printf("ptr: %p\n", (void *)result.ptr);
}
ts_subtree_summarize_children(result, language); ts_subtree_summarize_children(result, language);
return result; return result;
} }
@ -981,6 +1008,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self); uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
fprintf(f, "tree_%p [label=\"", (void *)self); fprintf(f, "tree_%p [label=\"", (void *)self);
ts_language_write_symbol_as_dot_string(language, f, symbol); ts_language_write_symbol_as_dot_string(language, f, symbol);
printf("[0]ts_subtree_error_cost(link.subtree)=%u\n", ts_subtree_error_cost(*self));
fprintf(f, "\""); fprintf(f, "\"");
if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext"); if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
@ -994,7 +1022,8 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
"depends-on-column: %u\n" "depends-on-column: %u\n"
"descendant-count: %u\n" "descendant-count: %u\n"
"repeat-depth: %u\n" "repeat-depth: %u\n"
"lookahead-bytes: %u", "lookahead-bytes: %u\n"
"ptr: %p",
start_offset, end_offset, start_offset, end_offset,
ts_subtree_parse_state(*self), ts_subtree_parse_state(*self),
ts_subtree_error_cost(*self), ts_subtree_error_cost(*self),
@ -1002,11 +1031,23 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
ts_subtree_depends_on_column(*self), ts_subtree_depends_on_column(*self),
ts_subtree_visible_descendant_count(*self), ts_subtree_visible_descendant_count(*self),
ts_subtree_repeat_depth(*self), ts_subtree_repeat_depth(*self),
ts_subtree_lookahead_bytes(*self) ts_subtree_lookahead_bytes(*self),
(void*)self->ptr
); );
if (self->data.is_inline) {
fprintf(f, "\nis_inline: %d", self->data.is_inline);
} else {
fprintf(f, "\nog_ptr: %p", (void*)self->ptr->og_ptr);
}
if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) { if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) {
fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char); fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
printf("ptr %p is error with an error cost of %d\n", (void*)self->ptr, ts_subtree_error_cost(*self));
printf("branch [1] %d %p\n", ts_subtree_missing(*self), self->ptr);
printf("branch [2] %d\n", self->data.is_inline);
} else {
printf("no call!\n");
} }
fprintf(f, "\"]\n"); fprintf(f, "\"]\n");

View file

@ -110,6 +110,7 @@ struct SubtreeInlineData {
// the inline representation. // the inline representation.
typedef struct { typedef struct {
volatile uint32_t ref_count; volatile uint32_t ref_count;
size_t og_ptr;
Length padding; Length padding;
Length size; Length size;
uint32_t lookahead_bytes; uint32_t lookahead_bytes;