Compare commits

...
Sign in to create a new pull request.

1 commit

Author SHA1 Message Date
Amaan Qureshi
1a99bfd9ff
wow 2024-09-26 17:51:46 -04:00
6 changed files with 891 additions and 710 deletions

View file

@ -272,6 +272,7 @@ static inline void ts_language_write_symbol_as_dot_string(
TSSymbol symbol
) {
const char *name = ts_language_symbol_name(self, symbol);
printf("name: %s\n", name);
for (const char *chr = name; *chr; chr++) {
switch (*chr) {
case '"':

View file

@ -206,12 +206,18 @@ static bool ts_parser__breakdown_top_of_stack(
}
ts_subtree_retain(child);
ts_stack_push(self->stack, slice.version, child, pending, state);
ts_stack_push(self->stack, slice.version, child, pending, state, self->language);
LOG("push 1");
printf("push 1\n");
LOG_STACK();
}
for (uint32_t j = 1; j < slice.subtrees.size; j++) {
Subtree tree = slice.subtrees.contents[j];
ts_stack_push(self->stack, slice.version, tree, false, state);
ts_stack_push(self->stack, slice.version, tree, false, state, self->language);
LOG("push 2");
printf("push 2\n");
LOG_STACK();
}
ts_subtree_release(&self->tree_pool, parent);
@ -913,7 +919,10 @@ static void ts_parser__shift(
subtree_to_push = ts_subtree_from_mut(result);
}
ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state);
ts_stack_push(self->stack, version, subtree_to_push, !is_leaf, state, self->language);
LOG("push 3");
printf("push 3\n");
LOG_STACK();
if (ts_subtree_has_external_tokens(subtree_to_push)) {
ts_stack_set_last_external_token(
self->stack, version, ts_subtree_last_external_token(subtree_to_push)
@ -1016,9 +1025,15 @@ static StackVersion ts_parser__reduce(
// Push the parent node onto the stack, along with any extra tokens that
// were previously on top of the stack.
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state);
ts_stack_push(self->stack, slice_version, ts_subtree_from_mut(parent), false, next_state, self->language);
LOG("push 4");
printf("push 4\n");
LOG_STACK();
for (uint32_t j = 0; j < self->trailing_extras.size; j++) {
ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state);
ts_stack_push(self->stack, slice_version, self->trailing_extras.contents[j], false, next_state, self->language);
LOG("push 5");
printf("push 5\n");
LOG_STACK();
}
for (StackVersion j = 0; j < slice_version; j++) {
@ -1042,9 +1057,15 @@ static void ts_parser__accept(
Subtree lookahead
) {
assert(ts_subtree_is_eof(lookahead));
ts_stack_push(self->stack, version, lookahead, false, 1);
ts_stack_push(self->stack, version, lookahead, false, 1, self->language);
LOG("push 6");
printf("push 6\n");
LOG_STACK();
StackSliceArray pop = ts_stack_pop_all(self->stack, version);
LOG("POP ALL")
printf("POP ALL\n");
StackSliceArray pop = ts_stack_pop_all(self->stack, version, self->dot_graph_file);
LOG_STACK();
for (uint32_t i = 0; i < pop.size; i++) {
SubtreeArray trees = pop.contents[i].subtrees;
@ -1222,14 +1243,20 @@ static bool ts_parser__recover_to_state(
if (slice.subtrees.size > 0) {
Subtree error = ts_subtree_new_error_node(&slice.subtrees, true, self->language);
ts_stack_push(self->stack, slice.version, error, false, goal_state);
ts_stack_push(self->stack, slice.version, error, false, goal_state, self->language);
LOG("push 7");
printf("push 7\n");
LOG_STACK();
} else {
array_delete(&slice.subtrees);
}
for (unsigned j = 0; j < self->trailing_extras.size; j++) {
Subtree tree = self->trailing_extras.contents[j];
ts_stack_push(self->stack, slice.version, tree, false, goal_state);
ts_stack_push(self->stack, slice.version, tree, false, goal_state, self->language);
LOG("push 8");
printf("push 8\n");
LOG_STACK();
}
previous_version = slice.version;
@ -1339,7 +1366,10 @@ static void ts_parser__recover(
LOG("recover_eof");
SubtreeArray children = array_new();
Subtree parent = ts_subtree_new_error_node(&children, false, self->language);
ts_stack_push(self->stack, version, parent, false, 1);
ts_stack_push(self->stack, version, parent, false, 1, self->language);
LOG("push 9");
printf("push 9\n");
LOG_STACK();
ts_parser__accept(self, version, lookahead);
return;
}
@ -1407,7 +1437,10 @@ static void ts_parser__recover(
}
// Push the new ERROR onto the stack.
ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE);
ts_stack_push(self->stack, version, ts_subtree_from_mut(error_repeat), false, ERROR_STATE, self->language);
LOG("push 10");
printf("push 10\n");
LOG_STACK();
if (ts_subtree_has_external_tokens(lookahead)) {
ts_stack_set_last_external_token(
self->stack, version, ts_subtree_last_external_token(lookahead)
@ -1469,8 +1502,11 @@ static void ts_parser__handle_error(
ts_stack_push(
self->stack, version_with_missing_tree,
missing_tree, false,
state_after_missing_symbol
state_after_missing_symbol, self->language
);
LOG("push 11");
printf("push 11\n");
LOG_STACK();
if (ts_parser__do_all_potential_reductions(
self, version_with_missing_tree,
@ -1488,7 +1524,10 @@ static void ts_parser__handle_error(
}
}
ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE);
ts_stack_push(self->stack, v, NULL_SUBTREE, false, ERROR_STATE, self->language);
LOG("push 12");
printf("push 12\n");
LOG_STACK();
v = (v == version) ? previous_version_count : v + 1;
}
@ -1847,7 +1886,7 @@ TSParser *ts_parser_new(void) {
array_init(&self->reduce_actions);
array_reserve(&self->reduce_actions, 4);
self->tree_pool = ts_subtree_pool_new(32);
self->stack = ts_stack_new(&self->tree_pool);
self->stack = ts_stack_new(&self->tree_pool, self->language);
self->finished_tree = NULL_SUBTREE;
self->reusable_node = reusable_node_new();
self->dot_graph_file = NULL;
@ -1915,6 +1954,8 @@ bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
}
self->language = ts_language_copy(language);
ts_stack_set_language(self->stack, ts_language_copy(language));
ts_stack_set_lexer(self->stack, &self->lexer);
return true;
}

File diff suppressed because it is too large Load diff

View file

@ -7,6 +7,7 @@ extern "C" {
#include "./array.h"
#include "./subtree.h"
#include "./lexer.h"
#include "./error_costs.h"
#include <stdio.h>
@ -29,7 +30,7 @@ typedef struct {
typedef Array(StackSummaryEntry) StackSummary;
// Create a stack.
Stack *ts_stack_new(SubtreePool *);
Stack *ts_stack_new(SubtreePool *, const TSLanguage*);
// Release the memory reserved for a given stack.
void ts_stack_delete(Stack *);
@ -47,6 +48,10 @@ Subtree ts_stack_last_external_token(const Stack *, StackVersion);
// Set the last external token associated with a given version of the stack.
void ts_stack_set_last_external_token(Stack *, StackVersion, Subtree );
void ts_stack_set_language(Stack *self, const TSLanguage *language);
void ts_stack_set_lexer(Stack *self, Lexer *lexer);
// Get the position of the given version of the stack within the document.
Length ts_stack_position(const Stack *, StackVersion);
@ -55,7 +60,7 @@ Length ts_stack_position(const Stack *, StackVersion);
// This transfers ownership of the tree to the Stack. Callers that
// need to retain ownership of the tree for their own purposes should
// first retain the tree.
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId);
void ts_stack_push(Stack *, StackVersion, Subtree , bool, TSStateId, const TSLanguage*);
// Pop the given number of entries from the given version of the stack. This
// operation can increase the number of stack versions by revealing multiple
@ -71,7 +76,7 @@ SubtreeArray ts_stack_pop_error(Stack *, StackVersion);
StackSliceArray ts_stack_pop_pending(Stack *, StackVersion);
// Remove all trees from the given version of the stack.
StackSliceArray ts_stack_pop_all(Stack *, StackVersion);
StackSliceArray ts_stack_pop_all(Stack *, StackVersion, FILE*);
// Get the maximum number of tree nodes reachable from this version of the stack
// since the last error was detected.

View file

@ -22,6 +22,7 @@ typedef struct {
#define TS_MAX_INLINE_TREE_LENGTH UINT8_MAX
#define TS_MAX_TREE_POOL_SIZE 32
// ExternalScannerState
void ts_external_scanner_state_init(ExternalScannerState *self, const char *data, unsigned length) {
@ -196,8 +197,10 @@ Subtree ts_subtree_new_leaf(
}};
} else {
SubtreeHeapData *data = ts_subtree_pool_allocate(pool);
printf("[1] ALLOCATED %p\n", (void *)data);
*data = (SubtreeHeapData) {
.ref_count = 1,
.og_ptr = (size_t)data,
.padding = padding,
.size = size,
.lookahead_bytes = lookahead_bytes,
@ -272,6 +275,7 @@ MutableSubtree ts_subtree_clone(Subtree self) {
);
}
result->ref_count = 1;
printf("NEW SUBTREE: %p\n", result);
return (MutableSubtree) {.ptr = result};
}
@ -375,7 +379,10 @@ void ts_subtree_summarize_children(
self.ptr->named_child_count = 0;
self.ptr->visible_child_count = 0;
self.ptr->error_cost = 0;
if (!ts_subtree_is_error(ts_subtree_from_mut(self))) {
printf("SET 0: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
self.ptr->error_cost = 0;
}
self.ptr->repeat_depth = 0;
self.ptr->visible_descendant_count = 0;
self.ptr->has_external_tokens = false;
@ -422,16 +429,27 @@ void ts_subtree_summarize_children(
}
uint32_t grandchild_count = ts_subtree_child_count(child);
printf("HI: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
if (
self.ptr->symbol == ts_builtin_sym_error ||
self.ptr->symbol == ts_builtin_sym_error_repeat
) {
printf("THE FIRST BLOCK\n");
if (!ts_subtree_extra(child) && !(ts_subtree_is_error(child) && grandchild_count == 0)) {
if (ts_subtree_visible(child)) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE;
} else if (grandchild_count > 0) {
self.ptr->error_cost += ERROR_COST_PER_SKIPPED_TREE * child.ptr->visible_child_count;
}
} else {
printf("FALSE BLOCK\n");
}
if (ts_subtree_is_error(child) && grandchild_count == 0) {
MutableSubtree child_mut = ts_subtree_to_mut_unsafe(child);
child_mut.ptr->error_cost = ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * child.ptr->size.bytes +
ERROR_COST_PER_SKIPPED_LINE * child.ptr->size.extent.row;
}
}
@ -465,10 +483,13 @@ void ts_subtree_summarize_children(
self.ptr->lookahead_bytes = lookahead_end_byte - self.ptr->size.bytes - self.ptr->padding.bytes;
printf("HI 2: %d\n", self.ptr->symbol);
printf("HI 2: %s\n", ts_language_symbol_name(language, self.ptr->symbol));
if (
self.ptr->symbol == ts_builtin_sym_error ||
self.ptr->symbol == ts_builtin_sym_error_repeat
) {
printf("HI 3\n");
self.ptr->error_cost +=
ERROR_COST_PER_RECOVERY +
ERROR_COST_PER_SKIPPED_CHAR * self.ptr->size.bytes +
@ -519,9 +540,10 @@ MutableSubtree ts_subtree_new_node(
children->capacity = (uint32_t)(new_byte_size / sizeof(Subtree));
}
SubtreeHeapData *data = (SubtreeHeapData *)&children->contents[children->size];
printf("[0]ALLOCATED %p\n", (void *)data);
*data = (SubtreeHeapData) {
.ref_count = 1,
.og_ptr = (size_t)data,
.symbol = symbol,
.child_count = children->size,
.visible = metadata.visible,
@ -538,6 +560,11 @@ MutableSubtree ts_subtree_new_node(
}}
};
MutableSubtree result = {.ptr = data};
if (symbol == ts_builtin_sym_error_repeat) {
printf("ptr: %p\n", (void *)result.ptr);
} else if (symbol == ts_builtin_sym_error) {
printf("ptr: %p\n", (void *)result.ptr);
}
ts_subtree_summarize_children(result, language);
return result;
}
@ -981,6 +1008,7 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
uint32_t end_offset = start_offset + ts_subtree_total_bytes(*self);
fprintf(f, "tree_%p [label=\"", (void *)self);
ts_language_write_symbol_as_dot_string(language, f, symbol);
printf("[0]ts_subtree_error_cost(link.subtree)=%u\n", ts_subtree_error_cost(*self));
fprintf(f, "\"");
if (ts_subtree_child_count(*self) == 0) fprintf(f, ", shape=plaintext");
@ -994,7 +1022,8 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
"depends-on-column: %u\n"
"descendant-count: %u\n"
"repeat-depth: %u\n"
"lookahead-bytes: %u",
"lookahead-bytes: %u\n"
"ptr: %p",
start_offset, end_offset,
ts_subtree_parse_state(*self),
ts_subtree_error_cost(*self),
@ -1002,11 +1031,23 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
ts_subtree_depends_on_column(*self),
ts_subtree_visible_descendant_count(*self),
ts_subtree_repeat_depth(*self),
ts_subtree_lookahead_bytes(*self)
ts_subtree_lookahead_bytes(*self),
(void*)self->ptr
);
if (self->data.is_inline) {
fprintf(f, "\nis_inline: %d", self->data.is_inline);
} else {
fprintf(f, "\nog_ptr: %p", (void*)self->ptr->og_ptr);
}
if (ts_subtree_is_error(*self) && ts_subtree_child_count(*self) == 0 && self->ptr->lookahead_char != 0) {
fprintf(f, "\ncharacter: '%c'", self->ptr->lookahead_char);
printf("ptr %p is error with an error cost of %d\n", (void*)self->ptr, ts_subtree_error_cost(*self));
printf("branch [1] %d %p\n", ts_subtree_missing(*self), self->ptr);
printf("branch [2] %d\n", self->data.is_inline);
} else {
printf("no call!\n");
}
fprintf(f, "\"]\n");

View file

@ -110,6 +110,7 @@ struct SubtreeInlineData {
// the inline representation.
typedef struct {
volatile uint32_t ref_count;
size_t og_ptr;
Length padding;
Length size;
uint32_t lookahead_bytes;