From df520635c60eeb81acc2396f360a0725567c0edc Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 20 Feb 2017 14:34:10 -0800 Subject: [PATCH] Prevent crash due to huge number of possible paths through parse stack --- .../error_corpus/javascript_errors.txt | 38 +++++++++++++++++++ src/runtime/parser.c | 11 ------ src/runtime/stack.c | 6 ++- src/runtime/tree.c | 26 +++++++++++++ src/runtime/tree.h | 3 ++ 5 files changed, 72 insertions(+), 12 deletions(-) diff --git a/spec/fixtures/error_corpus/javascript_errors.txt b/spec/fixtures/error_corpus/javascript_errors.txt index 19e1587e..39f54f9a 100644 --- a/spec/fixtures/error_corpus/javascript_errors.txt +++ b/spec/fixtures/error_corpus/javascript_errors.txt @@ -117,3 +117,41 @@ const one = two (arguments (identifier))) (identifier)) (arguments))))) + +=================================================== +Errors after a sequence of function declarations +=================================================== + +/* + * The JS grammar has an ambiguity such that these functions + * can be parsed either as function declarations or as + * function expressions. This ambiguity causes a lot of + * splitting and merging in the parse stack. When iterating + * the parse stack during an error repair, there would then + * be a very large number (> 2^16) of paths through the parse + * stack. + */ +function a() {} +function b() {} +function c() {} +function e() {} +function f() {} +function g() {} +function h() {} +function i() {} + +var x = !!! + +--- + +(program + (comment) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (expression_statement (function (identifier) (formal_parameters) (statement_block))) + (trailing_var_declaration (identifier)) (ERROR)) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 523c1f42..09fa6b13 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -313,17 +313,6 @@ static void parser__clear_cached_token(Parser *self) { self->cached_token = NULL; } -static inline bool ts_external_token_state_eq(const TSExternalTokenState *self, - const TSExternalTokenState *other) { - if (self == other) { - return true; - } else if (!self || !other) { - return false; - } else { - return memcmp(self, other, sizeof(TSExternalTokenState)) == 0; - } -} - static Tree *parser__get_lookahead(Parser *self, StackVersion version, ReusableNode *reusable_node, bool *is_fresh) { diff --git a/src/runtime/stack.c b/src/runtime/stack.c index fc875396..ff6f7e1f 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -8,6 +8,7 @@ #define MAX_LINK_COUNT 8 #define MAX_NODE_POOL_SIZE 50 +#define MAX_ITERATOR_COUNT 64 #define INLINE static inline __attribute__((always_inline)) @@ -143,7 +144,9 @@ static StackNode *stack_node_new(StackNode *next, Tree *tree, bool is_pending, static void stack_node_add_link(StackNode *self, StackLink link) { for (int i = 0; i < self->link_count; i++) { StackLink existing_link = self->links[i]; - if (existing_link.tree == link.tree) { + if (existing_link.tree == link.tree || + (existing_link.tree && link.tree && + ts_tree_tokens_eq(existing_link.tree, link.tree))) { if (existing_link.node == link.node) return; if (existing_link.node->state == link.node->state) { @@ -253,6 +256,7 @@ INLINE StackPopResult stack__iter(Stack *self, StackVersion version, link = node->links[0]; next_iterator = &self->iterators.contents[i]; } else { + if (self->iterators.size >= MAX_ITERATOR_COUNT) continue; link = node->links[j]; array_push(&self->iterators, self->iterators.contents[i]); next_iterator = array_back(&self->iterators); diff --git a/src/runtime/tree.c b/src/runtime/tree.c index e437a0dc..ac9f42f4 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -301,6 +301,21 @@ bool ts_tree_eq(const Tree *self, const Tree *other) { return true; } +bool ts_tree_tokens_eq(const Tree *self, const Tree *other) { + if (self->child_count > 0 || other->child_count > 0) return false; + if (self->symbol != other->symbol) return false; + if (self->padding.bytes != other->padding.bytes) return false; + if (self->size.bytes != other->size.bytes) return false; + if (self->extra != other->extra) return false; + if (self->has_external_token_state) { + if (!other->has_external_token_state) return false; + if (!ts_external_token_state_eq(&self->external_token_state, &other->external_token_state)) return false; + } else { + if (other->has_external_token_state) return false; + } + return true; +} + int ts_tree_compare(const Tree *left, const Tree *right) { if (left->symbol < right->symbol) return -1; @@ -523,3 +538,14 @@ void ts_tree_print_dot_graph(const Tree *self, const TSLanguage *language, ts_tree__print_dot_graph(self, 0, language, f); fprintf(f, "}\n"); } + +bool ts_external_token_state_eq(const TSExternalTokenState *self, + const TSExternalTokenState *other) { + if (self == other) { + return true; + } else if (!self || !other) { + return false; + } else { + return memcmp(self, other, sizeof(TSExternalTokenState)) == 0; + } +} diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 58452fc2..de88a913 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -79,6 +79,7 @@ Tree *ts_tree_make_error(Length, Length, char); void ts_tree_retain(Tree *tree); void ts_tree_release(Tree *tree); bool ts_tree_eq(const Tree *tree1, const Tree *tree2); +bool ts_tree_tokens_eq(const Tree *, const Tree *); int ts_tree_compare(const Tree *tree1, const Tree *tree2); uint32_t ts_tree_start_column(const Tree *self); @@ -107,6 +108,8 @@ static inline bool ts_tree_is_fragile(const Tree *tree) { ts_tree_total_bytes(tree) == 0; } +bool ts_external_token_state_eq(const TSExternalTokenState *, const TSExternalTokenState *); + #ifdef __cplusplus } #endif