From dee86f908a226eef59fddaaf2c2edd0da13c421f Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 7 Jun 2017 17:05:39 -0400 Subject: [PATCH 1/5] Correctly check type is ParseActionTypeRecover --- src/compiler/parse_table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 37707ed0..cf91154f 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -151,7 +151,7 @@ bool ParseState::has_shift_action() const { void ParseState::each_referenced_state(function fn) { for (auto &entry : terminal_entries) for (ParseAction &action : entry.second.actions) - if (action.type == ParseActionTypeShift || ParseActionTypeRecover) + if (action.type == ParseActionTypeShift || action.type == ParseActionTypeRecover) fn(&action.state_index); for (auto &entry : nonterminal_entries) fn(&entry.second); From 6897530c4720000b19dd4566bfc3f840b4a56f95 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 7 Jun 2017 17:07:00 -0400 Subject: [PATCH 2/5] Check for invalid state indexes Some ParseActions have a state-id of -1 which can cause an out-of-bounds read when removing duplicate parse states. This was found by AddressSanitizer: ==90699==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6320000187f8 at pc 0x0001071220a9 bp 0x7fff595fd440 sp 0x7fff595fd438 READ of size 8 at 0x6320000187f8 thread T0 #0 0x1071220a8 in tree_sitter::build_tables::ParseTableBuilder::remove_duplicate_parse_states()::'lambda0'(unsigned long*)::operator()(unsigned long*) const build_parse_table.cc:398 #1 0x107121fa5 in void std::__1::__invoke_void_return_wrapper::__call(tree_sitter::build_tables::ParseTableBuilder::remove_duplicate_parse_states()::'lambda0'(unsigned long*)&&&, unsigned long*&&) __functional_base:416 ... 0x6320000187f8 is located 8 bytes to the left of 88264-byte region [0x632000018800,0x63200002e0c8) allocated by thread T0 here: #0 0x107b1576b in wrap__Znwm (libclang_rt.asan_osx_dynamic.dylib:x86_64h+0x6076b) #1 0x10711da2c in std::__1::vector >::allocate(unsigned long) new:169 #2 0x10711d8fb in std::__1::vector >::vector(unsigned long) vector:1074 #3 0x107112f5c in std::__1::vector >::vector(unsigned long) vector:1068 #4 0x1070af381 in tree_sitter::build_tables::ParseTableBuilder::remove_duplicate_parse_states() build_parse_table.cc:378 #5 0x10709d827 in tree_sitter::build_tables::ParseTableBuilder::build() build_parse_table.cc:85 ... SUMMARY: AddressSanitizer: heap-buffer-overflow build_parse_table.cc:398 in tree_sitter::build_tables::ParseTableBuilder::remove_duplicate_parse_states()::'lambda0'(unsigned long*)::operator()(unsigned long*) const Shadow bytes around the buggy address: 0x1c64000030a0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x1c64000030b0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x1c64000030c0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x1c64000030d0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa 0x1c64000030e0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa =>0x1c64000030f0: fa fa fa fa fa fa fa fa fa fa fa fa fa fa fa[fa] 0x1c6400003100: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x1c6400003110: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x1c6400003120: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x1c6400003130: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 0x1c6400003140: 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 --- src/compiler/build_tables/build_parse_table.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 50c84af7..d1d90455 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -394,7 +394,8 @@ class ParseTableBuilder { } else { ParseState &state = *iter; state.each_referenced_state([&new_state_ids](ParseStateId *state_index) { - *state_index = new_state_ids[*state_index]; + if (*state_index != (ParseStateId)(-1)) + *state_index = new_state_ids[*state_index]; }); ++iter; } From 18ba6ebbd73fbcc00dc68e22878f97d7f647236c Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 9 Jun 2017 16:13:17 -0400 Subject: [PATCH 3/5] Move state_id check into each_referenced_state --- src/compiler/build_tables/build_parse_table.cc | 3 +-- src/compiler/parse_table.cc | 3 ++- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index d1d90455..50c84af7 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -394,8 +394,7 @@ class ParseTableBuilder { } else { ParseState &state = *iter; state.each_referenced_state([&new_state_ids](ParseStateId *state_index) { - if (*state_index != (ParseStateId)(-1)) - *state_index = new_state_ids[*state_index]; + *state_index = new_state_ids[*state_index]; }); ++iter; } diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index cf91154f..5b422d43 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -154,7 +154,8 @@ void ParseState::each_referenced_state(function fn) { if (action.type == ParseActionTypeShift || action.type == ParseActionTypeRecover) fn(&action.state_index); for (auto &entry : nonterminal_entries) - fn(&entry.second); + if (entry.second != (ParseStateId)(-1)) + fn(&entry.second); } bool ParseState::operator==(const ParseState &other) const { From 577e43f65373de5eac6855c79e136f58950cfd04 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 9 Jun 2017 16:25:40 -0400 Subject: [PATCH 4/5] shift-extra actions do not have valid state_ids --- src/compiler/parse_table.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 5b422d43..8ea1f8c4 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -151,7 +151,7 @@ bool ParseState::has_shift_action() const { void ParseState::each_referenced_state(function fn) { for (auto &entry : terminal_entries) for (ParseAction &action : entry.second.actions) - if (action.type == ParseActionTypeShift || action.type == ParseActionTypeRecover) + if ((action.type == ParseActionTypeShift && !action.extra) || action.type == ParseActionTypeRecover) fn(&action.state_index); for (auto &entry : nonterminal_entries) if (entry.second != (ParseStateId)(-1)) From c58f6401d0c3d02b6d0da81908dc07c7c732f88c Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Wed, 14 Jun 2017 08:49:38 -0400 Subject: [PATCH 5/5] Non-terminal entries always have valid state-ids --- src/compiler/parse_table.cc | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 8ea1f8c4..ffa57760 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -154,8 +154,7 @@ void ParseState::each_referenced_state(function fn) { if ((action.type == ParseActionTypeShift && !action.extra) || action.type == ParseActionTypeRecover) fn(&action.state_index); for (auto &entry : nonterminal_entries) - if (entry.second != (ParseStateId)(-1)) - fn(&entry.second); + fn(&entry.second); } bool ParseState::operator==(const ParseState &other) const {