Refine logic for which tokens to use in error recovery

This commit is contained in:
Max Brunsfeld 2016-04-27 14:09:19 -07:00
parent 31f6b2e24a
commit 5b74813a5c
9 changed files with 159 additions and 114 deletions

View file

@ -14,7 +14,7 @@
'src/compiler/build_tables/build_lex_table.cc',
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/does_match_any_line.cc',
'src/compiler/build_tables/recovery_tokens.cc',
'src/compiler/build_tables/item_set_closure.cc',
'src/compiler/build_tables/lex_item.cc',
'src/compiler/build_tables/lex_item_transitions.cc',

View file

@ -0,0 +1,36 @@
#include "spec_helper.h"
#include "compiler/rules/character_set.h"
#include "compiler/build_tables/recovery_tokens.h"
#include "compiler/lexical_grammar.h"
#include "helpers/rule_helpers.h"
#include "helpers/stream_methods.h"
#include "compiler/rules.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Spec for recovery_tokens(): builds a small lexical grammar and checks which
// token symbols are reported as usable for error recovery.
describe("recovery_tokens(rule)", []() {
it("includes rules that can only begin and end with an explicit set of characters", [&]() {
LexicalGrammar grammar;
// The grammar's only separator rule is the space character.
grammar.separators = {
character({ ' ' }),
};
grammar.variables = {
// var0 (index 0) is not in the expected result below.
// NOTE(review): character({}, false) presumably builds an "any character" set - confirm in rule_helpers.
Variable("var0", VariableTypeNamed, character({}, false)),
// var1 (index 1) begins with one of {a, b} and ends with one of {c, d};
// only its middle element uses character({}, false).
Variable("var1", VariableTypeNamed, seq({
character({ 'a', 'b' }),
character({}, false),
character({ 'c', 'd' }),
})),
};
// Only the symbol for index 1 (var1) is expected to be returned.
AssertThat(recovery_tokens(grammar), Equals<vector<Symbol>>({
Symbol(1, true),
}));
});
});
END_TEST

View file

@ -1,43 +0,0 @@
#include "spec_helper.h"
#include "compiler/rules/character_set.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "helpers/rule_helpers.h"
#include "compiler/rules.h"
using namespace rules;
using namespace build_tables;
START_TEST
// Spec for does_match_any_line(): a single `rule` variable is reassigned and
// checked against the expected true/false result for each rule shape.
describe("does_match_any_line(rule)", []() {
it("returns true for rules that match any sequence of characters on a line", [&]() {
// A single character({}, false), not repeated: expected false.
rule_ptr rule = character({}, false);
AssertThat(does_match_any_line(rule), IsFalse());
// The same character rule wrapped in repeat(): expected true.
rule = repeat(character({}, false));
AssertThat(does_match_any_line(rule), IsTrue());
// NOTE(review): this pair is identical to the previous one - confirm whether a different rule was intended.
rule = repeat(character({}, false));
AssertThat(does_match_any_line(rule), IsTrue());
// A choice where one alternative is such a repeat: expected true.
rule = choice({ repeat(character({}, false)), str("x") });
AssertThat(does_match_any_line(rule), IsTrue());
// A repeat whose content is a choice containing the character rule: expected true.
rule = repeat(choice({ character({}, false), str("x") }));
AssertThat(does_match_any_line(rule), IsTrue());
// A choice between two literal strings: expected false.
rule = choice({ str("y"), str("x") });
AssertThat(does_match_any_line(rule), IsFalse());
// A sequence of two such repeats: expected true.
rule = seq({ repeat(character({}, false)), repeat(character({}, false)) });
AssertThat(does_match_any_line(rule), IsTrue());
// A sequence ending in a literal string: expected false.
rule = seq({ repeat(character({}, false)), str("x") });
AssertThat(does_match_any_line(rule), IsFalse());
// The character rule here is built with 0 and '\n' as its character list: expected true.
rule = repeat(character({0, '\n'}, false));
AssertThat(does_match_any_line(rule), IsTrue());
});
});
END_TEST

View file

@ -15,7 +15,7 @@
#include "compiler/syntax_grammar.h"
#include "compiler/rules/symbol.h"
#include "compiler/rules/built_in_symbols.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/build_tables/recovery_tokens.h"
namespace tree_sitter {
namespace build_tables {
@ -108,10 +108,8 @@ class ParseTableBuilder {
void add_out_of_context_parse_states() {
auto symbols_by_first = symbols_by_first_symbol(grammar);
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol symbol(i, true);
if (!does_match_any_line(lexical_grammar.variables[i].rule))
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
for (const Symbol &symbol : recovery_tokens(lexical_grammar)) {
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
}
for (size_t i = 0; i < grammar.variables.size(); i++) {

View file

@ -1,62 +0,0 @@
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// Rule visitor answering: can this rule match an arbitrary single character?
//
// A CharacterSet qualifies when it is an `includes_all` set whose excluded
// characters are limited to NUL (0) and '\n'. Metadata is looked through,
// and a Choice qualifies as soon as any one of its elements does.
class DoesTokenCatchAnyCharacter : public rules::RuleFn<bool> {
  bool apply_to(const rules::CharacterSet *character_set) {
    if (!character_set->includes_all)
      return false;

    // Any excluded character other than NUL or newline disqualifies the set.
    for (uint32_t excluded : character_set->excluded_chars) {
      const bool is_allowed_exclusion = (excluded == 0 || excluded == '\n');
      if (!is_allowed_exclusion)
        return false;
    }
    return true;
  }

  bool apply_to(const rules::Metadata *metadata) {
    return apply(metadata->rule);
  }

  bool apply_to(const rules::Choice *choice) {
    for (const rule_ptr &alternative : choice->elements) {
      if (apply(alternative))
        return true;
    }
    return false;
  }
};
// Rule visitor answering: does this rule consist of repetitions of
// "any character" content?
//
//   Repeat   - decided by running DoesTokenCatchAnyCharacter on its content.
//   Seq      - both the left and the right side must qualify.
//   Choice   - at least one element must qualify.
//   Metadata - looked through to the wrapped rule.
class DoesTokenCatchAll : public rules::RuleFn<bool> {
  bool apply_to(const rules::Seq *seq) {
    // Evaluate the right side only when the left side already qualifies.
    if (!apply(seq->left))
      return false;
    return apply(seq->right);
  }

  bool apply_to(const rules::Choice *choice) {
    for (const rule_ptr &alternative : choice->elements) {
      if (apply(alternative))
        return true;
    }
    return false;
  }

  bool apply_to(const rules::Metadata *metadata) {
    return apply(metadata->rule);
  }

  bool apply_to(const rules::Repeat *repeat) {
    DoesTokenCatchAnyCharacter content_check;
    return content_check.apply(repeat->content);
  }
};
bool does_match_any_line(const rule_ptr &rule) {
return DoesTokenCatchAll().apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -0,0 +1,89 @@
#include "compiler/build_tables/recovery_tokens.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
using rules::Symbol;
using std::vector;
// Rule visitor that accumulates, into `result`, the union of every
// CharacterSet it reaches while walking a rule.
//
// The two template flags control how a Seq is traversed: its left side is
// visited only when `visit_left` is true, and its right side only when
// `visit_right` is true. Choice, Repeat and Metadata rules are always
// descended into.
template <bool visit_left, bool visit_right>
class CharacterAggregator : public rules::RuleFn<void> {
  void apply_to(const rules::CharacterSet *character_set) {
    result.add_set(*character_set);
  }

  void apply_to(const rules::Metadata *metadata) {
    apply(metadata->rule);
  }

  void apply_to(const rules::Repeat *repeat) {
    apply(repeat->content);
  }

  void apply_to(const rules::Choice *choice) {
    for (const rule_ptr &alternative : choice->elements) {
      apply(alternative);
    }
  }

  void apply_to(const rules::Seq *seq) {
    if (visit_left) {
      apply(seq->left);
    }
    if (visit_right) {
      apply(seq->right);
    }
  }

 public:
  // Union of all character sets visited so far.
  rules::CharacterSet result;
};
// Named instantiations of CharacterAggregator; they differ only in which
// side(s) of a Seq are traversed.
// Follows only the left side of each Seq.
class FirstCharacters : public CharacterAggregator<true, false> {};
// Follows only the right side of each Seq.
class LastCharacters : public CharacterAggregator<false, true> {};
// Follows both sides of each Seq.
class AllCharacters : public CharacterAggregator<true, true> {};
// Selects the lexical variables (tokens) to be used for error recovery.
//
// The variable at index i is reported as Symbol(i, true) when either:
//   * the character sets collected by FirstCharacters and by LastCharacters
//     are each explicit (not `includes_all`) and neither intersects the
//     characters used by the grammar's separators, or
//   * the character set collected over the whole rule (AllCharacters) does
//     not intersect the separator characters.
//
// Symbols are returned in increasing index order.
vector<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
  vector<Symbol> result;

  // Union of every character that appears in any separator rule.
  AllCharacters all_separator_characters;
  for (const rule_ptr &separator : grammar.separators)
    all_separator_characters.apply(separator);
  const rules::CharacterSet &separator_characters =
    all_separator_characters.result;

  for (size_t i = 0; i < grammar.variables.size(); i++) {
    // Bind by reference: the rule is only read here, so no copy is needed.
    const rule_ptr &rule = grammar.variables[i].rule;

    FirstCharacters first_characters;
    first_characters.apply(rule);

    LastCharacters last_characters;
    last_characters.apply(rule);

    AllCharacters all_characters;
    all_characters.apply(rule);

    bool has_distinct_start =
      !first_characters.result.includes_all &&
      !first_characters.result.intersects(separator_characters);

    bool has_distinct_end =
      !last_characters.result.includes_all &&
      !last_characters.result.intersects(separator_characters);

    bool has_no_separators =
      !all_characters.result.intersects(separator_characters);

    if ((has_distinct_start && has_distinct_end) || has_no_separators)
      result.push_back(Symbol(i, true));
  }

  return result;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -2,11 +2,16 @@
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#include "compiler/rule.h"
#include "compiler/rules/symbol.h"
#include <vector>
namespace tree_sitter {
// Forward declaration; only a reference to LexicalGrammar is used below.
struct LexicalGrammar;
namespace build_tables {
bool does_match_any_line(const rule_ptr &);
// Returns the symbols of the lexical grammar's variables that are selected
// as error-recovery tokens.
std::vector<rules::Symbol> recovery_tokens(const LexicalGrammar &);
} // namespace build_tables
} // namespace tree_sitter

View file

@ -153,8 +153,24 @@ bool CharacterSet::is_empty() const {
}
// Unions `other` into this set in place.
//
// The code below treats a set as being in one of two forms, selected by
// `includes_all`: an explicit list (`included_chars`) when it is false, or
// "everything except `excluded_chars`" when it is true. Each of the four
// combinations of forms is handled separately so that only the container
// belonging to the resulting form is modified.
void CharacterSet::add_set(const CharacterSet &other) {
  if (includes_all) {
    if (other.includes_all) {
      // all-but-A united with all-but-B: keep excluding only what both exclude.
      // NOTE(review): relies on remove_chars() returning the characters it
      // actually removed from its first argument - confirm against its
      // definition.
      excluded_chars = remove_chars(&excluded_chars, other.excluded_chars);
    } else {
      // all-but-A united with an explicit set: stop excluding anything that
      // `other` includes.
      remove_chars(&excluded_chars, other.included_chars);
    }
  } else {
    if (other.includes_all) {
      // Explicit set united with all-but-B: become an includes_all set that
      // excludes B minus the characters this set already included.
      includes_all = true;
      for (uint32_t c : other.excluded_chars)
        if (!included_chars.count(c))
          excluded_chars.insert(c);
      included_chars.clear();
    } else {
      // Two explicit sets: plain union of the included characters.
      for (uint32_t c : other.included_chars)
        included_chars.insert(c);
    }
  }
}
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
@ -182,6 +198,11 @@ CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
return result;
}
bool CharacterSet::intersects(const CharacterSet &other) const {
CharacterSet copy(*this);
return !copy.remove_set(other).is_empty();
}
vector<CharacterRange> CharacterSet::included_ranges() const {
return consolidate_ranges(included_chars);
}

View file

@ -31,6 +31,7 @@ class CharacterSet : public Rule {
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
bool intersects(const CharacterSet &other) const;
bool is_empty() const;
std::vector<CharacterRange> included_ranges() const;