Refine logic for which tokens to use in error recovery
This commit is contained in:
parent
31f6b2e24a
commit
5b74813a5c
9 changed files with 159 additions and 114 deletions
|
|
@ -14,7 +14,7 @@
|
|||
'src/compiler/build_tables/build_lex_table.cc',
|
||||
'src/compiler/build_tables/build_parse_table.cc',
|
||||
'src/compiler/build_tables/build_tables.cc',
|
||||
'src/compiler/build_tables/does_match_any_line.cc',
|
||||
'src/compiler/build_tables/recovery_tokens.cc',
|
||||
'src/compiler/build_tables/item_set_closure.cc',
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
|
|
|
|||
36
spec/compiler/build_tables/distinctive_tokens_spec.cc
Normal file
36
spec/compiler/build_tables/distinctive_tokens_spec.cc
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("recovery_tokens(rule)", []() {
|
||||
it("includes rules that can only begin and end with an explicit set of characters", [&]() {
|
||||
LexicalGrammar grammar;
|
||||
grammar.separators = {
|
||||
character({ ' ' }),
|
||||
};
|
||||
|
||||
grammar.variables = {
|
||||
Variable("var0", VariableTypeNamed, character({}, false)),
|
||||
Variable("var1", VariableTypeNamed, seq({
|
||||
character({ 'a', 'b' }),
|
||||
character({}, false),
|
||||
character({ 'c', 'd' }),
|
||||
})),
|
||||
};
|
||||
|
||||
AssertThat(recovery_tokens(grammar), Equals<vector<Symbol>>({
|
||||
Symbol(1, true),
|
||||
}));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
#include "spec_helper.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
#include "helpers/rule_helpers.h"
|
||||
#include "compiler/rules.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("does_match_any_line(rule)", []() {
|
||||
it("returns true for rules that match any sequence of characters on a line", [&]() {
|
||||
rule_ptr rule = character({}, false);
|
||||
AssertThat(does_match_any_line(rule), IsFalse());
|
||||
|
||||
rule = repeat(character({}, false));
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
|
||||
rule = repeat(character({}, false));
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
|
||||
rule = choice({ repeat(character({}, false)), str("x") });
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
|
||||
rule = repeat(choice({ character({}, false), str("x") }));
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
|
||||
rule = choice({ str("y"), str("x") });
|
||||
AssertThat(does_match_any_line(rule), IsFalse());
|
||||
|
||||
rule = seq({ repeat(character({}, false)), repeat(character({}, false)) });
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
|
||||
rule = seq({ repeat(character({}, false)), str("x") });
|
||||
AssertThat(does_match_any_line(rule), IsFalse());
|
||||
|
||||
rule = repeat(character({0, '\n'}, false));
|
||||
AssertThat(does_match_any_line(rule), IsTrue());
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -15,7 +15,7 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include "compiler/rules/built_in_symbols.h"
|
||||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -108,10 +108,8 @@ class ParseTableBuilder {
|
|||
void add_out_of_context_parse_states() {
|
||||
auto symbols_by_first = symbols_by_first_symbol(grammar);
|
||||
|
||||
for (size_t i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol symbol(i, true);
|
||||
if (!does_match_any_line(lexical_grammar.variables[i].rule))
|
||||
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
|
||||
for (const Symbol &symbol : recovery_tokens(lexical_grammar)) {
|
||||
add_out_of_context_parse_state(symbol, symbols_by_first[symbol]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < grammar.variables.size(); i++) {
|
||||
|
|
|
|||
|
|
@ -1,62 +0,0 @@
|
|||
#include "compiler/build_tables/does_match_any_line.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
// Rule visitor answering: does this rule contain a character set that accepts
// every character, ignoring NUL and newline exclusions?
//
// Only Choice, Metadata and CharacterSet are handled here; every other rule
// kind gets whatever rules::RuleFn<bool> does by default (defined elsewhere).
class DoesTokenCatchAnyCharacter : public rules::RuleFn<bool> {
  // A character set qualifies when it is an "includes all" set whose
  // exclusions are limited to 0 and '\n'.
  bool apply_to(const rules::CharacterSet *rule) {
    if (!rule->includes_all)
      return false;

    for (uint32_t excluded : rule->excluded_chars) {
      const bool is_ignorable = (excluded == 0 || excluded == '\n');
      if (!is_ignorable)
        return false;
    }
    return true;
  }

  // Metadata is transparent: the verdict is that of the wrapped rule.
  bool apply_to(const rules::Metadata *rule) {
    return apply(rule->rule);
  }

  // A choice qualifies as soon as one alternative qualifies.
  bool apply_to(const rules::Choice *rule) {
    bool any_alternative_matches = false;
    for (const rule_ptr &alternative : rule->elements) {
      if (apply(alternative)) {
        any_alternative_matches = true;
        break;
      }
    }
    return any_alternative_matches;
  }
};
|
||||
|
||||
// Rule visitor answering: is this rule built from repeats of "any character"
// content, as judged by DoesTokenCatchAnyCharacter?
//
// Only Repeat, Metadata, Choice and Seq are handled here; other rule kinds
// get the default result of rules::RuleFn<bool> (defined elsewhere).
class DoesTokenCatchAll : public rules::RuleFn<bool> {
  // Both halves of a sequence must qualify; the right half is only examined
  // when the left half already qualified.
  bool apply_to(const rules::Seq *rule) {
    if (!apply(rule->left))
      return false;
    return apply(rule->right);
  }

  // A choice qualifies as soon as one alternative qualifies.
  bool apply_to(const rules::Choice *rule) {
    bool any_alternative_matches = false;
    for (const rule_ptr &alternative : rule->elements) {
      if (apply(alternative)) {
        any_alternative_matches = true;
        break;
      }
    }
    return any_alternative_matches;
  }

  // Metadata is transparent: the verdict is that of the wrapped rule.
  bool apply_to(const rules::Metadata *rule) {
    return apply(rule->rule);
  }

  // A repeat qualifies when its content passes the per-character check.
  bool apply_to(const rules::Repeat *rule) {
    DoesTokenCatchAnyCharacter content_check;
    return content_check.apply(rule->content);
  }
};
|
||||
|
||||
bool does_match_any_line(const rule_ptr &rule) {
|
||||
return DoesTokenCatchAll().apply(rule);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
89
src/compiler/build_tables/recovery_tokens.cc
Normal file
89
src/compiler/build_tables/recovery_tokens.cc
Normal file
|
|
@ -0,0 +1,89 @@
|
|||
#include "compiler/build_tables/recovery_tokens.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rules/choice.h"
|
||||
#include "compiler/rules/character_set.h"
|
||||
#include "compiler/rules/repeat.h"
|
||||
#include "compiler/rules/visitor.h"
|
||||
#include "compiler/rules/seq.h"
|
||||
#include "compiler/rules/metadata.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using rules::Symbol;
|
||||
using std::vector;
|
||||
|
||||
template <bool left, bool right>
|
||||
class CharacterAggregator : public rules::RuleFn<void> {
|
||||
void apply_to(const rules::Seq *rule) {
|
||||
if (left)
|
||||
apply(rule->left);
|
||||
if (right)
|
||||
apply(rule->right);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Choice *rule) {
|
||||
for (const rule_ptr &element : rule->elements)
|
||||
apply(element);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Repeat *rule) {
|
||||
apply(rule->content);
|
||||
}
|
||||
|
||||
void apply_to(const rules::Metadata *rule) {
|
||||
apply(rule->rule);
|
||||
}
|
||||
|
||||
void apply_to(const rules::CharacterSet *rule) {
|
||||
result.add_set(*rule);
|
||||
}
|
||||
|
||||
public:
|
||||
rules::CharacterSet result;
|
||||
};
|
||||
|
||||
// Follows only the left half of each Seq.
struct FirstCharacters : CharacterAggregator<true, false> {};

// Follows only the right half of each Seq.
struct LastCharacters : CharacterAggregator<false, true> {};

// Follows both halves of each Seq.
struct AllCharacters : CharacterAggregator<true, true> {};
|
||||
|
||||
// Returns the lexical variables selected as error-recovery tokens, as
// Symbol(index, true) in grammar order.
//
// A variable is selected when either:
//   * the characters gathered from the start of its rule AND from the end of
//     its rule are explicit sets (not "includes all") that do not intersect
//     the separator characters, or
//   * none of the characters gathered from its rule intersect the separator
//     characters.
//
// @param grammar  lexical grammar supplying `separators` and `variables`.
// @return         the selected symbols; empty if no variable qualifies.
vector<Symbol> recovery_tokens(const LexicalGrammar &grammar) {
  vector<Symbol> result;

  // Gather every character reachable from any separator rule once, up front.
  AllCharacters all_separator_characters;
  for (const rule_ptr &separator : grammar.separators)
    all_separator_characters.apply(separator);
  const auto &separator_chars = all_separator_characters.result;

  for (size_t i = 0; i < grammar.variables.size(); i++) {
    // Bind by reference: the previous by-value copy of the rule pointer was
    // never used.
    const rule_ptr &rule = grammar.variables[i].rule;

    FirstCharacters first_characters;
    first_characters.apply(rule);
    const auto &first_chars = first_characters.result;

    LastCharacters last_characters;
    last_characters.apply(rule);
    const auto &last_chars = last_characters.result;

    AllCharacters all_characters;
    all_characters.apply(rule);
    const auto &all_chars = all_characters.result;

    bool has_distinct_start =
      !first_chars.includes_all &&
      !first_chars.intersects(separator_chars);

    bool has_distinct_end =
      !last_chars.includes_all &&
      !last_chars.intersects(separator_chars);

    bool has_no_separators = !all_chars.intersects(separator_chars);

    if ((has_distinct_start && has_distinct_end) || has_no_separators)
      result.push_back(Symbol(i, true));
  }

  return result;
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -2,11 +2,16 @@
|
|||
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
|
||||
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/rules/symbol.h"
|
||||
#include <vector>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct LexicalGrammar;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
bool does_match_any_line(const rule_ptr &);
|
||||
std::vector<rules::Symbol> recovery_tokens(const LexicalGrammar &);
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -153,8 +153,24 @@ bool CharacterSet::is_empty() const {
|
|||
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (uint32_t c : other.included_chars)
|
||||
included_chars.insert(c);
|
||||
if (includes_all) {
|
||||
if (other.includes_all) {
|
||||
excluded_chars = remove_chars(&excluded_chars, other.excluded_chars);
|
||||
} else {
|
||||
remove_chars(&excluded_chars, other.included_chars);
|
||||
}
|
||||
} else {
|
||||
if (other.includes_all) {
|
||||
includes_all = true;
|
||||
for (uint32_t c : other.excluded_chars)
|
||||
if (!included_chars.count(c))
|
||||
excluded_chars.insert(c);
|
||||
included_chars.clear();
|
||||
} else {
|
||||
for (uint32_t c : other.included_chars)
|
||||
included_chars.insert(c);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
||||
|
|
@ -182,6 +198,11 @@ CharacterSet CharacterSet::remove_set(const CharacterSet &other) {
|
|||
return result;
|
||||
}
|
||||
|
||||
bool CharacterSet::intersects(const CharacterSet &other) const {
|
||||
CharacterSet copy(*this);
|
||||
return !copy.remove_set(other).is_empty();
|
||||
}
|
||||
|
||||
// Returns `included_chars` as ranges, as produced by consolidate_ranges
// (defined outside this view).
vector<CharacterRange> CharacterSet::included_ranges() const {
  vector<CharacterRange> ranges = consolidate_ranges(included_chars);
  return ranges;
}
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ class CharacterSet : public Rule {
|
|||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
bool intersects(const CharacterSet &other) const;
|
||||
bool is_empty() const;
|
||||
|
||||
std::vector<CharacterRange> included_ranges() const;
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue