Merge pull request #153 from tree-sitter/better-lexical-conflict-detection
Make lexical conflict detection more conservative and reliable
This commit is contained in:
commit
64fd5d0b4b
32 changed files with 342 additions and 404 deletions
|
|
@ -63,7 +63,6 @@ typedef union {
|
|||
struct {
|
||||
uint8_t count;
|
||||
bool reusable : 1;
|
||||
bool depends_on_lookahead : 1;
|
||||
};
|
||||
} TSParseActionEntry;
|
||||
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 7
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 8
|
||||
|
||||
typedef unsigned short TSSymbol;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
|
|
|||
|
|
@ -13,7 +13,6 @@
|
|||
'sources': [
|
||||
'src/compiler/build_tables/lex_item.cc',
|
||||
'src/compiler/build_tables/lex_item_transitions.cc',
|
||||
'src/compiler/build_tables/lex_conflict_manager.cc',
|
||||
'src/compiler/build_tables/lex_table_builder.cc',
|
||||
'src/compiler/build_tables/lookahead_set.cc',
|
||||
'src/compiler/build_tables/parse_item.cc',
|
||||
|
|
|
|||
|
|
@ -1,53 +0,0 @@
|
|||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include <utility>
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
bool LexConflictManager::resolve(const LexItemSet &item_set,
|
||||
const AdvanceAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
if (new_action.precedence_range.max >= old_action.precedence) {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
possible_extensions[old_action.symbol.index].insert(item.lhs.index);
|
||||
}
|
||||
return true;
|
||||
} else {
|
||||
for (const LexItem &item : item_set.entries) {
|
||||
possible_homonyms[item.lhs.index].insert(old_action.symbol.index);
|
||||
}
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
bool LexConflictManager::resolve(const AcceptTokenAction &new_action,
|
||||
const AcceptTokenAction &old_action) {
|
||||
bool result;
|
||||
if (new_action.precedence > old_action.precedence) {
|
||||
result = true;
|
||||
} else if (new_action.precedence < old_action.precedence) {
|
||||
result = false;
|
||||
} else if (new_action.is_string && !old_action.is_string) {
|
||||
result = true;
|
||||
} else if (old_action.is_string && !new_action.is_string) {
|
||||
result = false;
|
||||
} else if (new_action.symbol.index < old_action.symbol.index) {
|
||||
result = true;
|
||||
} else {
|
||||
result = false;
|
||||
}
|
||||
|
||||
if (result) {
|
||||
possible_homonyms[old_action.symbol.index].insert(new_action.symbol.index);
|
||||
} else {
|
||||
possible_homonyms[new_action.symbol.index].insert(old_action.symbol.index);
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -1,31 +0,0 @@
|
|||
#ifndef COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
#define COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
struct AdvanceAction;
|
||||
struct AcceptTokenAction;
|
||||
|
||||
namespace build_tables {
|
||||
|
||||
class LexItemSet;
|
||||
|
||||
class LexConflictManager {
|
||||
public:
|
||||
bool resolve(const LexItemSet &, const AdvanceAction &,
|
||||
const AcceptTokenAction &);
|
||||
bool resolve(const AcceptTokenAction &, const AcceptTokenAction &);
|
||||
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_homonyms;
|
||||
std::map<rules::Symbol::Index, std::set<rules::Symbol::Index>> possible_extensions;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_BUILD_TABLES_LEX_CONFLICT_MANAGER_H_
|
||||
|
|
@ -13,6 +13,7 @@ using std::string;
|
|||
using std::unordered_set;
|
||||
using rules::CharacterSet;
|
||||
using rules::Symbol;
|
||||
using rules::Metadata;
|
||||
|
||||
LexItem::LexItem(const rules::Symbol &lhs, const rules::Rule &rule)
|
||||
: lhs(lhs), rule(rule) {}
|
||||
|
|
@ -82,6 +83,19 @@ bool LexItemSet::operator==(const LexItemSet &other) const {
|
|||
return entries == other.entries;
|
||||
}
|
||||
|
||||
bool LexItem::is_in_separators() const {
|
||||
if (!rule.is<Metadata>()) return false;
|
||||
auto &metadata = rule.get_unchecked<Metadata>();
|
||||
return !metadata.params.is_main_token;
|
||||
}
|
||||
|
||||
bool LexItemSet::has_items_in_separators() const {
|
||||
for (const LexItem &item : entries) {
|
||||
if (item.is_in_separators()) return true;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
LexItemSet::TransitionMap LexItemSet::transitions() const {
|
||||
TransitionMap result;
|
||||
for (const LexItem &item : entries) {
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ class LexItem {
|
|||
|
||||
bool operator==(const LexItem &other) const;
|
||||
CompletionStatus completion_status() const;
|
||||
bool is_in_separators() const;
|
||||
|
||||
rules::Symbol lhs;
|
||||
rules::Rule rule;
|
||||
|
|
@ -47,12 +48,12 @@ class LexItemSet {
|
|||
LexItemSet();
|
||||
explicit LexItemSet(const std::unordered_set<LexItem> &);
|
||||
|
||||
bool operator==(const LexItemSet &) const;
|
||||
|
||||
struct Transition;
|
||||
typedef std::map<rules::CharacterSet, Transition> TransitionMap;
|
||||
|
||||
bool operator==(const LexItemSet &) const;
|
||||
TransitionMap transitions() const;
|
||||
bool has_items_in_separators() const;
|
||||
|
||||
std::unordered_set<LexItem> entries;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ namespace build_tables {
|
|||
|
||||
using std::function;
|
||||
using std::map;
|
||||
using std::move;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using rules::CharacterSet;
|
||||
|
|
@ -157,7 +158,7 @@ class TransitionBuilder {
|
|||
add_transition(
|
||||
transitions, pair.first,
|
||||
transform_transition(pair.second, [&params](Rule rule) {
|
||||
return rules::Metadata{rule, params};
|
||||
return rules::Metadata::merge(move(rule), params);
|
||||
})
|
||||
);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@
|
|||
#include <utility>
|
||||
#include <cwctype>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/parse_table.h"
|
||||
|
|
@ -19,6 +18,7 @@ namespace build_tables {
|
|||
|
||||
using std::iswalpha;
|
||||
using std::map;
|
||||
using std::move;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
using std::string;
|
||||
|
|
@ -68,15 +68,13 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
LexTable keyword_lex_table;
|
||||
const LexicalGrammar grammar;
|
||||
vector<Rule> separator_rules;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> main_lex_state_ids;
|
||||
unordered_map<LexItemSet, LexStateId> keyword_lex_state_ids;
|
||||
CharacterSet separator_start_characters;
|
||||
vector<CharacterSet> starting_characters_by_token;
|
||||
vector<CharacterSet> following_characters_by_token;
|
||||
vector<set<Symbol>> shadowed_tokens_by_token;
|
||||
const vector<LookaheadSet> &coincident_tokens_by_token;
|
||||
vector<bool> conflict_status_by_token;
|
||||
vector<ConflictStatus> conflict_matrix;
|
||||
bool conflict_detection_mode;
|
||||
LookaheadSet keyword_symbols;
|
||||
Symbol keyword_capture_token;
|
||||
|
|
@ -89,8 +87,8 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
: grammar(lexical_grammar),
|
||||
starting_characters_by_token(lexical_grammar.variables.size()),
|
||||
following_characters_by_token(lexical_grammar.variables.size()),
|
||||
shadowed_tokens_by_token(lexical_grammar.variables.size()),
|
||||
coincident_tokens_by_token(coincident_tokens),
|
||||
conflict_matrix(lexical_grammar.variables.size() * lexical_grammar.variables.size(), DoesNotMatch),
|
||||
conflict_detection_mode(false),
|
||||
keyword_capture_token(rules::NONE()) {
|
||||
|
||||
|
|
@ -116,6 +114,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
if (following_tokens != following_tokens_by_token.end()) {
|
||||
following_tokens->second.for_each([&](Symbol following_token) {
|
||||
following_character_aggregator.apply(grammar.variables[following_token.index].rule);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
|
||||
|
|
@ -124,7 +123,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
aggregator.apply(grammar.variables[i].rule);
|
||||
bool all_alpha = true, all_lower = true;
|
||||
for (auto character : aggregator.result.included_chars) {
|
||||
if (!iswalpha(character)) all_alpha = true;
|
||||
if (!iswalpha(character) && character != '_') all_alpha = false;
|
||||
if (!iswlower(character)) all_lower = false;
|
||||
}
|
||||
|
||||
|
|
@ -163,8 +162,6 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
Symbol::terminal(i),
|
||||
Symbol::terminal(j)
|
||||
}), true));
|
||||
if (conflict_status_by_token[i]) shadowed_tokens_by_token[j].insert(Symbol::terminal(i));
|
||||
if (conflict_status_by_token[j]) shadowed_tokens_by_token[i].insert(Symbol::terminal(j));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -174,9 +171,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
Symbol symbol = Symbol::terminal(i);
|
||||
bool matches_all_keywords = true;
|
||||
keyword_symbols.for_each([&](Symbol keyword_symbol) {
|
||||
if (!conflict_manager.possible_homonyms[symbol.index].count(keyword_symbol.index)) {
|
||||
if (!(get_conflict_status(symbol, keyword_symbol) & MatchesSameString)) {
|
||||
matches_all_keywords = false;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
if (!matches_all_keywords) continue;
|
||||
|
||||
|
|
@ -189,9 +188,11 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
// Don't use a token to capture keywords if it conflicts with other tokens
|
||||
// that occur in the same state as a keyword.
|
||||
bool shadows_other_tokens = false;
|
||||
for (auto shadowed_token : shadowed_tokens_by_token[i]) {
|
||||
if (!keyword_symbols.contains(shadowed_token) &&
|
||||
keyword_symbols.intersects(coincident_tokens_by_token[shadowed_token.index])) {
|
||||
for (Symbol::Index j = 0; j < n; j++) {
|
||||
Symbol other_symbol = Symbol::terminal(j);
|
||||
if ((get_conflict_status(other_symbol, symbol) & (MatchesShorterStringWithinSeparators|MatchesLongerStringWithValidNextChar)) &&
|
||||
!keyword_symbols.contains(other_symbol) &&
|
||||
keyword_symbols.intersects(coincident_tokens_by_token[j])) {
|
||||
shadows_other_tokens = true;
|
||||
break;
|
||||
}
|
||||
|
|
@ -250,11 +251,21 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
return {main_lex_table, keyword_lex_table, keyword_capture_token};
|
||||
}
|
||||
|
||||
const set<Symbol> &get_incompatible_tokens(Symbol::Index index) const {
|
||||
return shadowed_tokens_by_token[index];
|
||||
ConflictStatus get_conflict_status(Symbol shadowed_token, Symbol other_token) const {
|
||||
if (shadowed_token.is_built_in() ||
|
||||
other_token.is_built_in() ||
|
||||
!shadowed_token.is_terminal() ||
|
||||
!other_token.is_terminal()) return DoesNotMatch;
|
||||
unsigned index = shadowed_token.index * grammar.variables.size() + other_token.index;
|
||||
return conflict_matrix[index];
|
||||
}
|
||||
|
||||
private:
|
||||
void record_conflict(Symbol shadowed_token, Symbol other_token, ConflictStatus status) {
|
||||
unsigned index = shadowed_token.index * grammar.variables.size() + other_token.index;
|
||||
conflict_matrix[index] = static_cast<ConflictStatus>(conflict_matrix[index] | status);
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(LexTable &lex_table, const LexItemSet &item_set) {
|
||||
auto &lex_state_ids = &lex_table == &main_lex_table ?
|
||||
main_lex_state_ids :
|
||||
|
|
@ -280,27 +291,27 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
AdvanceAction action(-1, transition.precedence, transition.in_main_token);
|
||||
AcceptTokenAction &accept_action = lex_table.states[state_id].accept_action;
|
||||
if (accept_action.is_present()) {
|
||||
bool prefer_advancing = conflict_manager.resolve(
|
||||
transition.destination,
|
||||
action,
|
||||
accept_action
|
||||
);
|
||||
bool prefer_advancing = action.precedence_range.max >= accept_action.precedence;
|
||||
|
||||
if (conflict_detection_mode) {
|
||||
bool next_item_set_can_yield_this_token = false;
|
||||
for (const LexItem &item : transition.destination.entries) {
|
||||
if (item.lhs == accept_action.symbol) {
|
||||
next_item_set_can_yield_this_token = true;
|
||||
} else if (!prefer_advancing && !transition.in_main_token) {
|
||||
conflict_status_by_token[item.lhs.index] = true;
|
||||
} else if (!prefer_advancing && item_set.has_items_in_separators()) {
|
||||
record_conflict(item.lhs, accept_action.symbol, MatchesShorterStringWithinSeparators);
|
||||
}
|
||||
}
|
||||
|
||||
if (prefer_advancing &&
|
||||
!next_item_set_can_yield_this_token &&
|
||||
(characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
|
||||
characters.intersects(separator_start_characters))) {
|
||||
conflict_status_by_token[accept_action.symbol.index] = true;
|
||||
if (prefer_advancing && !next_item_set_can_yield_this_token) {
|
||||
auto advance_symbol = transition.destination.entries.begin()->lhs;
|
||||
if (characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
|
||||
characters.intersects(separator_start_characters)) {
|
||||
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerStringWithValidNextChar);
|
||||
} else {
|
||||
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerString);
|
||||
}
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -321,10 +332,10 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
grammar.variables[item.lhs.index].is_string);
|
||||
AcceptTokenAction &existing_action = lex_table.states[state_id].accept_action;
|
||||
if (existing_action.is_present()) {
|
||||
if (conflict_manager.resolve(action, existing_action)) {
|
||||
conflict_status_by_token[existing_action.symbol.index] = true;
|
||||
if (should_replace_accept_action(existing_action, action)) {
|
||||
record_conflict(existing_action.symbol, action.symbol, MatchesSameString);
|
||||
} else {
|
||||
conflict_status_by_token[action.symbol.index] = true;
|
||||
record_conflict(action.symbol, existing_action.symbol, MatchesSameString);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
@ -336,26 +347,16 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
void mark_fragile_tokens(ParseTable *parse_table) {
|
||||
for (ParseState &state : parse_table->states) {
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol symbol = entry.first;
|
||||
if (symbol.is_terminal()) {
|
||||
auto homonyms = conflict_manager.possible_homonyms.find(symbol.index);
|
||||
if (homonyms != conflict_manager.possible_homonyms.end())
|
||||
for (Symbol::Index homonym : homonyms->second)
|
||||
if (state.terminal_entries.count(Symbol::terminal(homonym))) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!entry.second.reusable)
|
||||
continue;
|
||||
|
||||
auto extensions = conflict_manager.possible_extensions.find(symbol.index);
|
||||
if (extensions != conflict_manager.possible_extensions.end())
|
||||
for (Symbol::Index extension : extensions->second)
|
||||
if (state.terminal_entries.count(Symbol::terminal(extension))) {
|
||||
entry.second.depends_on_lookahead = true;
|
||||
break;
|
||||
}
|
||||
Symbol token = entry.first;
|
||||
if (token.is_external() || token.is_built_in()) continue;
|
||||
for (unsigned i = 0; i < grammar.variables.size(); i++) {
|
||||
Symbol other_token = Symbol::terminal(i);
|
||||
ConflictStatus status = get_conflict_status(token, other_token);
|
||||
if (status != ConflictStatus::DoesNotMatch &&
|
||||
state.terminal_entries.count(other_token)) {
|
||||
entry.second.reusable = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -366,25 +367,16 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
|
||||
left->for_each_difference(right, [&](bool in_left, Symbol different_symbol) {
|
||||
if (!different_symbol.is_external() && !different_symbol.is_built_in()) {
|
||||
if (in_left) {
|
||||
right.for_each([&](Symbol right_symbol) {
|
||||
if (shadowed_tokens_by_token[different_symbol.index].count(right_symbol) ||
|
||||
!coincident_tokens_by_token[different_symbol.index].contains(right_symbol)) {
|
||||
is_compatible = false;
|
||||
return;
|
||||
}
|
||||
});
|
||||
if (!is_compatible) return false;
|
||||
} else {
|
||||
left->for_each([&](Symbol left_symbol) {
|
||||
if (shadowed_tokens_by_token[different_symbol.index].count(left_symbol) ||
|
||||
!coincident_tokens_by_token[different_symbol.index].contains(left_symbol)) {
|
||||
is_compatible = false;
|
||||
return;
|
||||
}
|
||||
});
|
||||
if (!is_compatible) return false;
|
||||
}
|
||||
const LookaheadSet &existing_set = in_left ? right : *left;
|
||||
existing_set.for_each([&](Symbol existing_symbol) {
|
||||
if ((get_conflict_status(existing_symbol, different_symbol) & CannotDistinguish) ||
|
||||
!coincident_tokens_by_token[different_symbol.index].contains(existing_symbol)) {
|
||||
is_compatible = false;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
if (!is_compatible) return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -465,7 +457,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
LexItemSet result;
|
||||
terminals.for_each([&](Symbol symbol) {
|
||||
if (symbol.is_terminal()) {
|
||||
for (const auto &rule : rules_for_symbol(symbol)) {
|
||||
for (auto &&rule : rules_for_symbol(symbol)) {
|
||||
if (with_separators) {
|
||||
for (const auto &separator_rule : separator_rules) {
|
||||
result.entries.insert(LexItem(
|
||||
|
|
@ -473,16 +465,17 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
Metadata::separator(
|
||||
Rule::seq({
|
||||
separator_rule,
|
||||
Metadata::main_token(rule)
|
||||
Metadata::main_token(move(rule))
|
||||
})
|
||||
)
|
||||
));
|
||||
}
|
||||
} else {
|
||||
result.entries.insert(LexItem(symbol, Metadata::main_token(rule)));
|
||||
result.entries.insert(LexItem(symbol, Metadata::main_token(move(rule))));
|
||||
}
|
||||
}
|
||||
}
|
||||
return true;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
|
@ -503,10 +496,22 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
);
|
||||
}
|
||||
|
||||
bool should_replace_accept_action(const AcceptTokenAction &old_action,
|
||||
const AcceptTokenAction &new_action) {
|
||||
if (new_action.precedence > old_action.precedence) return true;
|
||||
if (new_action.precedence < old_action.precedence) return false;
|
||||
if (new_action.is_string && !old_action.is_string) return true;
|
||||
if (old_action.is_string && !new_action.is_string) return false;
|
||||
return new_action.symbol.index < old_action.symbol.index;
|
||||
}
|
||||
|
||||
void clear() {
|
||||
main_lex_table.states.clear();
|
||||
main_lex_state_ids.clear();
|
||||
conflict_status_by_token = vector<bool>(grammar.variables.size(), false);
|
||||
}
|
||||
|
||||
const string &token_name(rules::Symbol &symbol) {
|
||||
return grammar.variables[symbol.index].name;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -526,8 +531,8 @@ LexTableBuilder::BuildResult LexTableBuilder::build(ParseTable *parse_table) {
|
|||
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
|
||||
}
|
||||
|
||||
const set<Symbol> &LexTableBuilder::get_incompatible_tokens(Symbol::Index token) const {
|
||||
return static_cast<const LexTableBuilderImpl *>(this)->get_incompatible_tokens(token);
|
||||
ConflictStatus LexTableBuilder::get_conflict_status(Symbol a, Symbol b) const {
|
||||
return static_cast<const LexTableBuilderImpl *>(this)->get_conflict_status(a, b);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -17,6 +17,19 @@ namespace build_tables {
|
|||
|
||||
class LookaheadSet;
|
||||
|
||||
enum ConflictStatus {
|
||||
DoesNotMatch = 0,
|
||||
MatchesShorterStringWithinSeparators = 1 << 0,
|
||||
MatchesSameString = 1 << 1,
|
||||
MatchesLongerString = 1 << 2,
|
||||
MatchesLongerStringWithValidNextChar = 1 << 3,
|
||||
CannotDistinguish = (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesSameString |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
),
|
||||
};
|
||||
|
||||
class LexTableBuilder {
|
||||
public:
|
||||
static std::unique_ptr<LexTableBuilder> create(const SyntaxGrammar &,
|
||||
|
|
@ -31,7 +44,8 @@ class LexTableBuilder {
|
|||
};
|
||||
|
||||
BuildResult build(ParseTable *);
|
||||
const std::set<rules::Symbol> &get_incompatible_tokens(rules::Symbol::Index) const;
|
||||
|
||||
ConflictStatus get_conflict_status(rules::Symbol, rules::Symbol) const;
|
||||
|
||||
protected:
|
||||
LexTableBuilder() = default;
|
||||
|
|
|
|||
|
|
@ -35,7 +35,11 @@ bool LookaheadSet::contains(const Symbol &symbol) const {
|
|||
bool LookaheadSet::intersects(const LookaheadSet &other) const {
|
||||
bool result = false;
|
||||
for_each([&](Symbol symbol) {
|
||||
if (other.contains(symbol)) result = true;
|
||||
if (other.contains(symbol)) {
|
||||
result = true;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,12 +32,12 @@ class LookaheadSet {
|
|||
iter != end;
|
||||
++iter) {
|
||||
if (*iter) {
|
||||
callback(rules::Symbol::external(iter - begin));
|
||||
if (!callback(rules::Symbol::external(iter - begin))) return;
|
||||
}
|
||||
}
|
||||
|
||||
if (eof) {
|
||||
callback(rules::END_OF_INPUT());
|
||||
if (!callback(rules::END_OF_INPUT())) return;
|
||||
}
|
||||
|
||||
for (auto begin = terminal_bits.begin(),
|
||||
|
|
@ -46,7 +46,7 @@ class LookaheadSet {
|
|||
iter != end;
|
||||
++iter) {
|
||||
if (*iter) {
|
||||
callback(rules::Symbol::terminal(iter - begin));
|
||||
if (!callback(rules::Symbol::terminal(iter - begin))) return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -187,6 +187,7 @@ size_t hash<ParseItemSet>::operator()(const ParseItemSet &item_set) const {
|
|||
hash_combine(&result, lookahead_set.size());
|
||||
lookahead_set.for_each([&result](Symbol symbol) {
|
||||
hash_combine(&result, symbol);
|
||||
return true;
|
||||
});
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -145,21 +145,44 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
|
||||
void build_error_parse_state(ParseStateId state_id) {
|
||||
unsigned CannotMerge = (
|
||||
MatchesShorterStringWithinSeparators |
|
||||
MatchesLongerStringWithValidNextChar
|
||||
);
|
||||
|
||||
// Add all the tokens that have no conflict with other tokens.
|
||||
LookaheadSet non_conflicting_tokens;
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
const LexicalVariable &variable = lexical_grammar.variables[i];
|
||||
|
||||
bool exclude_from_recovery_state = false;
|
||||
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(i)) {
|
||||
if (!coincident_tokens_by_token[i].contains(incompatible_token) &&
|
||||
((lexical_grammar.variables[incompatible_token.index].is_string && !variable.is_string) ||
|
||||
!lex_table_builder->get_incompatible_tokens(incompatible_token.index).count(token))) {
|
||||
exclude_from_recovery_state = true;
|
||||
bool conflicts_with_other_tokens = false;
|
||||
for (unsigned j = 0; j < lexical_grammar.variables.size(); j++) {
|
||||
Symbol other_token = Symbol::terminal(j);
|
||||
if (j != i &&
|
||||
!coincident_tokens_by_token[token.index].contains(other_token) &&
|
||||
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
|
||||
conflicts_with_other_tokens = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (!exclude_from_recovery_state) {
|
||||
parse_table.add_terminal_action(state_id, Symbol::terminal(i), ParseAction::Recover());
|
||||
if (!conflicts_with_other_tokens) non_conflicting_tokens.insert(token);
|
||||
}
|
||||
|
||||
LookaheadSet tokens;
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
bool conflicts_with_other_tokens = false;
|
||||
if (!non_conflicting_tokens.contains(token)) {
|
||||
non_conflicting_tokens.for_each([&](Symbol other_token) {
|
||||
if (!coincident_tokens_by_token[token.index].contains(other_token) &&
|
||||
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
|
||||
conflicts_with_other_tokens = true;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
if (!conflicts_with_other_tokens) {
|
||||
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -239,6 +262,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
});
|
||||
|
||||
// If the item is unfinished, create a new item by advancing one symbol.
|
||||
|
|
@ -490,8 +515,10 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
|
||||
// Do not add a token if it conflicts with an existing token.
|
||||
if (!new_token.is_built_in()) {
|
||||
for (Symbol incompatible_token : lex_table_builder->get_incompatible_tokens(new_token.index)) {
|
||||
if (state.terminal_entries.count(incompatible_token)) return false;
|
||||
for (const auto &entry : state.terminal_entries) {
|
||||
if (lex_table_builder->get_conflict_status(entry.first, new_token) & CannotDistinguish) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -835,13 +862,15 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
|
||||
if (!left_tokens.empty() && !right_tokens.empty()) {
|
||||
left_tokens.for_each([&](Symbol left_symbol) {
|
||||
if (!left_symbol.is_non_terminal() && !left_symbol.is_built_in()) {
|
||||
if (left_symbol.is_terminal() && !left_symbol.is_built_in()) {
|
||||
right_tokens.for_each([&](Symbol right_symbol) {
|
||||
if (right_symbol.is_terminal() && !right_symbol.is_built_in()) {
|
||||
following_tokens_by_token[left_symbol].insert(right_symbol);
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -399,7 +399,7 @@ class CCodeGenerator {
|
|||
}
|
||||
|
||||
void add_parse_table() {
|
||||
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
|
||||
add_parse_action_list_id(ParseTableEntry{ {}, false });
|
||||
|
||||
size_t state_id = 0;
|
||||
line("static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
|
||||
|
|
@ -623,10 +623,12 @@ class CCodeGenerator {
|
|||
indent([&]() {
|
||||
for (const auto &pair : parse_table_entries) {
|
||||
size_t index = pair.first;
|
||||
line("[" + to_string(index) + "] = {.count = " +
|
||||
to_string(pair.second.actions.size()) + ", .reusable = " +
|
||||
_boolean(pair.second.reusable) + ", .depends_on_lookahead = " +
|
||||
_boolean(pair.second.depends_on_lookahead) + "},");
|
||||
line(
|
||||
"[" + to_string(index) + "] = {"
|
||||
".count = " + to_string(pair.second.actions.size()) + ", "
|
||||
".reusable = " + _boolean(pair.second.reusable) +
|
||||
"},"
|
||||
);
|
||||
|
||||
for (const ParseAction &action : pair.second.actions) {
|
||||
add(" ");
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
|
||||
using std::move;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using std::unordered_set;
|
||||
|
|
@ -112,7 +113,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (!result.error_message.empty()) {
|
||||
return "Invalid token content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::token(result.rule));
|
||||
return Rule(Metadata::token(move(result.rule)));
|
||||
}
|
||||
|
||||
if (type == "PATTERN") {
|
||||
|
|
@ -153,7 +154,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::prec(precedence_json.u.integer, result.rule));
|
||||
return Rule(Metadata::prec(precedence_json.u.integer, move(result.rule)));
|
||||
}
|
||||
|
||||
if (type == "PREC_LEFT") {
|
||||
|
|
@ -167,7 +168,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::prec_left(precedence_json.u.integer, result.rule));
|
||||
return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.rule)));
|
||||
}
|
||||
|
||||
if (type == "PREC_RIGHT") {
|
||||
|
|
@ -181,7 +182,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
|
||||
return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.rule)));
|
||||
}
|
||||
|
||||
if (type == "PREC_DYNAMIC") {
|
||||
|
|
@ -195,7 +196,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
|
||||
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.rule)));
|
||||
}
|
||||
|
||||
if (type == "ALIAS") {
|
||||
|
|
@ -217,7 +218,7 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
return Rule(Metadata::alias(
|
||||
string(value_json.u.string.ptr),
|
||||
is_named_json.u.boolean,
|
||||
result.rule
|
||||
move(result.rule)
|
||||
));
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -103,20 +103,14 @@ bool ParseAction::operator<(const ParseAction &other) const {
|
|||
return alias_sequence_id < other.alias_sequence_id;
|
||||
}
|
||||
|
||||
ParseTableEntry::ParseTableEntry()
|
||||
: reusable(true), depends_on_lookahead(false) {}
|
||||
ParseTableEntry::ParseTableEntry() : reusable(true) {}
|
||||
|
||||
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions,
|
||||
bool reusable, bool depends_on_lookahead)
|
||||
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions, bool reusable)
|
||||
: actions(actions),
|
||||
reusable(reusable),
|
||||
depends_on_lookahead(depends_on_lookahead) {}
|
||||
reusable(reusable) {}
|
||||
|
||||
bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
|
||||
return
|
||||
actions == other.actions &&
|
||||
reusable == other.reusable &&
|
||||
depends_on_lookahead == other.depends_on_lookahead;
|
||||
return actions == other.actions && reusable == other.reusable;
|
||||
}
|
||||
|
||||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ struct ParseAction {
|
|||
|
||||
struct ParseTableEntry {
|
||||
ParseTableEntry();
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool, bool);
|
||||
ParseTableEntry(const std::vector<ParseAction> &, bool);
|
||||
bool operator==(const ParseTableEntry &other) const;
|
||||
inline bool operator!=(const ParseTableEntry &other) const {
|
||||
return !operator==(other);
|
||||
|
|
@ -57,7 +57,6 @@ struct ParseTableEntry {
|
|||
|
||||
std::vector<ParseAction> actions;
|
||||
bool reusable;
|
||||
bool depends_on_lookahead;
|
||||
};
|
||||
|
||||
struct ParseState {
|
||||
|
|
|
|||
|
|
@ -114,7 +114,13 @@ class TokenExtractor {
|
|||
|
||||
[this](const rules::Metadata &rule) -> Rule {
|
||||
if (rule.params.is_token) {
|
||||
return extract_token(*rule.rule, VariableTypeAuxiliary);
|
||||
rules::Metadata metadata{*rule.rule, rule.params};
|
||||
metadata.params.is_token = false;
|
||||
if (metadata.params == rules::MetadataParams{}) {
|
||||
return extract_token(*metadata.rule, VariableTypeAuxiliary);
|
||||
} else {
|
||||
return extract_token(metadata, VariableTypeAuxiliary);
|
||||
}
|
||||
} else {
|
||||
return rules::Metadata{apply(*rule.rule), rule.params};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -138,9 +138,15 @@ bool Rule::is<Symbol>() const { return type == SymbolType; }
|
|||
template <>
|
||||
bool Rule::is<Repeat>() const { return type == RepeatType; }
|
||||
|
||||
template <>
|
||||
bool Rule::is<Metadata>() const { return type == MetadataType; }
|
||||
|
||||
template <>
|
||||
const Symbol & Rule::get_unchecked<Symbol>() const { return symbol_; }
|
||||
|
||||
template <>
|
||||
const Metadata & Rule::get_unchecked<Metadata>() const { return metadata_; }
|
||||
|
||||
static inline void add_choice_element(std::vector<Rule> *elements, const Rule &new_rule) {
|
||||
new_rule.match(
|
||||
[elements](Choice choice) {
|
||||
|
|
@ -284,4 +290,4 @@ size_t hash<Rule>::operator()(const Rule &rule) const {
|
|||
}
|
||||
}
|
||||
|
||||
} // namespace std
|
||||
} // namespace std
|
||||
|
|
|
|||
|
|
@ -30,72 +30,125 @@ bool Metadata::operator==(const Metadata &other) const {
|
|||
return rule->operator==(*other.rule) && params == other.params;
|
||||
}
|
||||
|
||||
Metadata Metadata::token(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.is_token = true;
|
||||
return Metadata{rule, params};
|
||||
template <typename T>
|
||||
static Metadata add_metadata(Rule &&rule, T &&callback) {
|
||||
if (rule.is<Metadata>()) {
|
||||
Metadata metadata = rule.get_unchecked<Metadata>();
|
||||
callback(metadata.params);
|
||||
return metadata;
|
||||
} else {
|
||||
MetadataParams params;
|
||||
callback(params);
|
||||
return Metadata{move(rule), params};
|
||||
}
|
||||
}
|
||||
|
||||
Metadata Metadata::active_prec(int precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
params.is_active = true;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::merge(Rule &&rule, MetadataParams new_params) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (new_params.has_precedence && !params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = new_params.precedence;
|
||||
}
|
||||
|
||||
if (new_params.has_associativity && !params.has_associativity) {
|
||||
params.has_associativity = true;
|
||||
params.associativity = new_params.associativity;
|
||||
}
|
||||
|
||||
if (new_params.dynamic_precedence != 0) {
|
||||
params.dynamic_precedence = new_params.dynamic_precedence;
|
||||
}
|
||||
|
||||
if (new_params.is_string) params.is_string = true;
|
||||
if (new_params.is_active) params.is_active = true;
|
||||
if (new_params.is_main_token) params.is_main_token = true;
|
||||
|
||||
if (!new_params.alias.value.empty()) {
|
||||
params.alias = new_params.alias;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::prec(int precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::token(Rule &&rule) {
|
||||
return add_metadata(move(rule), [](MetadataParams &params) {
|
||||
params.is_token = true;
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::prec_left(int precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
params.has_associativity = true;
|
||||
params.associativity = AssociativityLeft;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::active_prec(int precedence, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
params.is_active = true;
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::prec_right(int precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
params.has_associativity = true;
|
||||
params.associativity = AssociativityRight;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::prec(int precedence, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (!params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::prec_dynamic(int dynamic_precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.dynamic_precedence = dynamic_precedence;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::prec_left(int precedence, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (!params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
}
|
||||
if (!params.has_associativity) {
|
||||
params.has_associativity = true;
|
||||
params.associativity = AssociativityLeft;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::separator(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = INT_MIN;
|
||||
params.is_active = true;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::prec_right(int precedence, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (!params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = precedence;
|
||||
}
|
||||
if (!params.has_associativity) {
|
||||
params.has_associativity = true;
|
||||
params.associativity = AssociativityRight;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::main_token(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
params.precedence = 0;
|
||||
params.is_main_token = true;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::prec_dynamic(int dynamic_precedence, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
params.dynamic_precedence = dynamic_precedence;
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::alias(string &&value, bool is_named, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.alias.value = move(value);
|
||||
params.alias.is_named = is_named;
|
||||
return Metadata{rule, params};
|
||||
Metadata Metadata::separator(Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (!params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = INT_MIN;
|
||||
}
|
||||
params.is_active = true;
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::main_token(Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
if (!params.has_precedence) {
|
||||
params.has_precedence = true;
|
||||
params.precedence = 0;
|
||||
}
|
||||
params.is_main_token = true;
|
||||
});
|
||||
}
|
||||
|
||||
Metadata Metadata::alias(string &&value, bool is_named, Rule &&rule) {
|
||||
return add_metadata(move(rule), [&](MetadataParams &params) {
|
||||
params.alias.value = move(value);
|
||||
params.alias.is_named = is_named;
|
||||
});
|
||||
}
|
||||
|
||||
} // namespace rules
|
||||
|
|
|
|||
|
|
@ -62,15 +62,16 @@ struct Metadata {
|
|||
|
||||
Metadata(const Rule &rule, MetadataParams params);
|
||||
|
||||
static Metadata token(const Rule &rule);
|
||||
static Metadata active_prec(int precedence, const Rule &rule);
|
||||
static Metadata prec(int precedence, const Rule &rule);
|
||||
static Metadata prec_left(int precedence, const Rule &rule);
|
||||
static Metadata prec_right(int precedence, const Rule &rule);
|
||||
static Metadata prec_dynamic(int precedence, const Rule &rule);
|
||||
static Metadata separator(const Rule &rule);
|
||||
static Metadata main_token(const Rule &rule);
|
||||
static Metadata alias(std::string &&value, bool is_named, const Rule &rule);
|
||||
static Metadata merge(Rule &&rule, MetadataParams params);
|
||||
static Metadata token(Rule &&rule);
|
||||
static Metadata active_prec(int precedence, Rule &&rule);
|
||||
static Metadata prec(int precedence, Rule &&rule);
|
||||
static Metadata prec_left(int precedence, Rule &&rule);
|
||||
static Metadata prec_right(int precedence, Rule &&rule);
|
||||
static Metadata prec_dynamic(int precedence, Rule &&rule);
|
||||
static Metadata separator(Rule &&rule);
|
||||
static Metadata main_token(Rule &&rule);
|
||||
static Metadata alias(std::string &&value, bool is_named, Rule &&rule);
|
||||
|
||||
bool operator==(const Metadata &other) const;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -15,7 +15,6 @@ void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
|||
const TSParseActionEntry *entry = &self->parse_actions[action_index];
|
||||
result->action_count = entry->count;
|
||||
result->is_reusable = entry->reusable;
|
||||
result->depends_on_lookahead = entry->depends_on_lookahead;
|
||||
result->actions = (const TSParseAction *)(entry + 1);
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,7 +12,6 @@ typedef struct {
|
|||
const TSParseAction *actions;
|
||||
uint32_t action_count;
|
||||
bool is_reusable;
|
||||
bool depends_on_lookahead;
|
||||
} TableEntry;
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *, TSStateId, TSSymbol, TableEntry *);
|
||||
|
|
|
|||
|
|
@ -487,16 +487,7 @@ static bool parser__can_reuse_first_leaf(Parser *self, TSStateId state, Tree *tr
|
|||
|
||||
// If the current state allows external tokens or other tokens that conflict with this
|
||||
// token, this token is not reusable.
|
||||
if (current_lex_mode.external_lex_state != 0 ||
|
||||
!table_entry->is_reusable) return false;
|
||||
|
||||
// If the current state allows other tokens of which this token is a *prefix* and the
|
||||
// content *after* this token has changed, this token isn't reusable.
|
||||
if (table_entry->depends_on_lookahead &&
|
||||
((tree->child_count <= 1 || tree->error_cost > 0) &&
|
||||
(next_reusable_node && reusable_node_has_leading_changes(next_reusable_node)))) return false;
|
||||
|
||||
return true;
|
||||
return current_lex_mode.external_lex_state == 0 && table_entry->is_reusable;
|
||||
}
|
||||
|
||||
static Tree *parser__get_lookahead(Parser *self, StackVersion version, TSStateId *state,
|
||||
|
|
|
|||
|
|
@ -36,16 +36,6 @@ static inline ReusableNode reusable_node_after_leaf(const ReusableNode *self) {
|
|||
return result;
|
||||
}
|
||||
|
||||
static inline bool reusable_node_has_leading_changes(const ReusableNode *self) {
|
||||
Tree *tree = self->tree;
|
||||
while (tree->has_changes) {
|
||||
if (tree->child_count == 0) return false;
|
||||
tree = tree->children[0];
|
||||
if (tree->size.bytes == 0) return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
static inline bool reusable_node_breakdown(ReusableNode *self) {
|
||||
if (self->tree->child_count == 0) {
|
||||
return false;
|
||||
|
|
|
|||
|
|
@ -1,85 +0,0 @@
|
|||
#include "test_helper.h"
|
||||
#include "helpers/stream_methods.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
||||
using namespace rules;
|
||||
using namespace build_tables;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("LexConflictManager::resolve(new_action, old_action)", []() {
|
||||
LexConflictManager conflict_manager;
|
||||
bool update;
|
||||
Symbol sym1 = Symbol::terminal(0);
|
||||
Symbol sym2 = Symbol::terminal(1);
|
||||
Symbol sym3 = Symbol::terminal(2);
|
||||
Symbol sym4 = Symbol::terminal(3);
|
||||
LexItemSet item_set({ LexItem(sym4, Blank{} )});
|
||||
|
||||
before_each([&]() {
|
||||
conflict_manager = LexConflictManager();
|
||||
});
|
||||
|
||||
it("favors advance actions over empty accept token actions", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(2, {0, 0}, true), AcceptTokenAction());
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
describe("accept-token/accept-token conflicts", [&]() {
|
||||
describe("when the tokens' precedence values differ", [&]() {
|
||||
it("favors the token with higher precedence", [&]() {
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(update, IsFalse());
|
||||
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym1, 2, false), AcceptTokenAction(sym2, 1, false));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
|
||||
it("adds the preferred token as a possible homonym for the discarded one", [&]() {
|
||||
conflict_manager.resolve(AcceptTokenAction(sym2, 1, false), AcceptTokenAction(sym1, 2, false));
|
||||
AssertThat(conflict_manager.possible_homonyms[sym2.index], Contains(sym1.index));
|
||||
});
|
||||
});
|
||||
|
||||
describe("when one token is string-based and the other is regexp-based", [&]() {
|
||||
it("favors the string-based token", [&]() {
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, true));
|
||||
AssertThat(update, IsFalse());
|
||||
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, true), AcceptTokenAction(sym1, 0, false));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the tokens have equal precedence", [&]() {
|
||||
it("favors the token listed earlier in the grammar", [&]() {
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym2, 0, false), AcceptTokenAction(sym1, 0, false));
|
||||
AssertThat(update, IsFalse());
|
||||
|
||||
update = conflict_manager.resolve(AcceptTokenAction(sym1, 0, false), AcceptTokenAction(sym2, 0, false));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
describe("advance/accept-token conflicts", [&]() {
|
||||
describe("when the token to accept has higher precedence", [&]() {
|
||||
it("prefers the accept-token action", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 3, true));
|
||||
AssertThat(update, IsFalse());
|
||||
});
|
||||
});
|
||||
|
||||
describe("when the token to accept does not have a higher precedence", [&]() {
|
||||
it("favors the advance action", [&]() {
|
||||
update = conflict_manager.resolve(item_set, AdvanceAction(1, { 1, 2 }, true), AcceptTokenAction(sym3, 2, true));
|
||||
AssertThat(update, IsTrue());
|
||||
});
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -61,11 +61,8 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
});
|
||||
|
||||
it("marks transitions that are within the main token (as opposed to separators)", [&]() {
|
||||
MetadataParams params;
|
||||
params.is_main_token = true;
|
||||
|
||||
LexItemSet item_set({
|
||||
LexItem(Symbol::non_terminal(1), Metadata{CharacterSet{{'x'}}, params}),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::main_token(CharacterSet{{'x'}})),
|
||||
});
|
||||
|
||||
AssertThat(
|
||||
|
|
@ -75,7 +72,7 @@ describe("LexItemSet::transitions()", [&]() {
|
|||
CharacterSet({'x'}),
|
||||
Transition{
|
||||
LexItemSet({
|
||||
LexItem(Symbol::non_terminal(1), Metadata{Blank{}, params}),
|
||||
LexItem(Symbol::non_terminal(1), Metadata::active_prec(0, Metadata::main_token(Blank{}))),
|
||||
}),
|
||||
PrecedenceRange(),
|
||||
true
|
||||
|
|
|
|||
|
|
@ -1,21 +0,0 @@
|
|||
#include "test_helper.h"
|
||||
#include "compiler/rule.h"
|
||||
|
||||
using namespace rules;
|
||||
|
||||
START_TEST
|
||||
|
||||
describe("Repeat", []() {
|
||||
describe("constructing repeats", [&]() {
|
||||
it("doesn't create redundant repeats", [&]() {
|
||||
Rule symbol = Symbol::non_terminal(1);
|
||||
Rule repeat = Rule::repeat(Rule(symbol));
|
||||
Rule outer_repeat = Rule::repeat(Rule(repeat));
|
||||
|
||||
AssertThat(repeat, !Equals(symbol));
|
||||
AssertThat(outer_repeat, Equals(repeat));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -5,7 +5,26 @@ using namespace rules;
|
|||
|
||||
START_TEST
|
||||
|
||||
describe("Choice", []() {
|
||||
describe("Repeat", []() {
|
||||
describe("constructing repeats", [&]() {
|
||||
it("doesn't create redundant repeats", [&]() {
|
||||
Rule symbol = Symbol::non_terminal(1);
|
||||
Rule repeat = Rule::repeat(Rule(symbol));
|
||||
Rule outer_repeat = Rule::repeat(Rule(repeat));
|
||||
|
||||
AssertThat(repeat, !Equals(symbol));
|
||||
AssertThat(outer_repeat, Equals(repeat));
|
||||
});
|
||||
});
|
||||
|
||||
describe("adding metadata to rules", [&]() {
|
||||
it("doesn't create redundant metadata rules", [&]() {
|
||||
Rule symbol = Symbol::non_terminal(1);
|
||||
Rule outer_rule = Metadata::prec(2, Metadata::prec(1, Rule(symbol)));
|
||||
AssertThat(outer_rule, Equals(Rule(Metadata::prec(1, Rule(symbol)))));
|
||||
});
|
||||
});
|
||||
|
||||
describe("constructing choices", [&]() {
|
||||
it("eliminates duplicate members", [&]() {
|
||||
Rule rule = Rule::choice({
|
||||
|
|
@ -111,7 +111,12 @@ ostream &operator<<(ostream &stream, const Repeat &rule) {
|
|||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Metadata &rule) {
|
||||
return stream << "(Metadata " << *rule.rule << ")";
|
||||
stream << "(Metadata";
|
||||
if (rule.params.has_precedence) stream << " prec=" << to_string(rule.params.precedence);
|
||||
if (rule.params.has_associativity) stream << " assoc=" << rule.params.associativity;
|
||||
if (rule.params.is_token) stream << " token";
|
||||
if (rule.params.is_main_token) stream << " main";
|
||||
return stream << " " << *rule.rule << ")";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Rule &rule) {
|
||||
|
|
@ -205,6 +210,7 @@ ostream &operator<<(ostream &stream, const LookaheadSet &lookaheads) {
|
|||
stream << "(LookaheadSet";
|
||||
lookaheads.for_each([&stream](Symbol symbol) {
|
||||
stream << " " << symbol;
|
||||
return true;
|
||||
});
|
||||
return stream << ")";
|
||||
}
|
||||
|
|
|
|||
|
|
@ -37,7 +37,6 @@
|
|||
'externals/crypto-algorithms',
|
||||
],
|
||||
'sources': [
|
||||
'test/compiler/build_tables/lex_conflict_manager_test.cc',
|
||||
'test/compiler/build_tables/lex_item_test.cc',
|
||||
'test/compiler/build_tables/parse_item_set_builder_test.cc',
|
||||
'test/compiler/build_tables/rule_can_be_blank_test.cc',
|
||||
|
|
@ -49,8 +48,7 @@
|
|||
'test/compiler/prepare_grammar/intern_symbols_test.cc',
|
||||
'test/compiler/prepare_grammar/parse_regex_test.cc',
|
||||
'test/compiler/rules/character_set_test.cc',
|
||||
'test/compiler/rules/choice_test.cc',
|
||||
'test/compiler/rules/repeat_test.cc',
|
||||
'test/compiler/rules/rule_test.cc',
|
||||
'test/compiler/util/string_helpers_test.cc',
|
||||
'test/helpers/encoding_helpers.cc',
|
||||
'test/helpers/file_helpers.cc',
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue