Merge pull request #87 from tree-sitter/dynamic-precedence
Introduce rule for resolving runtime ambiguities
This commit is contained in:
commit
43d347c225
36 changed files with 689 additions and 364 deletions
2
.gitmodules
vendored
2
.gitmodules
vendored
|
|
@ -12,4 +12,4 @@
|
|||
url = https://github.com/udp/json-parser.git
|
||||
[submodule "externals/crypto-algorithms"]
|
||||
path = externals/crypto-algorithms
|
||||
url = https://github.com/B-Con/crypto-algorithms.git
|
||||
url = https://github.com/maxbrunsfeld/crypto-algorithms.git
|
||||
|
|
|
|||
|
|
@ -182,41 +182,7 @@
|
|||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"prec-left-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC_LEFT$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"prec-right-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^PREC_RIGHT$"
|
||||
"pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
|
||||
},
|
||||
"value": {
|
||||
"type": "integer"
|
||||
|
|
@ -239,9 +205,7 @@
|
|||
{ "$ref": "#/definitions/repeat1-rule" },
|
||||
{ "$ref": "#/definitions/repeat-rule" },
|
||||
{ "$ref": "#/definitions/token-rule" },
|
||||
{ "$ref": "#/definitions/prec-rule" },
|
||||
{ "$ref": "#/definitions/prec-left-rule" },
|
||||
{ "$ref": "#/definitions/prec-right-rule" }
|
||||
{ "$ref": "#/definitions/prec-rule" }
|
||||
]
|
||||
}
|
||||
}
|
||||
|
|
|
|||
2
externals/crypto-algorithms
vendored
2
externals/crypto-algorithms
vendored
|
|
@ -1 +1 @@
|
|||
Subproject commit cfbde48414baacf51fc7c74f275190881f037d32
|
||||
Subproject commit c7e5c23ab04ecfb5465cbefbe17ba23d4cb3bc9d
|
||||
|
|
@ -42,6 +42,7 @@ typedef struct {
|
|||
union {
|
||||
TSStateId to_state;
|
||||
struct {
|
||||
short dynamic_precedence;
|
||||
TSSymbol symbol;
|
||||
unsigned short child_count;
|
||||
};
|
||||
|
|
@ -145,21 +146,30 @@ typedef struct TSLanguage {
|
|||
{ .type = TSParseActionTypeShift, .extra = true } \
|
||||
}
|
||||
|
||||
#define REDUCE(symbol_val, child_count_val) \
|
||||
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.params = {.symbol = symbol_val, .child_count = child_count_val } \
|
||||
.params = { \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
.dynamic_precedence = dynamic_precedence_val, \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define REDUCE_FRAGILE(symbol_val, child_count_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, .fragile = true, \
|
||||
.params = {.symbol = symbol_val, .child_count = child_count_val } \
|
||||
} \
|
||||
}
|
||||
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.fragile = true, \
|
||||
.params = { \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
.dynamic_precedence = dynamic_precedence_val, \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ \
|
||||
|
|
|
|||
|
|
@ -9,7 +9,7 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 2
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 3
|
||||
|
||||
typedef unsigned short TSSymbol;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@
|
|||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <set>
|
||||
#include <deque>
|
||||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
|
|
@ -16,11 +17,13 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::deque;
|
||||
using std::find;
|
||||
using std::pair;
|
||||
using std::vector;
|
||||
using std::set;
|
||||
using std::map;
|
||||
using std::move;
|
||||
using std::string;
|
||||
using std::to_string;
|
||||
using std::unordered_map;
|
||||
|
|
@ -28,75 +31,77 @@ using rules::Associativity;
|
|||
using rules::Symbol;
|
||||
using rules::END_OF_INPUT;
|
||||
|
||||
using SymbolSequence = vector<Symbol>;
|
||||
|
||||
// One pending unit of work in ParseTableBuilder's parse_state_queue.
//  - preceding_symbols: the symbol sequence that led to this state; it is
//    forwarded to add_actions and used when describing unresolved conflicts.
//  - item_set: the parse item set for the state; its transitive closure is
//    applied when the entry is dequeued, before actions are added.
//  - state_id: the id of the corresponding entry in parse_table.states.
struct ParseStateQueueEntry {
|
||||
SymbolSequence preceding_symbols;
|
||||
ParseItemSet item_set;
|
||||
ParseStateId state_id;
|
||||
};
|
||||
|
||||
class ParseTableBuilder {
|
||||
const SyntaxGrammar grammar;
|
||||
const LexicalGrammar lexical_grammar;
|
||||
unordered_map<Symbol, ParseItemSet> recovery_states;
|
||||
unordered_map<ParseItemSet, ParseStateId> parse_state_ids;
|
||||
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
|
||||
deque<ParseStateQueueEntry> parse_state_queue;
|
||||
ParseTable parse_table;
|
||||
set<string> conflicts;
|
||||
ParseItemSetBuilder item_set_builder;
|
||||
set<const Production *> fragile_productions;
|
||||
vector<set<Symbol>> incompatible_tokens_by_index;
|
||||
bool allow_any_conflict;
|
||||
vector<set<Symbol::Index>> following_terminals_by_terminal_index;
|
||||
bool processing_recovery_states;
|
||||
|
||||
public:
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar,
|
||||
const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
allow_any_conflict(false) {}
|
||||
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar)
|
||||
: grammar(grammar),
|
||||
lexical_grammar(lex_grammar),
|
||||
item_set_builder(grammar, lex_grammar),
|
||||
incompatible_tokens_by_index(lexical_grammar.variables.size()),
|
||||
following_terminals_by_terminal_index(lexical_grammar.variables.size()),
|
||||
processing_recovery_states(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
Symbol start_symbol = grammar.variables.empty() ?
|
||||
Symbol::terminal(0) :
|
||||
Symbol::non_terminal(0);
|
||||
Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0};
|
||||
|
||||
Production start_production{
|
||||
ProductionStep{start_symbol, 0, rules::AssociativityNone},
|
||||
};
|
||||
|
||||
// Placeholder for error state
|
||||
add_parse_state(ParseItemSet());
|
||||
|
||||
add_parse_state(ParseItemSet({
|
||||
ParseStateId error_state_id = add_parse_state({}, ParseItemSet());
|
||||
add_parse_state({}, ParseItemSet({
|
||||
{
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({ END_OF_INPUT() }),
|
||||
LookaheadSet({END_OF_INPUT()}),
|
||||
},
|
||||
}));
|
||||
|
||||
CompileError error = process_part_state_queue();
|
||||
if (error.type != TSCompileErrorTypeNone) {
|
||||
return { parse_table, error };
|
||||
}
|
||||
if (error.type != TSCompileErrorTypeNone) return {parse_table, error};
|
||||
|
||||
compute_unmergable_token_pairs();
|
||||
|
||||
build_error_parse_state();
|
||||
|
||||
allow_any_conflict = true;
|
||||
processing_recovery_states = true;
|
||||
build_error_parse_state(error_state_id);
|
||||
process_part_state_queue();
|
||||
allow_any_conflict = false;
|
||||
|
||||
mark_fragile_actions();
|
||||
remove_duplicate_parse_states();
|
||||
|
||||
return { parse_table, CompileError::none() };
|
||||
return {parse_table, CompileError::none()};
|
||||
}
|
||||
|
||||
private:
|
||||
CompileError process_part_state_queue() {
|
||||
while (!item_sets_to_process.empty()) {
|
||||
auto pair = item_sets_to_process.back();
|
||||
ParseItemSet &item_set = pair.first;
|
||||
ParseStateId state_id = pair.second;
|
||||
item_sets_to_process.pop_back();
|
||||
while (!parse_state_queue.empty()) {
|
||||
auto entry = parse_state_queue.front();
|
||||
parse_state_queue.pop_front();
|
||||
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
string conflict = add_actions(item_set, state_id);
|
||||
item_set_builder.apply_transitive_closure(&entry.item_set);
|
||||
string conflict = add_actions(
|
||||
move(entry.preceding_symbols),
|
||||
move(entry.item_set),
|
||||
entry.state_id
|
||||
);
|
||||
|
||||
if (!conflict.empty()) {
|
||||
return CompileError(TSCompileErrorTypeParseConflict, conflict);
|
||||
|
|
@ -106,7 +111,7 @@ class ParseTableBuilder {
|
|||
return CompileError::none();
|
||||
}
|
||||
|
||||
void build_error_parse_state() {
|
||||
void build_error_parse_state(ParseStateId state_id) {
|
||||
ParseState error_state;
|
||||
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
|
|
@ -141,14 +146,14 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
|
||||
parse_table.states[0] = error_state;
|
||||
parse_table.states[state_id] = error_state;
|
||||
}
|
||||
|
||||
void add_out_of_context_parse_state(ParseState *error_state,
|
||||
const rules::Symbol &symbol) {
|
||||
const ParseItemSet &item_set = recovery_states[symbol];
|
||||
if (!item_set.entries.empty()) {
|
||||
ParseStateId state = add_parse_state(item_set);
|
||||
ParseStateId state = add_parse_state({}, item_set);
|
||||
if (symbol.is_non_terminal()) {
|
||||
error_state->nonterminal_entries[symbol.index] = state;
|
||||
} else {
|
||||
|
|
@ -157,21 +162,24 @@ class ParseTableBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
ParseStateId state_id = parse_table.states.size();
|
||||
parse_table.states.push_back(ParseState());
|
||||
parse_state_ids[item_set] = state_id;
|
||||
parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature();
|
||||
item_sets_to_process.push_back({ std::move(item_set), state_id });
|
||||
parse_state_queue.push_back({
|
||||
move(preceding_symbols),
|
||||
move(item_set),
|
||||
state_id
|
||||
});
|
||||
return state_id;
|
||||
} else {
|
||||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
string add_actions(SymbolSequence &&sequence, ParseItemSet &&item_set, ParseStateId state_id) {
|
||||
map<Symbol, ParseItemSet> terminal_successors;
|
||||
map<Symbol::Index, ParseItemSet> nonterminal_successors;
|
||||
set<Symbol> lookaheads_with_conflicts;
|
||||
|
|
@ -197,7 +205,7 @@ class ParseTableBuilder {
|
|||
parse_table.add_terminal_action(state_id, lookahead, action);
|
||||
} else {
|
||||
ParseAction &existing_action = entry.actions[0];
|
||||
if (existing_action.type == ParseActionTypeAccept || allow_any_conflict) {
|
||||
if (existing_action.type == ParseActionTypeAccept || processing_recovery_states) {
|
||||
entry.actions.push_back(action);
|
||||
} else {
|
||||
int existing_precedence = existing_action.precedence();
|
||||
|
|
@ -236,29 +244,31 @@ class ParseTableBuilder {
|
|||
for (auto &pair : terminal_successors) {
|
||||
Symbol lookahead = pair.first;
|
||||
ParseItemSet &next_item_set = pair.second;
|
||||
ParseStateId next_state_id = add_parse_state(next_item_set);
|
||||
ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set);
|
||||
ParseState &state = parse_table.states[state_id];
|
||||
bool had_existing_action = !state.terminal_entries[lookahead].actions.empty();
|
||||
parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id));
|
||||
if (!allow_any_conflict) {
|
||||
if (had_existing_action)
|
||||
if (!processing_recovery_states) {
|
||||
if (had_existing_action) {
|
||||
lookaheads_with_conflicts.insert(lookahead);
|
||||
}
|
||||
recovery_states[lookahead].add(next_item_set);
|
||||
}
|
||||
}
|
||||
|
||||
// Add a Shift action for each non-terminal transition.
|
||||
for (auto &pair : nonterminal_successors) {
|
||||
Symbol::Index lookahead = pair.first;
|
||||
Symbol lookahead = Symbol::non_terminal(pair.first);
|
||||
ParseItemSet &next_item_set = pair.second;
|
||||
ParseStateId next_state = add_parse_state(next_item_set);
|
||||
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
|
||||
if (!allow_any_conflict)
|
||||
recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set);
|
||||
ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set);
|
||||
parse_table.set_nonterminal_action(state_id, lookahead.index, next_state_id);
|
||||
if (!processing_recovery_states) {
|
||||
recovery_states[lookahead].add(next_item_set);
|
||||
}
|
||||
}
|
||||
|
||||
for (Symbol lookahead : lookaheads_with_conflicts) {
|
||||
string conflict = handle_conflict(item_set, state_id, lookahead);
|
||||
string conflict = handle_conflict(item_set, sequence, state_id, lookahead);
|
||||
if (!conflict.empty()) return conflict;
|
||||
}
|
||||
|
||||
|
|
@ -266,7 +276,7 @@ class ParseTableBuilder {
|
|||
ParseState &state = parse_table.states[state_id];
|
||||
for (const Symbol &extra_symbol : grammar.extra_tokens) {
|
||||
if (!state.terminal_entries.count(extra_symbol) ||
|
||||
state.has_shift_action() || allow_any_conflict) {
|
||||
state.has_shift_action() || processing_recovery_states) {
|
||||
parse_table.add_terminal_action(state_id, extra_symbol, shift_extra);
|
||||
}
|
||||
}
|
||||
|
|
@ -281,9 +291,10 @@ class ParseTableBuilder {
|
|||
|
||||
for (ParseAction &action : actions) {
|
||||
if (action.type == ParseActionTypeReduce) {
|
||||
if (has_fragile_production(action.production))
|
||||
if (has_fragile_production(action.production)) {
|
||||
action.fragile = true;
|
||||
action.production = NULL;
|
||||
}
|
||||
action.production = nullptr;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -305,8 +316,6 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void compute_unmergable_token_pairs() {
|
||||
incompatible_tokens_by_index.resize(lexical_grammar.variables.size());
|
||||
|
||||
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
|
||||
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
|
|
@ -314,7 +323,7 @@ class ParseTableBuilder {
|
|||
|
||||
for (unsigned j = 0; j < n; j++) {
|
||||
if (i == j) continue;
|
||||
if (lex_table_builder->detect_conflict(i, j)) {
|
||||
if (lex_table_builder->detect_conflict(i, j, following_terminals_by_terminal_index)) {
|
||||
incompatible_indices.insert(Symbol::terminal(j));
|
||||
}
|
||||
}
|
||||
|
|
@ -330,11 +339,12 @@ class ParseTableBuilder {
|
|||
}
|
||||
|
||||
void remove_duplicate_parse_states() {
|
||||
map<size_t, set<ParseStateId>> state_indices_by_signature;
|
||||
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
|
||||
|
||||
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
state_indices_by_signature[state.shift_actions_signature].insert(i);
|
||||
for (auto &pair : parse_state_ids) {
|
||||
const ParseItemSet &item_set = pair.first;
|
||||
ParseStateId state_id = pair.second;
|
||||
state_indices_by_signature[item_set.unfinished_item_signature()].insert(state_id);
|
||||
}
|
||||
|
||||
set<ParseStateId> deleted_states;
|
||||
|
|
@ -343,14 +353,18 @@ class ParseTableBuilder {
|
|||
map<ParseStateId, ParseStateId> state_replacements;
|
||||
|
||||
for (auto &pair : state_indices_by_signature) {
|
||||
auto &state_group = pair.second;
|
||||
auto &state_indices = pair.second;
|
||||
|
||||
for (ParseStateId i : state_group) {
|
||||
for (ParseStateId j : state_group) {
|
||||
if (j == i) break;
|
||||
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
|
||||
state_replacements.insert({ i, j });
|
||||
deleted_states.insert(i);
|
||||
for (auto i = state_indices.begin(), end = state_indices.end(); i != end;) {
|
||||
for (ParseStateId j : state_indices) {
|
||||
if (j == *i) {
|
||||
++i;
|
||||
break;
|
||||
}
|
||||
if (!state_replacements.count(j) && merge_parse_state(j, *i)) {
|
||||
state_replacements.insert({*i, j});
|
||||
deleted_states.insert(*i);
|
||||
i = state_indices.erase(i);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
|
@ -360,11 +374,8 @@ class ParseTableBuilder {
|
|||
if (state_replacements.empty()) break;
|
||||
|
||||
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
|
||||
if (state_replacements.count(i)) {
|
||||
state_indices_by_signature[state.shift_actions_signature].erase(i);
|
||||
} else {
|
||||
if (!state_replacements.count(i)) {
|
||||
ParseState &state = parse_table.states[i];
|
||||
state.each_referenced_state([&state_replacements](ParseStateId *state_index) {
|
||||
auto replacement = state_replacements.find(*state_index);
|
||||
if (replacement != state_replacements.end()) {
|
||||
|
|
@ -404,7 +415,7 @@ class ParseTableBuilder {
|
|||
|
||||
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
|
||||
for (const auto &pair : state.terminal_entries)
|
||||
if (pair.second == entry)
|
||||
if (pair.second.actions == entry.actions)
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
|
@ -417,13 +428,12 @@ class ParseTableBuilder {
|
|||
|
||||
for (auto &entry : state.terminal_entries) {
|
||||
Symbol lookahead = entry.first;
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
|
||||
const auto &other_entry = other.terminal_entries.find(lookahead);
|
||||
if (other_entry == other.terminal_entries.end()) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
|
||||
if (!has_entry(other, entry.second)) return false;
|
||||
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
|
||||
if (other.terminal_entries.count(incompatible_token)) return false;
|
||||
|
|
@ -440,10 +450,9 @@ class ParseTableBuilder {
|
|||
Symbol lookahead = entry.first;
|
||||
|
||||
if (!state.terminal_entries.count(lookahead)) {
|
||||
if (lookahead.is_external()) return false;
|
||||
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
|
||||
if (!has_entry(state, entry.second)) return false;
|
||||
|
||||
if (lookahead.is_external()) return false;
|
||||
if (!lookahead.is_built_in()) {
|
||||
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
|
||||
if (state.terminal_entries.count(incompatible_token)) return false;
|
||||
|
|
@ -461,8 +470,8 @@ class ParseTableBuilder {
|
|||
return true;
|
||||
}
|
||||
|
||||
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
|
||||
Symbol lookahead) {
|
||||
string handle_conflict(const ParseItemSet &item_set, const SymbolSequence &preceding_symbols,
|
||||
ParseStateId state_id, Symbol lookahead) {
|
||||
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
|
||||
int reduction_precedence = entry.actions.front().precedence();
|
||||
set<ParseItem> shift_items;
|
||||
|
|
@ -556,24 +565,13 @@ class ParseTableBuilder {
|
|||
if (expected_conflict == actual_conflict)
|
||||
return "";
|
||||
|
||||
ParseItem earliest_starting_item;
|
||||
for (const ParseAction &action : entry.actions)
|
||||
if (action.type == ParseActionTypeReduce)
|
||||
if (action.consumed_symbol_count > earliest_starting_item.step_index)
|
||||
earliest_starting_item = ParseItem(action.symbol, *action.production, action.consumed_symbol_count);
|
||||
|
||||
for (const ParseItem &shift_item : shift_items)
|
||||
if (shift_item.step_index > earliest_starting_item.step_index)
|
||||
earliest_starting_item = shift_item;
|
||||
|
||||
string description = "Unresolved conflict for symbol sequence:\n\n";
|
||||
for (size_t i = 0; i < earliest_starting_item.step_index; i++) {
|
||||
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
|
||||
for (auto &symbol : preceding_symbols) {
|
||||
description += " " + symbol_name(symbol);
|
||||
}
|
||||
|
||||
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
|
||||
description += "\n\n";
|
||||
|
||||
description += "Possible interpretations:\n\n";
|
||||
|
||||
size_t interpretation_count = 1;
|
||||
|
|
@ -581,12 +579,12 @@ class ParseTableBuilder {
|
|||
if (action.type == ParseActionTypeReduce) {
|
||||
description += " " + to_string(interpretation_count++) + ":";
|
||||
|
||||
for (size_t i = 0; i < earliest_starting_item.step_index - action.consumed_symbol_count; i++) {
|
||||
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
|
||||
for (size_t i = 0; i < preceding_symbols.size() - action.consumed_symbol_count; i++) {
|
||||
description += " " + symbol_name(preceding_symbols[i]);
|
||||
}
|
||||
|
||||
description += " (" + symbol_name(action.symbol);
|
||||
for (const ProductionStep &step : *action.production) {
|
||||
for (const ProductionStep &step : action.production->steps) {
|
||||
description += " " + symbol_name(step.symbol);
|
||||
}
|
||||
description += ")";
|
||||
|
|
@ -598,8 +596,8 @@ class ParseTableBuilder {
|
|||
for (const ParseItem &shift_item : shift_items) {
|
||||
description += " " + to_string(interpretation_count++) + ":";
|
||||
|
||||
for (size_t i = 0; i < earliest_starting_item.step_index - shift_item.step_index; i++) {
|
||||
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
|
||||
for (size_t i = 0; i < preceding_symbols.size() - shift_item.step_index; i++) {
|
||||
description += " " + symbol_name(preceding_symbols[i]);
|
||||
}
|
||||
|
||||
description += " (" + symbol_name(shift_item.lhs());
|
||||
|
|
@ -690,6 +688,30 @@ class ParseTableBuilder {
|
|||
bool has_fragile_production(const Production *production) {
|
||||
return fragile_productions.find(production) != fragile_productions.end();
|
||||
}
|
||||
|
||||
// Returns a copy of `sequence` with `symbol` appended to the end.
//
// Side effect: when `sequence` is non-empty, records terminal adjacency in
// `following_terminals_by_terminal_index`. Every non-built-in terminal in the
// last set of the sequence's final symbol is marked as possibly followed by
// every non-built-in terminal in the first set of `symbol`. That table is
// later passed to the lex table builder's detect_conflict.
SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) {
  if (!sequence.empty()) {
    const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back());
    const LookaheadSet &right_tokens = item_set_builder.get_first_set(symbol);

    // `entries` is only dereferenced once both sets report non-empty.
    if (!left_tokens.empty() && !right_tokens.empty()) {
      for (const Symbol &left_symbol : *left_tokens.entries) {
        if (!left_symbol.is_terminal() || left_symbol.is_built_in()) continue;

        for (const Symbol &right_symbol : *right_tokens.entries) {
          if (right_symbol.is_terminal() && !right_symbol.is_built_in()) {
            following_terminals_by_terminal_index[left_symbol.index].insert(right_symbol.index);
          }
        }
      }
    }
  }

  // Reserve capacity instead of size-constructing the vector: the sized
  // constructor would build `size + 1` default Symbols only to throw them
  // away when the real contents are copied in.
  SymbolSequence result;
  result.reserve(sequence.size() + 1);
  result.insert(result.end(), sequence.begin(), sequence.end());
  result.push_back(symbol);
  return result;
}
|
||||
};
|
||||
|
||||
pair<ParseTable, CompileError> build_parse_table(
|
||||
|
|
|
|||
|
|
@ -5,6 +5,7 @@
|
|||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include <cwctype>
|
||||
#include <vector>
|
||||
#include "compiler/build_tables/lex_conflict_manager.h"
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
|
|
@ -15,6 +16,7 @@
|
|||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
||||
using std::iswalpha;
|
||||
using std::map;
|
||||
using std::pair;
|
||||
using std::set;
|
||||
|
|
@ -70,14 +72,16 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
LexTable lex_table;
|
||||
const LexicalGrammar grammar;
|
||||
vector<Rule> separator_rules;
|
||||
CharacterSet separator_start_characters;
|
||||
CharacterSet token_start_characters;
|
||||
LexConflictManager conflict_manager;
|
||||
unordered_map<LexItemSet, LexStateId> lex_state_ids;
|
||||
|
||||
public:
|
||||
vector<bool> shadowed_token_indices;
|
||||
map<Symbol::Index, CharacterSet> following_characters_by_token_index;
|
||||
CharacterSet separator_start_characters;
|
||||
CharacterSet current_conflict_detection_following_characters;
|
||||
Symbol::Index current_conflict_detection_token_index;
|
||||
bool current_conflict_value;
|
||||
|
||||
public:
|
||||
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
|
||||
StartingCharacterAggregator separator_character_aggregator;
|
||||
for (const auto &rule : grammar.separators) {
|
||||
|
|
@ -86,20 +90,6 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
}
|
||||
separator_rules.push_back(Blank{});
|
||||
separator_start_characters = separator_character_aggregator.result;
|
||||
|
||||
StartingCharacterAggregator token_start_character_aggregator;
|
||||
for (const auto &variable : grammar.variables) {
|
||||
token_start_character_aggregator.apply(variable.rule);
|
||||
}
|
||||
token_start_characters = token_start_character_aggregator.result;
|
||||
token_start_characters
|
||||
.exclude('a', 'z')
|
||||
.exclude('A', 'Z')
|
||||
.exclude('0', '9')
|
||||
.exclude('_')
|
||||
.exclude('$');
|
||||
|
||||
shadowed_token_indices.resize(grammar.variables.size());
|
||||
}
|
||||
|
||||
LexTable build(ParseTable *parse_table) {
|
||||
|
|
@ -113,7 +103,10 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
return lex_table;
|
||||
}
|
||||
|
||||
bool detect_conflict(Symbol::Index left, Symbol::Index right) {
|
||||
bool detect_conflict(Symbol::Index left, Symbol::Index right,
|
||||
const vector<set<Symbol::Index>> &following_terminals_by_terminal_index) {
|
||||
clear();
|
||||
|
||||
StartingCharacterAggregator left_starting_characters;
|
||||
StartingCharacterAggregator right_starting_characters;
|
||||
left_starting_characters.apply(grammar.variables[left].rule);
|
||||
|
|
@ -124,12 +117,47 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
return false;
|
||||
}
|
||||
|
||||
clear();
|
||||
map<Symbol, ParseTableEntry> terminals;
|
||||
terminals[Symbol::terminal(left)];
|
||||
terminals[Symbol::terminal(right)];
|
||||
add_lex_state(item_set_for_terminals(terminals));
|
||||
return shadowed_token_indices[right];
|
||||
auto following_characters_entry = following_characters_by_token_index.find(right);
|
||||
if (following_characters_entry == following_characters_by_token_index.end()) {
|
||||
StartingCharacterAggregator aggregator;
|
||||
for (auto following_token_index : following_terminals_by_terminal_index[right]) {
|
||||
aggregator.apply(grammar.variables[following_token_index].rule);
|
||||
}
|
||||
following_characters_entry =
|
||||
following_characters_by_token_index.insert({right, aggregator.result}).first;
|
||||
|
||||
// TODO - Refactor this. In general, a keyword token cannot be followed immediately by
|
||||
// another alphanumeric character. But this requirement is currently not expressed anywhere in
|
||||
// the grammar. So without this hack, we would be overly conservative about merging parse
|
||||
// states because we would often consider `identifier` tokens to *conflict* with keyword
|
||||
// tokens.
|
||||
if (is_keyword(grammar.variables[right])) {
|
||||
following_characters_entry->second
|
||||
.exclude('a', 'z')
|
||||
.exclude('A', 'Z')
|
||||
.exclude('0', '9')
|
||||
.exclude('_')
|
||||
.exclude('$');
|
||||
}
|
||||
}
|
||||
|
||||
current_conflict_detection_token_index = right;
|
||||
current_conflict_detection_following_characters = following_characters_entry->second;
|
||||
add_lex_state(item_set_for_terminals({{Symbol::terminal(left), {}}, {Symbol::terminal(right), {}}}));
|
||||
return current_conflict_value;
|
||||
}
|
||||
|
||||
bool is_keyword(const LexicalVariable &variable) {
|
||||
return variable.is_string && iswalpha(get_last_character(variable.rule));
|
||||
}
|
||||
|
||||
static uint32_t get_last_character(const Rule &rule) {
|
||||
return rule.match(
|
||||
[](const Seq &sequence) { return get_last_character(*sequence.right); },
|
||||
[](const rules::CharacterSet &rule) { return *rule.included_chars.begin(); },
|
||||
[](const rules::Metadata &rule) { return get_last_character(*rule.rule); },
|
||||
[](auto) { return 0; }
|
||||
);
|
||||
}
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
|
|
@ -149,7 +177,8 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
void clear() {
|
||||
lex_table.states.clear();
|
||||
lex_state_ids.clear();
|
||||
shadowed_token_indices.assign(grammar.variables.size(), false);
|
||||
current_conflict_detection_following_characters = CharacterSet();
|
||||
current_conflict_value = false;
|
||||
}
|
||||
|
||||
private:
|
||||
|
|
@ -166,17 +195,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
for (const LexItem &item : transition.destination.entries) {
|
||||
if (item.lhs == accept_action.symbol) {
|
||||
can_advance_for_accepted_token = true;
|
||||
} else if (!prefer_advancing && !transition.in_main_token && !item.lhs.is_built_in()) {
|
||||
shadowed_token_indices[item.lhs.index] = true;
|
||||
} else if (item.lhs.index == current_conflict_detection_token_index &&
|
||||
!prefer_advancing && !transition.in_main_token) {
|
||||
current_conflict_value = true;
|
||||
}
|
||||
}
|
||||
|
||||
if (!can_advance_for_accepted_token) {
|
||||
if (characters.intersects(separator_start_characters) ||
|
||||
(grammar.variables[accept_action.symbol.index].is_string &&
|
||||
characters.intersects(token_start_characters))) {
|
||||
shadowed_token_indices[accept_action.symbol.index] = true;
|
||||
}
|
||||
if (accept_action.symbol.index == current_conflict_detection_token_index &&
|
||||
!can_advance_for_accepted_token &&
|
||||
(characters.intersects(separator_start_characters) ||
|
||||
(characters.intersects(current_conflict_detection_following_characters) &&
|
||||
grammar.variables[accept_action.symbol.index].is_string))) {
|
||||
current_conflict_value = true;
|
||||
}
|
||||
|
||||
if (!prefer_advancing) continue;
|
||||
|
|
@ -346,8 +376,9 @@ LexTable LexTableBuilder::build(ParseTable *parse_table) {
|
|||
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
|
||||
}
|
||||
|
||||
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) {
|
||||
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right);
|
||||
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right,
|
||||
const vector<set<Symbol::Index>> &following_terminals) {
|
||||
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right, following_terminals);
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -2,6 +2,8 @@
|
|||
#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
|
||||
|
||||
#include <memory>
|
||||
#include <vector>
|
||||
#include <set>
|
||||
#include "compiler/lex_table.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -15,7 +17,11 @@ class LexTableBuilder {
|
|||
public:
|
||||
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
|
||||
LexTable build(ParseTable *);
|
||||
bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index);
|
||||
bool detect_conflict(
|
||||
rules::Symbol::Index,
|
||||
rules::Symbol::Index,
|
||||
const std::vector<std::set<rules::Symbol::Index>> &following_terminals_by_terminal_index
|
||||
);
|
||||
protected:
|
||||
LexTableBuilder() = default;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -60,6 +60,10 @@ int ParseItem::precedence() const {
|
|||
}
|
||||
}
|
||||
|
||||
int ParseItem::dynamic_precedence() const {
|
||||
return production->dynamic_precedence;
|
||||
}
|
||||
|
||||
rules::Associativity ParseItem::associativity() const {
|
||||
if (is_done()) {
|
||||
if (production->empty()) {
|
||||
|
|
@ -93,13 +97,12 @@ size_t ParseItemSet::unfinished_item_signature() const {
|
|||
ParseItem previous_item;
|
||||
for (auto &pair : entries) {
|
||||
const ParseItem &item = pair.first;
|
||||
if (item.step_index < item.production->size()) {
|
||||
if (item.variable_index != previous_item.variable_index &&
|
||||
item.step_index != previous_item.step_index) {
|
||||
hash_combine(&result, item.variable_index);
|
||||
hash_combine(&result, item.step_index);
|
||||
previous_item = item;
|
||||
}
|
||||
if (item.step_index < item.production->size() &&
|
||||
(item.variable_index != previous_item.variable_index ||
|
||||
item.step_index != previous_item.step_index)) {
|
||||
hash_combine(&result, item.variable_index);
|
||||
hash_combine(&result, item.step_index);
|
||||
previous_item = item;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -26,6 +26,7 @@ struct ParseItem {
|
|||
rules::Symbol lhs() const;
|
||||
rules::Symbol next_symbol() const;
|
||||
int precedence() const;
|
||||
int dynamic_precedence() const;
|
||||
rules::Associativity associativity() const;
|
||||
bool is_done() const;
|
||||
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
#include <cassert>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
#include <utility>
|
||||
|
|
@ -26,18 +27,20 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
|
||||
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::terminal(i);
|
||||
first_sets.insert({symbol, LookaheadSet({ symbol })});
|
||||
first_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
last_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::external(i);
|
||||
first_sets.insert({symbol, LookaheadSet({ symbol })});
|
||||
first_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
last_sets.insert({symbol, LookaheadSet({symbol})});
|
||||
}
|
||||
|
||||
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
Symbol symbol = Symbol::non_terminal(i);
|
||||
LookaheadSet first_set;
|
||||
|
||||
LookaheadSet first_set;
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push_back(symbol);
|
||||
|
|
@ -57,6 +60,26 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
|
|||
}
|
||||
|
||||
first_sets.insert({symbol, first_set});
|
||||
|
||||
LookaheadSet last_set;
|
||||
processed_non_terminals.clear();
|
||||
symbols_to_process.clear();
|
||||
symbols_to_process.push_back(symbol);
|
||||
while (!symbols_to_process.empty()) {
|
||||
Symbol current_symbol = symbols_to_process.back();
|
||||
symbols_to_process.pop_back();
|
||||
|
||||
if (!current_symbol.is_non_terminal()) {
|
||||
last_set.insert(current_symbol);
|
||||
} else if (processed_non_terminals.insert(current_symbol.index).second) {
|
||||
for (const Production &production : grammar.variables[current_symbol.index].productions) {
|
||||
if (!production.empty()) {
|
||||
symbols_to_process.push_back(production.back().symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
last_sets.insert({symbol, last_set});
|
||||
}
|
||||
|
||||
vector<ParseItemSetComponent> components_to_process;
|
||||
|
|
@ -161,5 +184,9 @@ LookaheadSet ParseItemSetBuilder::get_first_set(const rules::Symbol &symbol) con
|
|||
return first_sets.find(symbol)->second;
|
||||
}
|
||||
|
||||
LookaheadSet ParseItemSetBuilder::get_last_set(const rules::Symbol &symbol) const {
|
||||
return last_sets.find(symbol)->second;
|
||||
}
|
||||
|
||||
} // namespace build_tables
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ class ParseItemSetBuilder {
|
|||
};
|
||||
|
||||
std::map<rules::Symbol, LookaheadSet> first_sets;
|
||||
std::map<rules::Symbol, LookaheadSet> last_sets;
|
||||
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
|
||||
std::vector<std::pair<ParseItem, LookaheadSet>> item_set_buffer;
|
||||
|
||||
|
|
@ -27,6 +28,7 @@ class ParseItemSetBuilder {
|
|||
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
|
||||
void apply_transitive_closure(ParseItemSet *);
|
||||
LookaheadSet get_first_set(const rules::Symbol &) const;
|
||||
LookaheadSet get_last_set(const rules::Symbol &) const;
|
||||
};
|
||||
|
||||
} // namespace build_tables
|
||||
|
|
|
|||
|
|
@ -401,52 +401,106 @@ class CCodeGenerator {
|
|||
add_accept_token_action(lex_state.accept_action);
|
||||
}
|
||||
|
||||
set<uint32_t> ruled_out_characters;
|
||||
for (const auto &pair : lex_state.advance_actions) {
|
||||
if (!pair.first.is_empty()) {
|
||||
_if([&]() { add_character_set_condition(pair.first); },
|
||||
[&]() { add_advance_action(pair.second); });
|
||||
if (pair.first.is_empty()) continue;
|
||||
|
||||
size_t current_length = buffer.size();
|
||||
|
||||
line("if (");
|
||||
if (add_character_set_condition(pair.first, ruled_out_characters)) {
|
||||
add(")");
|
||||
indent([&]() { add_advance_action(pair.second); });
|
||||
ruled_out_characters.insert(pair.first.included_chars.begin(), pair.first.included_chars.end());
|
||||
} else {
|
||||
buffer.resize(current_length);
|
||||
add_advance_action(pair.second);
|
||||
}
|
||||
}
|
||||
|
||||
line("END_STATE();");
|
||||
}
|
||||
|
||||
void add_character_set_condition(const rules::CharacterSet &rule) {
|
||||
bool add_character_set_condition(const rules::CharacterSet &rule, const set<uint32_t> &ruled_out_characters) {
|
||||
if (rule.includes_all) {
|
||||
add("!(");
|
||||
add_character_range_conditions(rule.excluded_ranges());
|
||||
add(")");
|
||||
return add_character_range_conditions(rule.excluded_ranges(), ruled_out_characters, true);
|
||||
} else {
|
||||
add_character_range_conditions(rule.included_ranges());
|
||||
return add_character_range_conditions(rule.included_ranges(), ruled_out_characters, false);
|
||||
}
|
||||
}
|
||||
|
||||
void add_character_range_conditions(const vector<rules::CharacterRange> &ranges) {
|
||||
if (ranges.size() == 1) {
|
||||
add_character_range_condition(*ranges.begin());
|
||||
} else {
|
||||
bool first = true;
|
||||
for (const auto &range : ranges) {
|
||||
if (!first) {
|
||||
add(" ||");
|
||||
line(" ");
|
||||
bool add_character_range_conditions(const vector<rules::CharacterRange> &ranges,
|
||||
const set<uint32_t> &ruled_out_characters,
|
||||
bool is_negated) {
|
||||
bool first = true;
|
||||
for (auto iter = ranges.begin(), end = ranges.end(); iter != end;) {
|
||||
auto range = *iter;
|
||||
|
||||
bool range_is_ruled_out = true;
|
||||
for (uint32_t c = range.min; c <= range.max; c++) {
|
||||
if (!ruled_out_characters.count(c)) {
|
||||
range_is_ruled_out = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (range_is_ruled_out) {
|
||||
++iter;
|
||||
continue;
|
||||
}
|
||||
|
||||
auto next_iter = iter + 1;
|
||||
while (next_iter != end) {
|
||||
bool can_join_ranges = true;
|
||||
for (uint32_t character = range.max + 1; character < next_iter->min; character++) {
|
||||
if (!ruled_out_characters.count(character)) {
|
||||
can_join_ranges = false;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
add("(");
|
||||
add_character_range_condition(range);
|
||||
add(")");
|
||||
|
||||
first = false;
|
||||
if (can_join_ranges) {
|
||||
range.max = next_iter->max;
|
||||
++next_iter;
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (!first) {
|
||||
add(is_negated ? " &&" : " ||");
|
||||
line(" ");
|
||||
}
|
||||
|
||||
add_character_range_condition(range, is_negated);
|
||||
first = false;
|
||||
iter = next_iter;
|
||||
}
|
||||
|
||||
return !first;
|
||||
}
|
||||
|
||||
void add_character_range_condition(const rules::CharacterRange &range) {
|
||||
if (range.min == range.max) {
|
||||
add("lookahead == " + escape_char(range.min));
|
||||
void add_character_range_condition(const rules::CharacterRange &range, bool is_negated) {
|
||||
auto min = escape_char(range.min);
|
||||
auto max = escape_char(range.max);
|
||||
if (is_negated) {
|
||||
if (range.max == range.min) {
|
||||
add("lookahead != " + min);
|
||||
} else if (range.max == range.min + 1) {
|
||||
add("lookahead != " + min + " &&");
|
||||
line(" lookahead != " + max);
|
||||
} else {
|
||||
add("(lookahead < " + min + " || lookahead > " + max + ")");
|
||||
}
|
||||
} else {
|
||||
add(escape_char(range.min) + string(" <= lookahead && lookahead <= ") +
|
||||
escape_char(range.max));
|
||||
if (range.max == range.min) {
|
||||
add("lookahead == " + min);
|
||||
} else if (range.max == range.min + 1) {
|
||||
add("lookahead == " + min + " ||");
|
||||
line(" lookahead == " + max);
|
||||
} else {
|
||||
add("(" + min + " <= lookahead && lookahead <= " + max + ")");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -490,12 +544,17 @@ class CCodeGenerator {
|
|||
break;
|
||||
case ParseActionTypeReduce:
|
||||
if (action.fragile) {
|
||||
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
add("REDUCE_FRAGILE");
|
||||
} else {
|
||||
add("REDUCE(" + symbol_id(action.symbol) + ", " +
|
||||
to_string(action.consumed_symbol_count) + ")");
|
||||
add("REDUCE");
|
||||
}
|
||||
|
||||
add("(");
|
||||
add(symbol_id(action.symbol));
|
||||
add(", ");
|
||||
add(to_string(action.consumed_symbol_count));
|
||||
add(", " + to_string(action.dynamic_precedence));
|
||||
add(")");
|
||||
break;
|
||||
case ParseActionTypeRecover:
|
||||
add("RECOVER(" + to_string(action.state_index) + ")");
|
||||
|
|
@ -594,13 +653,6 @@ class CCodeGenerator {
|
|||
indent(body);
|
||||
}
|
||||
|
||||
void _if(function<void()> condition, function<void()> body) {
|
||||
line("if (");
|
||||
indent(condition);
|
||||
add(")");
|
||||
indent(body);
|
||||
}
|
||||
|
||||
string sanitize_name_for_string(string name) {
|
||||
util::str_replace(&name, "\\", "\\\\");
|
||||
util::str_replace(&name, "\n", "\\n");
|
||||
|
|
|
|||
|
|
@ -184,6 +184,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
if (type == "PREC_DYNAMIC") {
|
||||
json_value precedence_json = rule_json->operator[]("value");
|
||||
if (precedence_json.type != json_integer) {
|
||||
return "Precedence value must be an integer";
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid precedence content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
return "Unknown rule type: " + type;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,25 +13,14 @@ using std::vector;
|
|||
using std::function;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
Symbol symbol, size_t consumed_symbol_count,
|
||||
const Production *production)
|
||||
: type(type),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
state_index(state_index),
|
||||
symbol(symbol),
|
||||
consumed_symbol_count(consumed_symbol_count),
|
||||
production(production) {}
|
||||
|
||||
ParseAction::ParseAction()
|
||||
: type(ParseActionTypeError),
|
||||
: production(nullptr),
|
||||
consumed_symbol_count(0),
|
||||
symbol(rules::NONE()),
|
||||
type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
state_index(-1),
|
||||
symbol(rules::NONE()),
|
||||
consumed_symbol_count(0),
|
||||
production(nullptr) {}
|
||||
state_index(-1) {}
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction();
|
||||
|
|
@ -44,12 +33,17 @@ ParseAction ParseAction::Accept() {
|
|||
}
|
||||
|
||||
ParseAction ParseAction::Shift(ParseStateId state_index) {
|
||||
return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
|
||||
ParseAction result;
|
||||
result.type = ParseActionTypeShift;
|
||||
result.state_index = state_index;
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseAction ParseAction::Recover(ParseStateId state_index) {
|
||||
return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
|
||||
nullptr);
|
||||
ParseAction result;
|
||||
result.type = ParseActionTypeRecover;
|
||||
result.state_index = state_index;
|
||||
return result;
|
||||
}
|
||||
|
||||
ParseAction ParseAction::ShiftExtra() {
|
||||
|
|
@ -61,8 +55,13 @@ ParseAction ParseAction::ShiftExtra() {
|
|||
|
||||
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
||||
const Production &production) {
|
||||
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
|
||||
&production);
|
||||
ParseAction result;
|
||||
result.type = ParseActionTypeReduce;
|
||||
result.symbol = symbol;
|
||||
result.consumed_symbol_count = consumed_symbol_count;
|
||||
result.production = &production;
|
||||
result.dynamic_precedence = production.dynamic_precedence;
|
||||
return result;
|
||||
}
|
||||
|
||||
int ParseAction::precedence() const {
|
||||
|
|
|
|||
|
|
@ -24,9 +24,6 @@ enum ParseActionType {
|
|||
|
||||
struct ParseAction {
|
||||
ParseAction();
|
||||
ParseAction(ParseActionType type, ParseStateId state_index,
|
||||
rules::Symbol symbol, size_t consumed_symbol_count,
|
||||
const Production *);
|
||||
static ParseAction Accept();
|
||||
static ParseAction Error();
|
||||
static ParseAction Shift(ParseStateId state_index);
|
||||
|
|
@ -39,13 +36,14 @@ struct ParseAction {
|
|||
rules::Associativity associativity() const;
|
||||
int precedence() const;
|
||||
|
||||
const Production *production;
|
||||
size_t consumed_symbol_count;
|
||||
rules::Symbol symbol;
|
||||
int dynamic_precedence;
|
||||
ParseActionType type;
|
||||
bool extra;
|
||||
bool fragile;
|
||||
ParseStateId state_index;
|
||||
rules::Symbol symbol;
|
||||
size_t consumed_symbol_count;
|
||||
const Production *production;
|
||||
};
|
||||
|
||||
struct ParseTableEntry {
|
||||
|
|
@ -71,7 +69,6 @@ struct ParseState {
|
|||
std::map<rules::Symbol, ParseTableEntry> terminal_entries;
|
||||
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
|
||||
LexStateId lex_state_id;
|
||||
size_t shift_actions_signature;
|
||||
};
|
||||
|
||||
struct ParseTableSymbolMetadata {
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include <vector>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
|
|
@ -26,7 +27,7 @@ class FlattenRule {
|
|||
void apply(const Rule &rule) {
|
||||
rule.match(
|
||||
[&](const rules::Symbol &symbol) {
|
||||
production.push_back(ProductionStep{
|
||||
production.steps.push_back(ProductionStep{
|
||||
symbol,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
|
|
@ -42,6 +43,10 @@ class FlattenRule {
|
|||
associativity_stack.push_back(metadata.params.associativity);
|
||||
}
|
||||
|
||||
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
|
||||
production.dynamic_precedence = metadata.params.dynamic_precedence;
|
||||
}
|
||||
|
||||
apply(*metadata.rule);
|
||||
|
||||
if (metadata.params.has_precedence) {
|
||||
|
|
|
|||
|
|
@ -38,20 +38,11 @@ static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right)
|
|||
return result;
|
||||
}
|
||||
|
||||
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
|
||||
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &characters) {
|
||||
vector<CharacterRange> result;
|
||||
for (uint32_t c : chars) {
|
||||
auto size = result.size();
|
||||
if (size >= 2 && result[size - 2].max == (c - 2)) {
|
||||
result.pop_back();
|
||||
for (uint32_t c : characters) {
|
||||
if (!result.empty() && result.back().max == c - 1) {
|
||||
result.back().max = c;
|
||||
} else if (size >= 1) {
|
||||
CharacterRange &last = result.back();
|
||||
if (last.min < last.max && last.max == (c - 1)) {
|
||||
last.max = c;
|
||||
} else {
|
||||
result.push_back(CharacterRange(c));
|
||||
}
|
||||
} else {
|
||||
result.push_back(CharacterRange(c));
|
||||
}
|
||||
|
|
@ -70,15 +61,17 @@ bool CharacterSet::operator==(const CharacterSet &other) const {
|
|||
}
|
||||
|
||||
bool CharacterSet::operator<(const CharacterSet &other) const {
|
||||
if (!includes_all && other.includes_all)
|
||||
return true;
|
||||
if (includes_all && !other.includes_all)
|
||||
return false;
|
||||
if (included_chars < other.included_chars)
|
||||
return true;
|
||||
if (other.included_chars < included_chars)
|
||||
return false;
|
||||
return excluded_chars < other.excluded_chars;
|
||||
if (!includes_all && other.includes_all) return true;
|
||||
if (includes_all && !other.includes_all) return false;
|
||||
if (includes_all) {
|
||||
if (excluded_chars.size() > other.excluded_chars.size()) return true;
|
||||
if (excluded_chars.size() < other.excluded_chars.size()) return false;
|
||||
return excluded_chars < other.excluded_chars;
|
||||
} else {
|
||||
if (included_chars.size() < other.included_chars.size()) return true;
|
||||
if (included_chars.size() > other.included_chars.size()) return false;
|
||||
return included_chars < other.included_chars;
|
||||
}
|
||||
}
|
||||
|
||||
CharacterSet &CharacterSet::include_all() {
|
||||
|
|
@ -131,8 +124,7 @@ void CharacterSet::add_set(const CharacterSet &other) {
|
|||
excluded_chars.insert(c);
|
||||
included_chars.clear();
|
||||
} else {
|
||||
for (uint32_t c : other.included_chars)
|
||||
included_chars.insert(c);
|
||||
included_chars.insert(other.included_chars.begin(), other.included_chars.end());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -51,6 +51,12 @@ Metadata Metadata::prec_right(int precedence, const Rule &rule) {
|
|||
return Metadata{rule, params};
|
||||
}
|
||||
|
||||
Metadata Metadata::prec_dynamic(int dynamic_precedence, const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.dynamic_precedence = dynamic_precedence;
|
||||
return Metadata{rule, params};
|
||||
}
|
||||
|
||||
Metadata Metadata::separator(const Rule &rule) {
|
||||
MetadataParams params;
|
||||
params.has_precedence = true;
|
||||
|
|
|
|||
|
|
@ -14,6 +14,7 @@ enum Associativity {
|
|||
|
||||
struct MetadataParams {
|
||||
int precedence;
|
||||
int dynamic_precedence;
|
||||
Associativity associativity;
|
||||
bool has_precedence;
|
||||
bool has_associativity;
|
||||
|
|
@ -23,8 +24,8 @@ struct MetadataParams {
|
|||
bool is_main_token;
|
||||
|
||||
inline MetadataParams() :
|
||||
precedence{0}, associativity{AssociativityNone}, has_precedence{false},
|
||||
has_associativity{false}, is_token{false}, is_string{false},
|
||||
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
|
||||
has_precedence{false}, has_associativity{false}, is_token{false}, is_string{false},
|
||||
is_active{false}, is_main_token{false} {}
|
||||
|
||||
inline bool operator==(const MetadataParams &other) const {
|
||||
|
|
@ -33,6 +34,7 @@ struct MetadataParams {
|
|||
associativity == other.associativity &&
|
||||
has_precedence == other.has_precedence &&
|
||||
has_associativity == other.has_associativity &&
|
||||
dynamic_precedence == other.dynamic_precedence &&
|
||||
is_token == other.is_token &&
|
||||
is_string == other.is_string &&
|
||||
is_active == other.is_active &&
|
||||
|
|
@ -54,6 +56,7 @@ struct Metadata {
|
|||
static Metadata prec(int precedence, const Rule &rule);
|
||||
static Metadata prec_left(int precedence, const Rule &rule);
|
||||
static Metadata prec_right(int precedence, const Rule &rule);
|
||||
static Metadata prec_dynamic(int precedence, const Rule &rule);
|
||||
static Metadata separator(const Rule &rule);
|
||||
static Metadata main_token(const Rule &rule);
|
||||
|
||||
|
|
@ -63,4 +66,4 @@ struct Metadata {
|
|||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
#endif // COMPILER_RULES_METADATA_H_
|
||||
|
|
|
|||
|
|
@ -11,8 +11,9 @@ namespace tree_sitter {
|
|||
|
||||
struct ProductionStep {
|
||||
inline bool operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol && precedence == other.precedence &&
|
||||
associativity == other.associativity;
|
||||
return symbol == other.symbol &&
|
||||
precedence == other.precedence &&
|
||||
associativity == other.associativity;
|
||||
}
|
||||
|
||||
rules::Symbol symbol;
|
||||
|
|
@ -20,7 +21,21 @@ struct ProductionStep {
|
|||
rules::Associativity associativity;
|
||||
};
|
||||
|
||||
typedef std::vector<ProductionStep> Production;
|
||||
struct Production {
|
||||
std::vector<ProductionStep> steps;
|
||||
int dynamic_precedence = 0;
|
||||
|
||||
inline bool operator==(const Production &other) const {
|
||||
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
|
||||
}
|
||||
|
||||
inline ProductionStep &back() { return steps.back(); }
|
||||
inline const ProductionStep &back() const { return steps.back(); }
|
||||
inline bool empty() const { return steps.empty(); }
|
||||
inline size_t size() const { return steps.size(); }
|
||||
inline const ProductionStep &operator[](int i) const { return steps[i]; }
|
||||
inline const ProductionStep &at(int i) const { return steps[i]; }
|
||||
};
|
||||
|
||||
struct SyntaxVariable {
|
||||
std::string name;
|
||||
|
|
|
|||
|
|
@ -437,22 +437,36 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
|
|||
}
|
||||
|
||||
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
|
||||
if (!left)
|
||||
return true;
|
||||
if (!right)
|
||||
return false;
|
||||
if (!left) return true;
|
||||
if (!right) return false;
|
||||
|
||||
if (right->error_cost < left->error_cost) {
|
||||
LOG("select_smaller_error symbol:%s, over_symbol:%s",
|
||||
SYM_NAME(right->symbol), SYM_NAME(left->symbol));
|
||||
return true;
|
||||
}
|
||||
|
||||
if (left->error_cost < right->error_cost) {
|
||||
LOG("select_smaller_error symbol:%s, over_symbol:%s",
|
||||
SYM_NAME(left->symbol), SYM_NAME(right->symbol));
|
||||
return false;
|
||||
}
|
||||
|
||||
if (left->error_cost > 0) return -1;
|
||||
if (right->dynamic_precedence > left->dynamic_precedence) {
|
||||
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
|
||||
SYM_NAME(right->symbol), right->dynamic_precedence, SYM_NAME(left->symbol),
|
||||
left->dynamic_precedence);
|
||||
return true;
|
||||
}
|
||||
|
||||
if (left->dynamic_precedence > right->dynamic_precedence) {
|
||||
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
|
||||
SYM_NAME(left->symbol), left->dynamic_precedence, SYM_NAME(right->symbol),
|
||||
right->dynamic_precedence);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (left->error_cost > 0) return true;
|
||||
|
||||
int comparison = ts_tree_compare(left, right);
|
||||
switch (comparison) {
|
||||
|
|
@ -544,7 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree,
|
|||
|
||||
static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
||||
TSSymbol symbol, unsigned count,
|
||||
bool fragile, bool allow_skipping) {
|
||||
bool fragile, int dynamic_precedence,
|
||||
bool allow_skipping) {
|
||||
uint32_t initial_version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
|
||||
|
|
@ -587,6 +602,8 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
|||
}
|
||||
}
|
||||
|
||||
parent->dynamic_precedence += dynamic_precedence;
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, slice.version);
|
||||
TSStateId next_state = ts_language_next_state(language, state, symbol);
|
||||
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
|
||||
|
|
@ -929,6 +946,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
|
|||
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
|
||||
.symbol = action.params.symbol,
|
||||
.count = action.params.child_count,
|
||||
.dynamic_precedence = action.params.dynamic_precedence
|
||||
});
|
||||
default:
|
||||
break;
|
||||
|
|
@ -939,8 +957,10 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
|
|||
bool did_reduce = false;
|
||||
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
|
||||
ReduceAction action = self->reduce_actions.contents[i];
|
||||
StackPopResult reduction =
|
||||
parser__reduce(self, version, action.symbol, action.count, true, false);
|
||||
StackPopResult reduction = parser__reduce(
|
||||
self, version, action.symbol, action.count, true,
|
||||
action.dynamic_precedence, false
|
||||
);
|
||||
if (reduction.stopped_at_error) {
|
||||
ts_tree_array_delete(&reduction.slices.contents[0].trees);
|
||||
ts_stack_remove_version(self->stack, reduction.slices.contents[0].version);
|
||||
|
|
@ -1180,12 +1200,13 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
|
||||
unsigned child_count = action.params.child_count;
|
||||
TSSymbol symbol = action.params.symbol;
|
||||
unsigned dynamic_precedence = action.params.dynamic_precedence;
|
||||
bool fragile = action.fragile;
|
||||
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
|
||||
|
||||
StackPopResult reduction =
|
||||
parser__reduce(self, version, symbol, child_count, fragile, true);
|
||||
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
|
||||
StackSlice slice = *array_front(&reduction.slices);
|
||||
if (reduction.stopped_at_error) {
|
||||
reduction_stopped_at_error = true;
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ extern "C" {
|
|||
typedef struct {
|
||||
uint32_t count;
|
||||
TSSymbol symbol;
|
||||
int dynamic_precedence;
|
||||
} ReduceAction;
|
||||
|
||||
typedef Array(ReduceAction) ReduceActionSet;
|
||||
|
|
|
|||
|
|
@ -150,6 +150,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
|
|||
self->visible_child_count = 0;
|
||||
self->error_cost = 0;
|
||||
self->has_external_tokens = false;
|
||||
self->dynamic_precedence = 0;
|
||||
|
||||
for (uint32_t i = 0; i < child_count; i++) {
|
||||
Tree *child = children[i];
|
||||
|
|
@ -165,6 +166,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
|
|||
}
|
||||
|
||||
self->error_cost += child->error_cost;
|
||||
self->dynamic_precedence += child->dynamic_precedence;
|
||||
|
||||
if (child->visible) {
|
||||
self->visible_child_count++;
|
||||
|
|
|
|||
|
|
@ -46,6 +46,7 @@ typedef struct Tree {
|
|||
} first_leaf;
|
||||
|
||||
uint32_t ref_count;
|
||||
int dynamic_precedence;
|
||||
bool visible : 1;
|
||||
bool named : 1;
|
||||
bool extra : 1;
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
auto builder = LexTableBuilder::create(LexicalGrammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
"token_0",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ 'a' }),
|
||||
|
|
@ -26,7 +26,7 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
false
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_2",
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ 'b' }),
|
||||
|
|
@ -39,22 +39,22 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
separators
|
||||
});
|
||||
|
||||
AssertThat(builder->detect_conflict(0, 1), IsFalse());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
|
||||
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true when one token matches a string that the other matches, "
|
||||
"plus some addition content that begins with a separator character", [&]() {
|
||||
it("returns true when the left token can match a string that the right token matches, "
|
||||
"plus a separator character", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
"token_0",
|
||||
VariableTypeNamed,
|
||||
Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
|
||||
false
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_2",
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
|
||||
true
|
||||
|
|
@ -64,24 +64,32 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
};
|
||||
|
||||
auto builder = LexTableBuilder::create(grammar);
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
|
||||
|
||||
grammar.variables[1].is_string = false;
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
|
||||
});
|
||||
|
||||
it("returns true when one token matches a string that the other matches, "
|
||||
"plus some addition content that matches another one-character token", [&]() {
|
||||
it("returns true when the left token matches a string that the right token matches, "
|
||||
"plus the first character of some token that can follow the right token", [&]() {
|
||||
LexicalGrammar grammar{
|
||||
{
|
||||
LexicalVariable{
|
||||
"token_0",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ '>' }),
|
||||
CharacterSet({ '=' }),
|
||||
}),
|
||||
true
|
||||
},
|
||||
LexicalVariable{
|
||||
"token_1",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ '>' }),
|
||||
CharacterSet({ '>' }),
|
||||
}),
|
||||
true
|
||||
},
|
||||
|
|
@ -89,7 +97,7 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
"token_2",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
CharacterSet({ '>' }),
|
||||
CharacterSet({ '=' }),
|
||||
}),
|
||||
true
|
||||
},
|
||||
|
|
@ -97,9 +105,17 @@ describe("LexTableBuilder::detect_conflict", []() {
|
|||
separators
|
||||
};
|
||||
|
||||
// If no tokens can follow token_1, then there's no conflict
|
||||
auto builder = LexTableBuilder::create(grammar);
|
||||
AssertThat(builder->detect_conflict(0, 1), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0), IsFalse());
|
||||
vector<set<Symbol::Index>> following_tokens_by_token_index(3);
|
||||
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse());
|
||||
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
|
||||
|
||||
// If token_2 can follow token_1, then token_0 conflicts with token_1
|
||||
builder = LexTableBuilder::create(grammar);
|
||||
following_tokens_by_token_index[1].insert(2);
|
||||
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue());
|
||||
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -25,25 +25,25 @@ describe("ParseItemSetBuilder", []() {
|
|||
it("adds items at the beginnings of referenced rules", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
}),
|
||||
}, 0},
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
}, 0},
|
||||
Production{{
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone},
|
||||
})
|
||||
}, 0}
|
||||
}},
|
||||
SyntaxVariable{"rule2", VariableTypeNamed, {
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::terminal(14), 0, AssociativityNone},
|
||||
{Symbol::terminal(15), 0, AssociativityNone},
|
||||
})
|
||||
}, 0}
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
|
|
@ -84,17 +84,17 @@ describe("ParseItemSetBuilder", []() {
|
|||
it("handles rules with empty productions", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
}),
|
||||
}, 0},
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
}),
|
||||
Production({})
|
||||
}, 0},
|
||||
Production{{}, 0}
|
||||
}},
|
||||
}, {}, {}, {}};
|
||||
|
||||
|
|
|
|||
|
|
@ -34,22 +34,64 @@ describe("flatten_grammar", []() {
|
|||
AssertThat(result.name, Equals("test"));
|
||||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(3), 102, AssociativityRight},
|
||||
{Symbol::non_terminal(4), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
}),
|
||||
Production({
|
||||
}, 0},
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(5), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
}, 0}
|
||||
})));
|
||||
});
|
||||
|
||||
it("stores the maximum dynamic precedence specified in each production", [&]() {
|
||||
SyntaxVariable result = flatten_rule({
|
||||
"test",
|
||||
VariableTypeNamed,
|
||||
Rule::seq({
|
||||
Symbol::non_terminal(1),
|
||||
Metadata::prec_dynamic(101, Rule::seq({
|
||||
Symbol::non_terminal(2),
|
||||
Rule::choice({
|
||||
Metadata::prec_dynamic(102, Rule::seq({
|
||||
Symbol::non_terminal(3),
|
||||
Symbol::non_terminal(4)
|
||||
})),
|
||||
Symbol::non_terminal(5),
|
||||
}),
|
||||
Symbol::non_terminal(6),
|
||||
})),
|
||||
Symbol::non_terminal(7),
|
||||
})
|
||||
})))
|
||||
});
|
||||
|
||||
AssertThat(result.name, Equals("test"));
|
||||
AssertThat(result.type, Equals(VariableTypeNamed));
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(3), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(4), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
}, 102},
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(5), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(6), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(7), 0, AssociativityNone},
|
||||
}, 101}
|
||||
})));
|
||||
});
|
||||
|
||||
it("uses the last assigned precedence", [&]() {
|
||||
|
|
@ -63,11 +105,11 @@ describe("flatten_grammar", []() {
|
|||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 101, AssociativityLeft},
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
{Symbol::non_terminal(2), 101, AssociativityLeft},
|
||||
}, 0}
|
||||
})));
|
||||
|
||||
result = flatten_rule({
|
||||
"test2",
|
||||
|
|
@ -78,10 +120,10 @@ describe("flatten_grammar", []() {
|
|||
});
|
||||
|
||||
AssertThat(result.productions, Equals(vector<Production>({
|
||||
Production({
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 101, AssociativityLeft},
|
||||
})
|
||||
})))
|
||||
}, 0}
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
|
|
@ -305,29 +305,17 @@ describe("CharacterSet", []() {
|
|||
});
|
||||
|
||||
describe("::included_ranges", [&]() {
|
||||
it("consolidates sequences of 3 or more consecutive characters into ranges", [&]() {
|
||||
it("consolidates consecutive sequences of characters into ranges", [&]() {
|
||||
CharacterSet set1 = CharacterSet()
|
||||
.include('a', 'c')
|
||||
.include('g')
|
||||
.include('e', 'j')
|
||||
.include('m')
|
||||
.include('z');
|
||||
|
||||
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
|
||||
CharacterRange{'a', 'c'},
|
||||
CharacterRange('g'),
|
||||
CharacterRange('z'),
|
||||
})));
|
||||
});
|
||||
|
||||
it("doesn't consolidate sequences of 2 consecutive characters", [&]() {
|
||||
CharacterSet set1 = CharacterSet()
|
||||
.include('a', 'b')
|
||||
.include('g')
|
||||
.include('z');
|
||||
|
||||
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
|
||||
CharacterRange('a'),
|
||||
CharacterRange('b'),
|
||||
CharacterRange('g'),
|
||||
CharacterRange{'e', 'j'},
|
||||
CharacterRange('m'),
|
||||
CharacterRange('z'),
|
||||
})));
|
||||
});
|
||||
|
|
|
|||
25
test/fixtures/test_grammars/dynamic_precedence/corpus.txt
vendored
Normal file
25
test/fixtures/test_grammars/dynamic_precedence/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
===============================
|
||||
Declarations
|
||||
===============================
|
||||
|
||||
int * x
|
||||
|
||||
---
|
||||
|
||||
(program (declaration
|
||||
(type (identifier))
|
||||
(declarator (identifier))))
|
||||
|
||||
===============================
|
||||
Expressions
|
||||
===============================
|
||||
|
||||
int * x * y
|
||||
|
||||
---
|
||||
|
||||
(program (expression
|
||||
(expression
|
||||
(expression (identifier))
|
||||
(expression (identifier)))
|
||||
(expression (identifier))))
|
||||
73
test/fixtures/test_grammars/dynamic_precedence/grammar.json
vendored
Normal file
73
test/fixtures/test_grammars/dynamic_precedence/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,73 @@
|
|||
{
|
||||
"name": "dynamic_precedence",
|
||||
|
||||
"conflicts": [
|
||||
["expression", "type"]
|
||||
],
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"program": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "declaration"},
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
]
|
||||
},
|
||||
|
||||
"expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "expression"},
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "expression"}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"declaration": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "type"},
|
||||
{"type": "SYMBOL", "name": "declarator"}
|
||||
]
|
||||
},
|
||||
|
||||
"declarator": {
|
||||
"type": "PREC_DYNAMIC",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "*"},
|
||||
{"type": "SYMBOL", "name": "identifier"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"type": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
},
|
||||
|
||||
"identifier": {
|
||||
"type": "PATTERN",
|
||||
"value": "[a-zA-Z]+"
|
||||
}
|
||||
}
|
||||
}
|
||||
1
test/fixtures/test_grammars/dynamic_precedence/readme.md
vendored
Normal file
1
test/fixtures/test_grammars/dynamic_precedence/readme.md
vendored
Normal file
|
|
@ -0,0 +1 @@
|
|||
This grammar contains a conflict that is resolved at runtime. The `PREC_DYNAMIC` rule is used to indicate that the `declarator` rule should be preferred over the `expression` rule when both interpretations are valid.
|
||||
|
|
@ -1,11 +1,11 @@
|
|||
Unresolved conflict for symbol sequence:
|
||||
|
||||
identifier • '{' …
|
||||
identifier identifier • '{' …
|
||||
|
||||
Possible interpretations:
|
||||
|
||||
1: (expression identifier) • '{' …
|
||||
2: (function_call identifier • block)
|
||||
1: identifier (expression identifier) • '{' …
|
||||
2: identifier (function_call identifier • block)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -136,9 +136,14 @@ ostream &operator<<(ostream &stream, const Variable &variable) {
|
|||
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const Production &production) {
|
||||
return stream << "(Production " << production.steps << " " <<
|
||||
to_string(production.dynamic_precedence) << ")";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
|
||||
return stream << "(Variable " << variable.name << " " << variable.productions <<
|
||||
" " << to_string(variable.type) << "}";
|
||||
" " << to_string(variable.type) << ")";
|
||||
}
|
||||
|
||||
ostream &operator<<(ostream &stream, const LexicalVariable &variable) {
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ ostream &operator<<(ostream &, const InputGrammar &);
|
|||
ostream &operator<<(ostream &, const CompileError &);
|
||||
ostream &operator<<(ostream &, const ExternalToken &);
|
||||
ostream &operator<<(ostream &, const ProductionStep &);
|
||||
ostream &operator<<(ostream &, const Production &);
|
||||
ostream &operator<<(ostream &, const PrecedenceRange &);
|
||||
ostream &operator<<(ostream &, const Variable &);
|
||||
ostream &operator<<(ostream &, const LexicalVariable &);
|
||||
|
|
|
|||
|
|
@ -13,7 +13,7 @@ vector<string> test_languages = list_directory(grammars_dir_path);
|
|||
for (auto &language_name : test_languages) {
|
||||
if (language_name == "readme.md") continue;
|
||||
|
||||
describe(("test language: " + language_name).c_str(), [&]() {
|
||||
describe(("test grammar: " + language_name).c_str(), [&]() {
|
||||
string directory_path = grammars_dir_path + "/" + language_name;
|
||||
string grammar_path = directory_path + "/grammar.json";
|
||||
string grammar_json = read_file(grammar_path);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue