Merge pull request #87 from tree-sitter/dynamic-precedence

Introduce rule for resolving runtime ambiguities
This commit is contained in:
Max Brunsfeld 2017-07-10 16:43:58 -07:00 committed by GitHub
commit 43d347c225
36 changed files with 689 additions and 364 deletions

2
.gitmodules vendored
View file

@ -12,4 +12,4 @@
url = https://github.com/udp/json-parser.git
[submodule "externals/crypto-algorithms"]
path = externals/crypto-algorithms
url = https://github.com/B-Con/crypto-algorithms.git
url = https://github.com/maxbrunsfeld/crypto-algorithms.git

View file

@ -182,41 +182,7 @@
"properties": {
"type": {
"type": "string",
"pattern": "^PREC$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"prec-left-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PREC_LEFT$"
},
"value": {
"type": "integer"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"prec-right-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^PREC_RIGHT$"
"pattern": "^(PREC|PREC_LEFT|PREC_RIGHT|PREC_DYNAMIC)$"
},
"value": {
"type": "integer"
@ -239,9 +205,7 @@
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },
{ "$ref": "#/definitions/prec-rule" },
{ "$ref": "#/definitions/prec-left-rule" },
{ "$ref": "#/definitions/prec-right-rule" }
{ "$ref": "#/definitions/prec-rule" }
]
}
}

@ -1 +1 @@
Subproject commit cfbde48414baacf51fc7c74f275190881f037d32
Subproject commit c7e5c23ab04ecfb5465cbefbe17ba23d4cb3bc9d

View file

@ -42,6 +42,7 @@ typedef struct {
union {
TSStateId to_state;
struct {
short dynamic_precedence;
TSSymbol symbol;
unsigned short child_count;
};
@ -145,21 +146,30 @@ typedef struct TSLanguage {
{ .type = TSParseActionTypeShift, .extra = true } \
}
#define REDUCE(symbol_val, child_count_val) \
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = {.symbol = symbol_val, .child_count = child_count_val } \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val) \
{ \
{ \
.type = TSParseActionTypeReduce, .fragile = true, \
.params = {.symbol = symbol_val, .child_count = child_count_val } \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.fragile = true, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define ACCEPT_INPUT() \
{ \

View file

@ -9,7 +9,7 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>
#define TREE_SITTER_LANGUAGE_VERSION 2
#define TREE_SITTER_LANGUAGE_VERSION 3
typedef unsigned short TSSymbol;
typedef struct TSLanguage TSLanguage;

View file

@ -2,6 +2,7 @@
#include <algorithm>
#include <map>
#include <set>
#include <deque>
#include <string>
#include <unordered_map>
#include <utility>
@ -16,11 +17,13 @@
namespace tree_sitter {
namespace build_tables {
using std::deque;
using std::find;
using std::pair;
using std::vector;
using std::set;
using std::map;
using std::move;
using std::string;
using std::to_string;
using std::unordered_map;
@ -28,75 +31,77 @@ using rules::Associativity;
using rules::Symbol;
using rules::END_OF_INPUT;
using SymbolSequence = vector<Symbol>;
struct ParseStateQueueEntry {
SymbolSequence preceding_symbols;
ParseItemSet item_set;
ParseStateId state_id;
};
class ParseTableBuilder {
const SyntaxGrammar grammar;
const LexicalGrammar lexical_grammar;
unordered_map<Symbol, ParseItemSet> recovery_states;
unordered_map<ParseItemSet, ParseStateId> parse_state_ids;
vector<pair<ParseItemSet, ParseStateId>> item_sets_to_process;
deque<ParseStateQueueEntry> parse_state_queue;
ParseTable parse_table;
set<string> conflicts;
ParseItemSetBuilder item_set_builder;
set<const Production *> fragile_productions;
vector<set<Symbol>> incompatible_tokens_by_index;
bool allow_any_conflict;
vector<set<Symbol::Index>> following_terminals_by_terminal_index;
bool processing_recovery_states;
public:
ParseTableBuilder(const SyntaxGrammar &grammar,
const LexicalGrammar &lex_grammar)
: grammar(grammar),
lexical_grammar(lex_grammar),
item_set_builder(grammar, lex_grammar),
allow_any_conflict(false) {}
ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar)
: grammar(grammar),
lexical_grammar(lex_grammar),
item_set_builder(grammar, lex_grammar),
incompatible_tokens_by_index(lexical_grammar.variables.size()),
following_terminals_by_terminal_index(lexical_grammar.variables.size()),
processing_recovery_states(false) {}
pair<ParseTable, CompileError> build() {
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0};
Production start_production{
ProductionStep{start_symbol, 0, rules::AssociativityNone},
};
// Placeholder for error state
add_parse_state(ParseItemSet());
add_parse_state(ParseItemSet({
ParseStateId error_state_id = add_parse_state({}, ParseItemSet());
add_parse_state({}, ParseItemSet({
{
ParseItem(rules::START(), start_production, 0),
LookaheadSet({ END_OF_INPUT() }),
LookaheadSet({END_OF_INPUT()}),
},
}));
CompileError error = process_part_state_queue();
if (error.type != TSCompileErrorTypeNone) {
return { parse_table, error };
}
if (error.type != TSCompileErrorTypeNone) return {parse_table, error};
compute_unmergable_token_pairs();
build_error_parse_state();
allow_any_conflict = true;
processing_recovery_states = true;
build_error_parse_state(error_state_id);
process_part_state_queue();
allow_any_conflict = false;
mark_fragile_actions();
remove_duplicate_parse_states();
return { parse_table, CompileError::none() };
return {parse_table, CompileError::none()};
}
private:
CompileError process_part_state_queue() {
while (!item_sets_to_process.empty()) {
auto pair = item_sets_to_process.back();
ParseItemSet &item_set = pair.first;
ParseStateId state_id = pair.second;
item_sets_to_process.pop_back();
while (!parse_state_queue.empty()) {
auto entry = parse_state_queue.front();
parse_state_queue.pop_front();
item_set_builder.apply_transitive_closure(&item_set);
string conflict = add_actions(item_set, state_id);
item_set_builder.apply_transitive_closure(&entry.item_set);
string conflict = add_actions(
move(entry.preceding_symbols),
move(entry.item_set),
entry.state_id
);
if (!conflict.empty()) {
return CompileError(TSCompileErrorTypeParseConflict, conflict);
@ -106,7 +111,7 @@ class ParseTableBuilder {
return CompileError::none();
}
void build_error_parse_state() {
void build_error_parse_state(ParseStateId state_id) {
ParseState error_state;
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
@ -141,14 +146,14 @@ class ParseTableBuilder {
}
error_state.terminal_entries[END_OF_INPUT()].actions.push_back(ParseAction::Recover(0));
parse_table.states[0] = error_state;
parse_table.states[state_id] = error_state;
}
void add_out_of_context_parse_state(ParseState *error_state,
const rules::Symbol &symbol) {
const ParseItemSet &item_set = recovery_states[symbol];
if (!item_set.entries.empty()) {
ParseStateId state = add_parse_state(item_set);
ParseStateId state = add_parse_state({}, item_set);
if (symbol.is_non_terminal()) {
error_state->nonterminal_entries[symbol.index] = state;
} else {
@ -157,21 +162,24 @@ class ParseTableBuilder {
}
}
ParseStateId add_parse_state(const ParseItemSet &item_set) {
ParseStateId add_parse_state(SymbolSequence &&preceding_symbols, const ParseItemSet &item_set) {
auto pair = parse_state_ids.find(item_set);
if (pair == parse_state_ids.end()) {
ParseStateId state_id = parse_table.states.size();
parse_table.states.push_back(ParseState());
parse_state_ids[item_set] = state_id;
parse_table.states[state_id].shift_actions_signature = item_set.unfinished_item_signature();
item_sets_to_process.push_back({ std::move(item_set), state_id });
parse_state_queue.push_back({
move(preceding_symbols),
move(item_set),
state_id
});
return state_id;
} else {
return pair->second;
}
}
string add_actions(const ParseItemSet &item_set, ParseStateId state_id) {
string add_actions(SymbolSequence &&sequence, ParseItemSet &&item_set, ParseStateId state_id) {
map<Symbol, ParseItemSet> terminal_successors;
map<Symbol::Index, ParseItemSet> nonterminal_successors;
set<Symbol> lookaheads_with_conflicts;
@ -197,7 +205,7 @@ class ParseTableBuilder {
parse_table.add_terminal_action(state_id, lookahead, action);
} else {
ParseAction &existing_action = entry.actions[0];
if (existing_action.type == ParseActionTypeAccept || allow_any_conflict) {
if (existing_action.type == ParseActionTypeAccept || processing_recovery_states) {
entry.actions.push_back(action);
} else {
int existing_precedence = existing_action.precedence();
@ -236,29 +244,31 @@ class ParseTableBuilder {
for (auto &pair : terminal_successors) {
Symbol lookahead = pair.first;
ParseItemSet &next_item_set = pair.second;
ParseStateId next_state_id = add_parse_state(next_item_set);
ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set);
ParseState &state = parse_table.states[state_id];
bool had_existing_action = !state.terminal_entries[lookahead].actions.empty();
parse_table.add_terminal_action(state_id, lookahead, ParseAction::Shift(next_state_id));
if (!allow_any_conflict) {
if (had_existing_action)
if (!processing_recovery_states) {
if (had_existing_action) {
lookaheads_with_conflicts.insert(lookahead);
}
recovery_states[lookahead].add(next_item_set);
}
}
// Add a Shift action for each non-terminal transition.
for (auto &pair : nonterminal_successors) {
Symbol::Index lookahead = pair.first;
Symbol lookahead = Symbol::non_terminal(pair.first);
ParseItemSet &next_item_set = pair.second;
ParseStateId next_state = add_parse_state(next_item_set);
parse_table.set_nonterminal_action(state_id, lookahead, next_state);
if (!allow_any_conflict)
recovery_states[Symbol::non_terminal(lookahead)].add(next_item_set);
ParseStateId next_state_id = add_parse_state(append_symbol(sequence, lookahead), next_item_set);
parse_table.set_nonterminal_action(state_id, lookahead.index, next_state_id);
if (!processing_recovery_states) {
recovery_states[lookahead].add(next_item_set);
}
}
for (Symbol lookahead : lookaheads_with_conflicts) {
string conflict = handle_conflict(item_set, state_id, lookahead);
string conflict = handle_conflict(item_set, sequence, state_id, lookahead);
if (!conflict.empty()) return conflict;
}
@ -266,7 +276,7 @@ class ParseTableBuilder {
ParseState &state = parse_table.states[state_id];
for (const Symbol &extra_symbol : grammar.extra_tokens) {
if (!state.terminal_entries.count(extra_symbol) ||
state.has_shift_action() || allow_any_conflict) {
state.has_shift_action() || processing_recovery_states) {
parse_table.add_terminal_action(state_id, extra_symbol, shift_extra);
}
}
@ -281,9 +291,10 @@ class ParseTableBuilder {
for (ParseAction &action : actions) {
if (action.type == ParseActionTypeReduce) {
if (has_fragile_production(action.production))
if (has_fragile_production(action.production)) {
action.fragile = true;
action.production = NULL;
}
action.production = nullptr;
}
}
@ -305,8 +316,6 @@ class ParseTableBuilder {
}
void compute_unmergable_token_pairs() {
incompatible_tokens_by_index.resize(lexical_grammar.variables.size());
auto lex_table_builder = LexTableBuilder::create(lexical_grammar);
for (unsigned i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol token = Symbol::terminal(i);
@ -314,7 +323,7 @@ class ParseTableBuilder {
for (unsigned j = 0; j < n; j++) {
if (i == j) continue;
if (lex_table_builder->detect_conflict(i, j)) {
if (lex_table_builder->detect_conflict(i, j, following_terminals_by_terminal_index)) {
incompatible_indices.insert(Symbol::terminal(j));
}
}
@ -330,11 +339,12 @@ class ParseTableBuilder {
}
void remove_duplicate_parse_states() {
map<size_t, set<ParseStateId>> state_indices_by_signature;
unordered_map<size_t, set<ParseStateId>> state_indices_by_signature;
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
ParseState &state = parse_table.states[i];
state_indices_by_signature[state.shift_actions_signature].insert(i);
for (auto &pair : parse_state_ids) {
const ParseItemSet &item_set = pair.first;
ParseStateId state_id = pair.second;
state_indices_by_signature[item_set.unfinished_item_signature()].insert(state_id);
}
set<ParseStateId> deleted_states;
@ -343,14 +353,18 @@ class ParseTableBuilder {
map<ParseStateId, ParseStateId> state_replacements;
for (auto &pair : state_indices_by_signature) {
auto &state_group = pair.second;
auto &state_indices = pair.second;
for (ParseStateId i : state_group) {
for (ParseStateId j : state_group) {
if (j == i) break;
if (!state_replacements.count(j) && merge_parse_state(j, i)) {
state_replacements.insert({ i, j });
deleted_states.insert(i);
for (auto i = state_indices.begin(), end = state_indices.end(); i != end;) {
for (ParseStateId j : state_indices) {
if (j == *i) {
++i;
break;
}
if (!state_replacements.count(j) && merge_parse_state(j, *i)) {
state_replacements.insert({*i, j});
deleted_states.insert(*i);
i = state_indices.erase(i);
break;
}
}
@ -360,11 +374,8 @@ class ParseTableBuilder {
if (state_replacements.empty()) break;
for (ParseStateId i = 0, n = parse_table.states.size(); i < n; i++) {
ParseState &state = parse_table.states[i];
if (state_replacements.count(i)) {
state_indices_by_signature[state.shift_actions_signature].erase(i);
} else {
if (!state_replacements.count(i)) {
ParseState &state = parse_table.states[i];
state.each_referenced_state([&state_replacements](ParseStateId *state_index) {
auto replacement = state_replacements.find(*state_index);
if (replacement != state_replacements.end()) {
@ -404,7 +415,7 @@ class ParseTableBuilder {
static bool has_entry(const ParseState &state, const ParseTableEntry &entry) {
for (const auto &pair : state.terminal_entries)
if (pair.second == entry)
if (pair.second.actions == entry.actions)
return true;
return false;
}
@ -417,13 +428,12 @@ class ParseTableBuilder {
for (auto &entry : state.terminal_entries) {
Symbol lookahead = entry.first;
const auto &other_entry = other.terminal_entries.find(lookahead);
const auto &other_entry = other.terminal_entries.find(lookahead);
if (other_entry == other.terminal_entries.end()) {
if (lookahead.is_external()) return false;
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_entry(other, entry.second)) return false;
if (lookahead.is_external()) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
if (other.terminal_entries.count(incompatible_token)) return false;
@ -440,10 +450,9 @@ class ParseTableBuilder {
Symbol lookahead = entry.first;
if (!state.terminal_entries.count(lookahead)) {
if (lookahead.is_external()) return false;
if (entry.second.actions.back().type != ParseActionTypeReduce) return false;
if (!has_entry(state, entry.second)) return false;
if (lookahead.is_external()) return false;
if (!lookahead.is_built_in()) {
for (const Symbol &incompatible_token : incompatible_tokens_by_index[lookahead.index]) {
if (state.terminal_entries.count(incompatible_token)) return false;
@ -461,8 +470,8 @@ class ParseTableBuilder {
return true;
}
string handle_conflict(const ParseItemSet &item_set, ParseStateId state_id,
Symbol lookahead) {
string handle_conflict(const ParseItemSet &item_set, const SymbolSequence &preceding_symbols,
ParseStateId state_id, Symbol lookahead) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
int reduction_precedence = entry.actions.front().precedence();
set<ParseItem> shift_items;
@ -556,24 +565,13 @@ class ParseTableBuilder {
if (expected_conflict == actual_conflict)
return "";
ParseItem earliest_starting_item;
for (const ParseAction &action : entry.actions)
if (action.type == ParseActionTypeReduce)
if (action.consumed_symbol_count > earliest_starting_item.step_index)
earliest_starting_item = ParseItem(action.symbol, *action.production, action.consumed_symbol_count);
for (const ParseItem &shift_item : shift_items)
if (shift_item.step_index > earliest_starting_item.step_index)
earliest_starting_item = shift_item;
string description = "Unresolved conflict for symbol sequence:\n\n";
for (size_t i = 0; i < earliest_starting_item.step_index; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
for (auto &symbol : preceding_symbols) {
description += " " + symbol_name(symbol);
}
description += " \u2022 " + symbol_name(lookahead) + " \u2026";
description += "\n\n";
description += "Possible interpretations:\n\n";
size_t interpretation_count = 1;
@ -581,12 +579,12 @@ class ParseTableBuilder {
if (action.type == ParseActionTypeReduce) {
description += " " + to_string(interpretation_count++) + ":";
for (size_t i = 0; i < earliest_starting_item.step_index - action.consumed_symbol_count; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
for (size_t i = 0; i < preceding_symbols.size() - action.consumed_symbol_count; i++) {
description += " " + symbol_name(preceding_symbols[i]);
}
description += " (" + symbol_name(action.symbol);
for (const ProductionStep &step : *action.production) {
for (const ProductionStep &step : action.production->steps) {
description += " " + symbol_name(step.symbol);
}
description += ")";
@ -598,8 +596,8 @@ class ParseTableBuilder {
for (const ParseItem &shift_item : shift_items) {
description += " " + to_string(interpretation_count++) + ":";
for (size_t i = 0; i < earliest_starting_item.step_index - shift_item.step_index; i++) {
description += " " + symbol_name(earliest_starting_item.production->at(i).symbol);
for (size_t i = 0; i < preceding_symbols.size() - shift_item.step_index; i++) {
description += " " + symbol_name(preceding_symbols[i]);
}
description += " (" + symbol_name(shift_item.lhs());
@ -690,6 +688,30 @@ class ParseTableBuilder {
// Reports whether `production` has been recorded as fragile (i.e. its reduce
// actions must be marked fragile in the parse table).
bool has_fragile_production(const Production *production) {
  return fragile_productions.count(production) != 0;
}
// Returns a copy of `sequence` with `symbol` appended.
//
// As a side effect, records terminal adjacencies: every non-built-in terminal
// that can END the previous symbol may be immediately followed by every
// non-built-in terminal that can START `symbol`. These pairs are accumulated
// in `following_terminals_by_terminal_index`, which is later consulted when
// detecting lexical conflicts between tokens.
SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) {
  if (!sequence.empty()) {
    const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back());
    const LookaheadSet &right_tokens = item_set_builder.get_first_set(symbol);
    if (!left_tokens.empty() && !right_tokens.empty()) {
      for (const Symbol &left_symbol : *left_tokens.entries) {
        if (left_symbol.is_terminal() && !left_symbol.is_built_in()) {
          for (const Symbol &right_symbol : *right_tokens.entries) {
            if (right_symbol.is_terminal() && !right_symbol.is_built_in()) {
              following_terminals_by_terminal_index[left_symbol.index].insert(right_symbol.index);
            }
          }
        }
      }
    }
  }

  // Reserve rather than size-construct: the original
  // `SymbolSequence result(sequence.size() + 1)` default-constructed n + 1
  // elements that the subsequent `assign` immediately discarded.
  SymbolSequence result;
  result.reserve(sequence.size() + 1);
  result.assign(sequence.begin(), sequence.end());
  result.push_back(symbol);
  return result;
}
};
pair<ParseTable, CompileError> build_parse_table(

View file

@ -5,6 +5,7 @@
#include <string>
#include <unordered_map>
#include <utility>
#include <cwctype>
#include <vector>
#include "compiler/build_tables/lex_conflict_manager.h"
#include "compiler/build_tables/lex_item.h"
@ -15,6 +16,7 @@
namespace tree_sitter {
namespace build_tables {
using std::iswalpha;
using std::map;
using std::pair;
using std::set;
@ -70,14 +72,16 @@ class LexTableBuilderImpl : public LexTableBuilder {
LexTable lex_table;
const LexicalGrammar grammar;
vector<Rule> separator_rules;
CharacterSet separator_start_characters;
CharacterSet token_start_characters;
LexConflictManager conflict_manager;
unordered_map<LexItemSet, LexStateId> lex_state_ids;
public:
vector<bool> shadowed_token_indices;
map<Symbol::Index, CharacterSet> following_characters_by_token_index;
CharacterSet separator_start_characters;
CharacterSet current_conflict_detection_following_characters;
Symbol::Index current_conflict_detection_token_index;
bool current_conflict_value;
public:
LexTableBuilderImpl(const LexicalGrammar &grammar) : grammar(grammar) {
StartingCharacterAggregator separator_character_aggregator;
for (const auto &rule : grammar.separators) {
@ -86,20 +90,6 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
separator_rules.push_back(Blank{});
separator_start_characters = separator_character_aggregator.result;
StartingCharacterAggregator token_start_character_aggregator;
for (const auto &variable : grammar.variables) {
token_start_character_aggregator.apply(variable.rule);
}
token_start_characters = token_start_character_aggregator.result;
token_start_characters
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_')
.exclude('$');
shadowed_token_indices.resize(grammar.variables.size());
}
LexTable build(ParseTable *parse_table) {
@ -113,7 +103,10 @@ class LexTableBuilderImpl : public LexTableBuilder {
return lex_table;
}
bool detect_conflict(Symbol::Index left, Symbol::Index right) {
bool detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals_by_terminal_index) {
clear();
StartingCharacterAggregator left_starting_characters;
StartingCharacterAggregator right_starting_characters;
left_starting_characters.apply(grammar.variables[left].rule);
@ -124,12 +117,47 @@ class LexTableBuilderImpl : public LexTableBuilder {
return false;
}
clear();
map<Symbol, ParseTableEntry> terminals;
terminals[Symbol::terminal(left)];
terminals[Symbol::terminal(right)];
add_lex_state(item_set_for_terminals(terminals));
return shadowed_token_indices[right];
auto following_characters_entry = following_characters_by_token_index.find(right);
if (following_characters_entry == following_characters_by_token_index.end()) {
StartingCharacterAggregator aggregator;
for (auto following_token_index : following_terminals_by_terminal_index[right]) {
aggregator.apply(grammar.variables[following_token_index].rule);
}
following_characters_entry =
following_characters_by_token_index.insert({right, aggregator.result}).first;
// TODO - Refactor this. In general, a keyword token cannot be followed immediately by
// another alphanumeric character. But this requirement is currently not expressed anywhere in
// the grammar. So without this hack, we would be overly conservative about merging parse
// states because we would often consider `identifier` tokens to *conflict* with keyword
// tokens.
if (is_keyword(grammar.variables[right])) {
following_characters_entry->second
.exclude('a', 'z')
.exclude('A', 'Z')
.exclude('0', '9')
.exclude('_')
.exclude('$');
}
}
current_conflict_detection_token_index = right;
current_conflict_detection_following_characters = following_characters_entry->second;
add_lex_state(item_set_for_terminals({{Symbol::terminal(left), {}}, {Symbol::terminal(right), {}}}));
return current_conflict_value;
}
// Heuristic test for whether a token is a keyword: a token defined as a
// literal string whose last character is alphabetic (e.g. "while", "return").
// NOTE(review): only the FINAL character is inspected — this presumably
// suffices because string tokens ending in a letter are word-like; confirm
// this matches the intent for mixed tokens like "if(".
bool is_keyword(const LexicalVariable &variable) {
return variable.is_string && iswalpha(get_last_character(variable.rule));
}
// Returns the last character that `rule` can match, or 0 if none can be
// determined.
//  - Seq: recurse into the right-hand (final) part of the sequence.
//  - CharacterSet: returns the first included character.
//    NOTE(review): assumes the set is effectively a single character (as in a
//    string-literal token); for multi-character sets this picks an arbitrary
//    member — confirm callers only use this on string tokens (see is_keyword).
//  - Metadata: unwrap the wrapper and recurse into the inner rule.
//  - Anything else: 0.
static uint32_t get_last_character(const Rule &rule) {
return rule.match(
[](const Seq &sequence) { return get_last_character(*sequence.right); },
[](const rules::CharacterSet &rule) { return *rule.included_chars.begin(); },
[](const rules::Metadata &rule) { return get_last_character(*rule.rule); },
[](auto) { return 0; }
);
}
LexStateId add_lex_state(const LexItemSet &item_set) {
@ -149,7 +177,8 @@ class LexTableBuilderImpl : public LexTableBuilder {
void clear() {
lex_table.states.clear();
lex_state_ids.clear();
shadowed_token_indices.assign(grammar.variables.size(), false);
current_conflict_detection_following_characters = CharacterSet();
current_conflict_value = false;
}
private:
@ -166,17 +195,18 @@ class LexTableBuilderImpl : public LexTableBuilder {
for (const LexItem &item : transition.destination.entries) {
if (item.lhs == accept_action.symbol) {
can_advance_for_accepted_token = true;
} else if (!prefer_advancing && !transition.in_main_token && !item.lhs.is_built_in()) {
shadowed_token_indices[item.lhs.index] = true;
} else if (item.lhs.index == current_conflict_detection_token_index &&
!prefer_advancing && !transition.in_main_token) {
current_conflict_value = true;
}
}
if (!can_advance_for_accepted_token) {
if (characters.intersects(separator_start_characters) ||
(grammar.variables[accept_action.symbol.index].is_string &&
characters.intersects(token_start_characters))) {
shadowed_token_indices[accept_action.symbol.index] = true;
}
if (accept_action.symbol.index == current_conflict_detection_token_index &&
!can_advance_for_accepted_token &&
(characters.intersects(separator_start_characters) ||
(characters.intersects(current_conflict_detection_following_characters) &&
grammar.variables[accept_action.symbol.index].is_string))) {
current_conflict_value = true;
}
if (!prefer_advancing) continue;
@ -346,8 +376,9 @@ LexTable LexTableBuilder::build(ParseTable *parse_table) {
return static_cast<LexTableBuilderImpl *>(this)->build(parse_table);
}
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right) {
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right);
bool LexTableBuilder::detect_conflict(Symbol::Index left, Symbol::Index right,
const vector<set<Symbol::Index>> &following_terminals) {
return static_cast<LexTableBuilderImpl *>(this)->detect_conflict(left, right, following_terminals);
}
} // namespace build_tables

View file

@ -2,6 +2,8 @@
#define COMPILER_BUILD_TABLES_LEX_TABLE_BUILDER_H_
#include <memory>
#include <vector>
#include <set>
#include "compiler/lex_table.h"
namespace tree_sitter {
@ -15,7 +17,11 @@ class LexTableBuilder {
public:
static std::unique_ptr<LexTableBuilder> create(const LexicalGrammar &);
LexTable build(ParseTable *);
bool detect_conflict(rules::Symbol::Index, rules::Symbol::Index);
bool detect_conflict(
rules::Symbol::Index,
rules::Symbol::Index,
const std::vector<std::set<rules::Symbol::Index>> &following_terminals_by_terminal_index
);
protected:
LexTableBuilder() = default;
};

View file

@ -60,6 +60,10 @@ int ParseItem::precedence() const {
}
}
// Returns the dynamic precedence declared on this item's production. Unlike
// static precedence, this value is used at parse time (not table-build time)
// to resolve runtime ambiguities between competing interpretations.
int ParseItem::dynamic_precedence() const {
return production->dynamic_precedence;
}
rules::Associativity ParseItem::associativity() const {
if (is_done()) {
if (production->empty()) {
@ -93,13 +97,12 @@ size_t ParseItemSet::unfinished_item_signature() const {
ParseItem previous_item;
for (auto &pair : entries) {
const ParseItem &item = pair.first;
if (item.step_index < item.production->size()) {
if (item.variable_index != previous_item.variable_index &&
item.step_index != previous_item.step_index) {
hash_combine(&result, item.variable_index);
hash_combine(&result, item.step_index);
previous_item = item;
}
if (item.step_index < item.production->size() &&
(item.variable_index != previous_item.variable_index ||
item.step_index != previous_item.step_index)) {
hash_combine(&result, item.variable_index);
hash_combine(&result, item.step_index);
previous_item = item;
}
}
return result;

View file

@ -26,6 +26,7 @@ struct ParseItem {
rules::Symbol lhs() const;
rules::Symbol next_symbol() const;
int precedence() const;
int dynamic_precedence() const;
rules::Associativity associativity() const;
bool is_done() const;

View file

@ -1,4 +1,5 @@
#include "compiler/build_tables/parse_item_set_builder.h"
#include <cassert>
#include <set>
#include <vector>
#include <utility>
@ -26,18 +27,20 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
for (size_t i = 0, n = lexical_grammar.variables.size(); i < n; i++) {
Symbol symbol = Symbol::terminal(i);
first_sets.insert({symbol, LookaheadSet({ symbol })});
first_sets.insert({symbol, LookaheadSet({symbol})});
last_sets.insert({symbol, LookaheadSet({symbol})});
}
for (size_t i = 0, n = grammar.external_tokens.size(); i < n; i++) {
Symbol symbol = Symbol::external(i);
first_sets.insert({symbol, LookaheadSet({ symbol })});
first_sets.insert({symbol, LookaheadSet({symbol})});
last_sets.insert({symbol, LookaheadSet({symbol})});
}
for (size_t i = 0, n = grammar.variables.size(); i < n; i++) {
Symbol symbol = Symbol::non_terminal(i);
LookaheadSet first_set;
LookaheadSet first_set;
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push_back(symbol);
@ -57,6 +60,26 @@ ParseItemSetBuilder::ParseItemSetBuilder(const SyntaxGrammar &grammar,
}
first_sets.insert({symbol, first_set});
LookaheadSet last_set;
processed_non_terminals.clear();
symbols_to_process.clear();
symbols_to_process.push_back(symbol);
while (!symbols_to_process.empty()) {
Symbol current_symbol = symbols_to_process.back();
symbols_to_process.pop_back();
if (!current_symbol.is_non_terminal()) {
last_set.insert(current_symbol);
} else if (processed_non_terminals.insert(current_symbol.index).second) {
for (const Production &production : grammar.variables[current_symbol.index].productions) {
if (!production.empty()) {
symbols_to_process.push_back(production.back().symbol);
}
}
}
}
last_sets.insert({symbol, last_set});
}
vector<ParseItemSetComponent> components_to_process;
@ -161,5 +184,9 @@ LookaheadSet ParseItemSetBuilder::get_first_set(const rules::Symbol &symbol) con
return first_sets.find(symbol)->second;
}
// Returns the set of tokens that can appear LAST in a string derived from
// `symbol` (the counterpart of get_first_set).
// NOTE(review): `find(...)->second` is unchecked — dereferences end() for a
// symbol absent from last_sets; presumably every terminal, external token and
// non-terminal was inserted in the constructor, so lookups always succeed.
LookaheadSet ParseItemSetBuilder::get_last_set(const rules::Symbol &symbol) const {
return last_sets.find(symbol)->second;
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -20,6 +20,7 @@ class ParseItemSetBuilder {
};
std::map<rules::Symbol, LookaheadSet> first_sets;
std::map<rules::Symbol, LookaheadSet> last_sets;
std::map<rules::Symbol::Index, std::vector<ParseItemSetComponent>> component_cache;
std::vector<std::pair<ParseItem, LookaheadSet>> item_set_buffer;
@ -27,6 +28,7 @@ class ParseItemSetBuilder {
ParseItemSetBuilder(const SyntaxGrammar &, const LexicalGrammar &);
void apply_transitive_closure(ParseItemSet *);
LookaheadSet get_first_set(const rules::Symbol &) const;
LookaheadSet get_last_set(const rules::Symbol &) const;
};
} // namespace build_tables

View file

@ -401,52 +401,106 @@ class CCodeGenerator {
add_accept_token_action(lex_state.accept_action);
}
set<uint32_t> ruled_out_characters;
for (const auto &pair : lex_state.advance_actions) {
if (!pair.first.is_empty()) {
_if([&]() { add_character_set_condition(pair.first); },
[&]() { add_advance_action(pair.second); });
if (pair.first.is_empty()) continue;
size_t current_length = buffer.size();
line("if (");
if (add_character_set_condition(pair.first, ruled_out_characters)) {
add(")");
indent([&]() { add_advance_action(pair.second); });
ruled_out_characters.insert(pair.first.included_chars.begin(), pair.first.included_chars.end());
} else {
buffer.resize(current_length);
add_advance_action(pair.second);
}
}
line("END_STATE();");
}
void add_character_set_condition(const rules::CharacterSet &rule) {
bool add_character_set_condition(const rules::CharacterSet &rule, const set<uint32_t> &ruled_out_characters) {
if (rule.includes_all) {
add("!(");
add_character_range_conditions(rule.excluded_ranges());
add(")");
return add_character_range_conditions(rule.excluded_ranges(), ruled_out_characters, true);
} else {
add_character_range_conditions(rule.included_ranges());
return add_character_range_conditions(rule.included_ranges(), ruled_out_characters, false);
}
}
void add_character_range_conditions(const vector<rules::CharacterRange> &ranges) {
if (ranges.size() == 1) {
add_character_range_condition(*ranges.begin());
} else {
bool first = true;
for (const auto &range : ranges) {
if (!first) {
add(" ||");
line(" ");
bool add_character_range_conditions(const vector<rules::CharacterRange> &ranges,
const set<uint32_t> &ruled_out_characters,
bool is_negated) {
bool first = true;
for (auto iter = ranges.begin(), end = ranges.end(); iter != end;) {
auto range = *iter;
bool range_is_ruled_out = true;
for (uint32_t c = range.min; c <= range.max; c++) {
if (!ruled_out_characters.count(c)) {
range_is_ruled_out = false;
break;
}
}
if (range_is_ruled_out) {
++iter;
continue;
}
auto next_iter = iter + 1;
while (next_iter != end) {
bool can_join_ranges = true;
for (uint32_t character = range.max + 1; character < next_iter->min; character++) {
if (!ruled_out_characters.count(character)) {
can_join_ranges = false;
break;
}
}
add("(");
add_character_range_condition(range);
add(")");
first = false;
if (can_join_ranges) {
range.max = next_iter->max;
++next_iter;
} else {
break;
}
}
if (!first) {
add(is_negated ? " &&" : " ||");
line(" ");
}
add_character_range_condition(range, is_negated);
first = false;
iter = next_iter;
}
return !first;
}
void add_character_range_condition(const rules::CharacterRange &range) {
if (range.min == range.max) {
add("lookahead == " + escape_char(range.min));
void add_character_range_condition(const rules::CharacterRange &range, bool is_negated) {
auto min = escape_char(range.min);
auto max = escape_char(range.max);
if (is_negated) {
if (range.max == range.min) {
add("lookahead != " + min);
} else if (range.max == range.min + 1) {
add("lookahead != " + min + " &&");
line(" lookahead != " + max);
} else {
add("(lookahead < " + min + " || lookahead > " + max + ")");
}
} else {
add(escape_char(range.min) + string(" <= lookahead && lookahead <= ") +
escape_char(range.max));
if (range.max == range.min) {
add("lookahead == " + min);
} else if (range.max == range.min + 1) {
add("lookahead == " + min + " ||");
line(" lookahead == " + max);
} else {
add("(" + min + " <= lookahead && lookahead <= " + max + ")");
}
}
}
@ -490,12 +544,17 @@ class CCodeGenerator {
break;
case ParseActionTypeReduce:
if (action.fragile) {
add("REDUCE_FRAGILE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
add("REDUCE_FRAGILE");
} else {
add("REDUCE(" + symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
add("REDUCE");
}
add("(");
add(symbol_id(action.symbol));
add(", ");
add(to_string(action.consumed_symbol_count));
add(", " + to_string(action.dynamic_precedence));
add(")");
break;
case ParseActionTypeRecover:
add("RECOVER(" + to_string(action.state_index) + ")");
@ -594,13 +653,6 @@ class CCodeGenerator {
indent(body);
}
void _if(function<void()> condition, function<void()> body) {
line("if (");
indent(condition);
add(")");
indent(body);
}
string sanitize_name_for_string(string name) {
util::str_replace(&name, "\\", "\\\\");
util::str_replace(&name, "\n", "\\n");

View file

@ -184,6 +184,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
return Rule(Metadata::prec_right(precedence_json.u.integer, result.rule));
}
if (type == "PREC_DYNAMIC") {
json_value precedence_json = rule_json->operator[]("value");
if (precedence_json.type != json_integer) {
return "Precedence value must be an integer";
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
}
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
}
return "Unknown rule type: " + type;
}

View file

@ -13,25 +13,14 @@ using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction(ParseActionType type, ParseStateId state_index,
Symbol symbol, size_t consumed_symbol_count,
const Production *production)
: type(type),
extra(false),
fragile(false),
state_index(state_index),
symbol(symbol),
consumed_symbol_count(consumed_symbol_count),
production(production) {}
ParseAction::ParseAction()
: type(ParseActionTypeError),
: production(nullptr),
consumed_symbol_count(0),
symbol(rules::NONE()),
type(ParseActionTypeError),
extra(false),
fragile(false),
state_index(-1),
symbol(rules::NONE()),
consumed_symbol_count(0),
production(nullptr) {}
state_index(-1) {}
ParseAction ParseAction::Error() {
return ParseAction();
@ -44,12 +33,17 @@ ParseAction ParseAction::Accept() {
}
ParseAction ParseAction::Shift(ParseStateId state_index) {
return ParseAction(ParseActionTypeShift, state_index, rules::NONE(), 0, nullptr);
ParseAction result;
result.type = ParseActionTypeShift;
result.state_index = state_index;
return result;
}
ParseAction ParseAction::Recover(ParseStateId state_index) {
return ParseAction(ParseActionTypeRecover, state_index, rules::NONE(), 0,
nullptr);
ParseAction result;
result.type = ParseActionTypeRecover;
result.state_index = state_index;
return result;
}
ParseAction ParseAction::ShiftExtra() {
@ -61,8 +55,13 @@ ParseAction ParseAction::ShiftExtra() {
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
const Production &production) {
return ParseAction(ParseActionTypeReduce, 0, symbol, consumed_symbol_count,
&production);
ParseAction result;
result.type = ParseActionTypeReduce;
result.symbol = symbol;
result.consumed_symbol_count = consumed_symbol_count;
result.production = &production;
result.dynamic_precedence = production.dynamic_precedence;
return result;
}
int ParseAction::precedence() const {

View file

@ -24,9 +24,6 @@ enum ParseActionType {
struct ParseAction {
ParseAction();
ParseAction(ParseActionType type, ParseStateId state_index,
rules::Symbol symbol, size_t consumed_symbol_count,
const Production *);
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index);
@ -39,13 +36,14 @@ struct ParseAction {
rules::Associativity associativity() const;
int precedence() const;
const Production *production;
size_t consumed_symbol_count;
rules::Symbol symbol;
int dynamic_precedence;
ParseActionType type;
bool extra;
bool fragile;
ParseStateId state_index;
rules::Symbol symbol;
size_t consumed_symbol_count;
const Production *production;
};
struct ParseTableEntry {
@ -71,7 +69,6 @@ struct ParseState {
std::map<rules::Symbol, ParseTableEntry> terminal_entries;
std::map<rules::Symbol::Index, ParseStateId> nonterminal_entries;
LexStateId lex_state_id;
size_t shift_actions_signature;
};
struct ParseTableSymbolMetadata {

View file

@ -1,6 +1,7 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include <vector>
#include <cassert>
#include <cmath>
#include <algorithm>
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
@ -26,7 +27,7 @@ class FlattenRule {
void apply(const Rule &rule) {
rule.match(
[&](const rules::Symbol &symbol) {
production.push_back(ProductionStep{
production.steps.push_back(ProductionStep{
symbol,
precedence_stack.back(),
associativity_stack.back()
@ -42,6 +43,10 @@ class FlattenRule {
associativity_stack.push_back(metadata.params.associativity);
}
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
production.dynamic_precedence = metadata.params.dynamic_precedence;
}
apply(*metadata.rule);
if (metadata.params.has_precedence) {

View file

@ -38,20 +38,11 @@ static set<uint32_t> add_chars(set<uint32_t> *left, const set<uint32_t> &right)
return result;
}
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &chars) {
static vector<CharacterRange> consolidate_ranges(const set<uint32_t> &characters) {
vector<CharacterRange> result;
for (uint32_t c : chars) {
auto size = result.size();
if (size >= 2 && result[size - 2].max == (c - 2)) {
result.pop_back();
for (uint32_t c : characters) {
if (!result.empty() && result.back().max == c - 1) {
result.back().max = c;
} else if (size >= 1) {
CharacterRange &last = result.back();
if (last.min < last.max && last.max == (c - 1)) {
last.max = c;
} else {
result.push_back(CharacterRange(c));
}
} else {
result.push_back(CharacterRange(c));
}
@ -70,15 +61,17 @@ bool CharacterSet::operator==(const CharacterSet &other) const {
}
bool CharacterSet::operator<(const CharacterSet &other) const {
if (!includes_all && other.includes_all)
return true;
if (includes_all && !other.includes_all)
return false;
if (included_chars < other.included_chars)
return true;
if (other.included_chars < included_chars)
return false;
return excluded_chars < other.excluded_chars;
if (!includes_all && other.includes_all) return true;
if (includes_all && !other.includes_all) return false;
if (includes_all) {
if (excluded_chars.size() > other.excluded_chars.size()) return true;
if (excluded_chars.size() < other.excluded_chars.size()) return false;
return excluded_chars < other.excluded_chars;
} else {
if (included_chars.size() < other.included_chars.size()) return true;
if (included_chars.size() > other.included_chars.size()) return false;
return included_chars < other.included_chars;
}
}
CharacterSet &CharacterSet::include_all() {
@ -131,8 +124,7 @@ void CharacterSet::add_set(const CharacterSet &other) {
excluded_chars.insert(c);
included_chars.clear();
} else {
for (uint32_t c : other.included_chars)
included_chars.insert(c);
included_chars.insert(other.included_chars.begin(), other.included_chars.end());
}
}
}

View file

@ -51,6 +51,12 @@ Metadata Metadata::prec_right(int precedence, const Rule &rule) {
return Metadata{rule, params};
}
Metadata Metadata::prec_dynamic(int dynamic_precedence, const Rule &rule) {
MetadataParams params;
params.dynamic_precedence = dynamic_precedence;
return Metadata{rule, params};
}
Metadata Metadata::separator(const Rule &rule) {
MetadataParams params;
params.has_precedence = true;

View file

@ -14,6 +14,7 @@ enum Associativity {
struct MetadataParams {
int precedence;
int dynamic_precedence;
Associativity associativity;
bool has_precedence;
bool has_associativity;
@ -23,8 +24,8 @@ struct MetadataParams {
bool is_main_token;
inline MetadataParams() :
precedence{0}, associativity{AssociativityNone}, has_precedence{false},
has_associativity{false}, is_token{false}, is_string{false},
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
has_precedence{false}, has_associativity{false}, is_token{false}, is_string{false},
is_active{false}, is_main_token{false} {}
inline bool operator==(const MetadataParams &other) const {
@ -33,6 +34,7 @@ struct MetadataParams {
associativity == other.associativity &&
has_precedence == other.has_precedence &&
has_associativity == other.has_associativity &&
dynamic_precedence == other.dynamic_precedence &&
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
@ -54,6 +56,7 @@ struct Metadata {
static Metadata prec(int precedence, const Rule &rule);
static Metadata prec_left(int precedence, const Rule &rule);
static Metadata prec_right(int precedence, const Rule &rule);
static Metadata prec_dynamic(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
@ -63,4 +66,4 @@ struct Metadata {
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_METADATA_H_
#endif // COMPILER_RULES_METADATA_H_

View file

@ -11,8 +11,9 @@ namespace tree_sitter {
struct ProductionStep {
inline bool operator==(const ProductionStep &other) const {
return symbol == other.symbol && precedence == other.precedence &&
associativity == other.associativity;
return symbol == other.symbol &&
precedence == other.precedence &&
associativity == other.associativity;
}
rules::Symbol symbol;
@ -20,7 +21,21 @@ struct ProductionStep {
rules::Associativity associativity;
};
typedef std::vector<ProductionStep> Production;
struct Production {
std::vector<ProductionStep> steps;
int dynamic_precedence = 0;
inline bool operator==(const Production &other) const {
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
}
inline ProductionStep &back() { return steps.back(); }
inline const ProductionStep &back() const { return steps.back(); }
inline bool empty() const { return steps.empty(); }
inline size_t size() const { return steps.size(); }
inline const ProductionStep &operator[](int i) const { return steps[i]; }
inline const ProductionStep &at(int i) const { return steps[i]; }
};
struct SyntaxVariable {
std::string name;

View file

@ -437,22 +437,36 @@ static Tree *parser__get_lookahead(Parser *self, StackVersion version,
}
static bool parser__select_tree(Parser *self, Tree *left, Tree *right) {
if (!left)
return true;
if (!right)
return false;
if (!left) return true;
if (!right) return false;
if (right->error_cost < left->error_cost) {
LOG("select_smaller_error symbol:%s, over_symbol:%s",
SYM_NAME(right->symbol), SYM_NAME(left->symbol));
return true;
}
if (left->error_cost < right->error_cost) {
LOG("select_smaller_error symbol:%s, over_symbol:%s",
SYM_NAME(left->symbol), SYM_NAME(right->symbol));
return false;
}
if (left->error_cost > 0) return -1;
if (right->dynamic_precedence > left->dynamic_precedence) {
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
SYM_NAME(right->symbol), right->dynamic_precedence, SYM_NAME(left->symbol),
left->dynamic_precedence);
return true;
}
if (left->dynamic_precedence > right->dynamic_precedence) {
LOG("select_higher_precedence symbol:%s, prec:%u, over_symbol:%s, other_prec:%u",
SYM_NAME(left->symbol), left->dynamic_precedence, SYM_NAME(right->symbol),
right->dynamic_precedence);
return false;
}
if (left->error_cost > 0) return true;
int comparison = ts_tree_compare(left, right);
switch (comparison) {
@ -544,7 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree,
static StackPopResult parser__reduce(Parser *self, StackVersion version,
TSSymbol symbol, unsigned count,
bool fragile, bool allow_skipping) {
bool fragile, int dynamic_precedence,
bool allow_skipping) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
@ -587,6 +602,8 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
}
}
parent->dynamic_precedence += dynamic_precedence;
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(language, state, symbol);
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
@ -929,6 +946,7 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence
});
default:
break;
@ -939,8 +957,10 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
bool did_reduce = false;
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
ReduceAction action = self->reduce_actions.contents[i];
StackPopResult reduction =
parser__reduce(self, version, action.symbol, action.count, true, false);
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.count, true,
action.dynamic_precedence, false
);
if (reduction.stopped_at_error) {
ts_tree_array_delete(&reduction.slices.contents[0].trees);
ts_stack_remove_version(self->stack, reduction.slices.contents[0].version);
@ -1180,12 +1200,13 @@ static void parser__advance(Parser *self, StackVersion version,
unsigned child_count = action.params.child_count;
TSSymbol symbol = action.params.symbol;
unsigned dynamic_precedence = action.params.dynamic_precedence;
bool fragile = action.fragile;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
StackPopResult reduction =
parser__reduce(self, version, symbol, child_count, fragile, true);
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
StackSlice slice = *array_front(&reduction.slices);
if (reduction.stopped_at_error) {
reduction_stopped_at_error = true;

View file

@ -11,6 +11,7 @@ extern "C" {
typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -150,6 +150,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
self->visible_child_count = 0;
self->error_cost = 0;
self->has_external_tokens = false;
self->dynamic_precedence = 0;
for (uint32_t i = 0; i < child_count; i++) {
Tree *child = children[i];
@ -165,6 +166,7 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
}
self->error_cost += child->error_cost;
self->dynamic_precedence += child->dynamic_precedence;
if (child->visible) {
self->visible_child_count++;

View file

@ -46,6 +46,7 @@ typedef struct Tree {
} first_leaf;
uint32_t ref_count;
int dynamic_precedence;
bool visible : 1;
bool named : 1;
bool extra : 1;

View file

@ -16,7 +16,7 @@ describe("LexTableBuilder::detect_conflict", []() {
auto builder = LexTableBuilder::create(LexicalGrammar{
{
LexicalVariable{
"token_1",
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'a' }),
@ -26,7 +26,7 @@ describe("LexTableBuilder::detect_conflict", []() {
false
},
LexicalVariable{
"token_2",
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ 'b' }),
@ -39,22 +39,22 @@ describe("LexTableBuilder::detect_conflict", []() {
separators
});
AssertThat(builder->detect_conflict(0, 1), IsFalse());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when one token matches a string that the other matches, "
"plus some addition content that begins with a separator character", [&]() {
it("returns true when the left token can match a string that the right token matches, "
"plus a separator character", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_1",
"token_0",
VariableTypeNamed,
Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
false
},
LexicalVariable{
"token_2",
"token_1",
VariableTypeNamed,
Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
true
@ -64,24 +64,32 @@ describe("LexTableBuilder::detect_conflict", []() {
};
auto builder = LexTableBuilder::create(grammar);
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
grammar.variables[1].is_string = false;
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
});
it("returns true when one token matches a string that the other matches, "
"plus some addition content that matches another one-character token", [&]() {
it("returns true when the left token matches a string that the right token matches, "
"plus the first character of some token that can follow the right token", [&]() {
LexicalGrammar grammar{
{
LexicalVariable{
"token_0",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '=' }),
}),
true
},
LexicalVariable{
"token_1",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '>' }),
}),
true
},
@ -89,7 +97,7 @@ describe("LexTableBuilder::detect_conflict", []() {
"token_2",
VariableTypeNamed,
Rule::seq({
CharacterSet({ '>' }),
CharacterSet({ '=' }),
}),
true
},
@ -97,9 +105,17 @@ describe("LexTableBuilder::detect_conflict", []() {
separators
};
// If no tokens can follow token_1, then there's no conflict
auto builder = LexTableBuilder::create(grammar);
AssertThat(builder->detect_conflict(0, 1), IsTrue());
AssertThat(builder->detect_conflict(1, 0), IsFalse());
vector<set<Symbol::Index>> following_tokens_by_token_index(3);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
// If token_2 can follow token_1, then token_0 conflicts with token_1
builder = LexTableBuilder::create(grammar);
following_tokens_by_token_index[1].insert(2);
AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue());
AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
});
});

View file

@ -25,25 +25,25 @@ describe("ParseItemSetBuilder", []() {
it("adds items at the beginnings of referenced rules", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({
}, 0},
Production{{
{Symbol::non_terminal(2), 0, AssociativityNone},
})
}, 0}
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(14), 0, AssociativityNone},
{Symbol::terminal(15), 0, AssociativityNone},
})
}, 0}
}},
}, {}, {}, {}};
@ -84,17 +84,17 @@ describe("ParseItemSetBuilder", []() {
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
}),
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production({
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
}),
Production({})
}, 0},
Production{{}, 0}
}},
}, {}, {}, {}};

View file

@ -34,22 +34,64 @@ describe("flatten_grammar", []() {
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(3), 102, AssociativityRight},
{Symbol::non_terminal(4), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}),
Production({
}, 0},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(5), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 0}
})));
});
it("stores the maximum dynamic precedence specified in each production", [&]() {
SyntaxVariable result = flatten_rule({
"test",
VariableTypeNamed,
Rule::seq({
Symbol::non_terminal(1),
Metadata::prec_dynamic(101, Rule::seq({
Symbol::non_terminal(2),
Rule::choice({
Metadata::prec_dynamic(102, Rule::seq({
Symbol::non_terminal(3),
Symbol::non_terminal(4)
})),
Symbol::non_terminal(5),
}),
Symbol::non_terminal(6),
})),
Symbol::non_terminal(7),
})
})))
});
AssertThat(result.name, Equals("test"));
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(3), 0, AssociativityNone},
{Symbol::non_terminal(4), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 102},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(5), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
}, 101}
})));
});
it("uses the last assigned precedence", [&]() {
@ -63,11 +105,11 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
{Symbol::non_terminal(2), 101, AssociativityLeft},
})
})))
{Symbol::non_terminal(2), 101, AssociativityLeft},
}, 0}
})));
result = flatten_rule({
"test2",
@ -78,10 +120,10 @@ describe("flatten_grammar", []() {
});
AssertThat(result.productions, Equals(vector<Production>({
Production({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
})
})))
}, 0}
})));
});
});

View file

@ -305,29 +305,17 @@ describe("CharacterSet", []() {
});
describe("::included_ranges", [&]() {
it("consolidates sequences of 3 or more consecutive characters into ranges", [&]() {
it("consolidates consecutive sequences of characters into ranges", [&]() {
CharacterSet set1 = CharacterSet()
.include('a', 'c')
.include('g')
.include('e', 'j')
.include('m')
.include('z');
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
CharacterRange{'a', 'c'},
CharacterRange('g'),
CharacterRange('z'),
})));
});
it("doesn't consolidate sequences of 2 consecutive characters", [&]() {
CharacterSet set1 = CharacterSet()
.include('a', 'b')
.include('g')
.include('z');
AssertThat(set1.included_ranges(), Equals(vector<CharacterRange>({
CharacterRange('a'),
CharacterRange('b'),
CharacterRange('g'),
CharacterRange{'e', 'j'},
CharacterRange('m'),
CharacterRange('z'),
})));
});

View file

@ -0,0 +1,25 @@
===============================
Declarations
===============================
int * x
---
(program (declaration
(type (identifier))
(declarator (identifier))))
===============================
Expressions
===============================
int * x * y
---
(program (expression
(expression
(expression (identifier))
(expression (identifier)))
(expression (identifier))))

View file

@ -0,0 +1,73 @@
{
"name": "dynamic_precedence",
"conflicts": [
["expression", "type"]
],
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"program": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "declaration"},
{"type": "SYMBOL", "name": "expression"},
]
},
"expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "CHOICE",
"members": [
{
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "expression"}
]
},
{
"type": "SYMBOL",
"name": "identifier"
}
]
}
},
"declaration": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "type"},
{"type": "SYMBOL", "name": "declarator"}
]
},
"declarator": {
"type": "PREC_DYNAMIC",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "*"},
{"type": "SYMBOL", "name": "identifier"}
]
}
},
"type": {
"type": "SYMBOL",
"name": "identifier"
},
"identifier": {
"type": "PATTERN",
"value": "[a-zA-Z]+"
}
}
}

View file

@ -0,0 +1 @@
This grammar contains an ambiguity that is resolved at runtime. The `PREC_DYNAMIC` rule indicates that the `declarator` interpretation should be preferred over the `expression` interpretation when resolving the ambiguity at runtime.

View file

@ -1,11 +1,11 @@
Unresolved conflict for symbol sequence:
identifier • '{' …
identifier identifier • '{' …
Possible interpretations:
1: (expression identifier) • '{' …
2: (function_call identifier • block)
1: identifier (expression identifier) • '{' …
2: identifier (function_call identifier • block)
Possible resolutions:

View file

@ -136,9 +136,14 @@ ostream &operator<<(ostream &stream, const Variable &variable) {
return stream << "(Variable " << variable.name << " " << variable.rule << ")";
}
ostream &operator<<(ostream &stream, const Production &production) {
return stream << "(Production " << production.steps << " " <<
to_string(production.dynamic_precedence) << ")";
}
ostream &operator<<(ostream &stream, const SyntaxVariable &variable) {
return stream << "(Variable " << variable.name << " " << variable.productions <<
" " << to_string(variable.type) << "}";
" " << to_string(variable.type) << ")";
}
ostream &operator<<(ostream &stream, const LexicalVariable &variable) {

View file

@ -110,6 +110,7 @@ ostream &operator<<(ostream &, const InputGrammar &);
ostream &operator<<(ostream &, const CompileError &);
ostream &operator<<(ostream &, const ExternalToken &);
ostream &operator<<(ostream &, const ProductionStep &);
ostream &operator<<(ostream &, const Production &);
ostream &operator<<(ostream &, const PrecedenceRange &);
ostream &operator<<(ostream &, const Variable &);
ostream &operator<<(ostream &, const LexicalVariable &);

View file

@ -13,7 +13,7 @@ vector<string> test_languages = list_directory(grammars_dir_path);
for (auto &language_name : test_languages) {
if (language_name == "readme.md") continue;
describe(("test language: " + language_name).c_str(), [&]() {
describe(("test grammar: " + language_name).c_str(), [&]() {
string directory_path = grammars_dir_path + "/" + language_name;
string grammar_path = directory_path + "/grammar.json";
string grammar_json = read_file(grammar_path);