Introduce RENAME rule type
This commit is contained in:
parent
0b94e9d814
commit
b3a72954ff
26 changed files with 516 additions and 246 deletions
|
|
@ -135,6 +135,23 @@
|
|||
"required": ["type", "members"]
|
||||
},
|
||||
|
||||
"rename-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
"type": {
|
||||
"type": "string",
|
||||
"pattern": "^RENAME$"
|
||||
},
|
||||
"value": {
|
||||
"type": "string"
|
||||
},
|
||||
"content": {
|
||||
"$ref": "#/definitions/rule"
|
||||
}
|
||||
},
|
||||
"required": ["type", "content", "value"]
|
||||
},
|
||||
|
||||
"repeat-rule": {
|
||||
"type": "object",
|
||||
"properties": {
|
||||
|
|
@ -202,6 +219,7 @@
|
|||
{ "$ref": "#/definitions/symbol-rule" },
|
||||
{ "$ref": "#/definitions/seq-rule" },
|
||||
{ "$ref": "#/definitions/choice-rule" },
|
||||
{ "$ref": "#/definitions/rename-rule" },
|
||||
{ "$ref": "#/definitions/repeat1-rule" },
|
||||
{ "$ref": "#/definitions/repeat-rule" },
|
||||
{ "$ref": "#/definitions/token-rule" },
|
||||
|
|
|
|||
|
|
@ -9,9 +9,8 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
typedef unsigned short TSSymbol;
|
||||
typedef unsigned short TSStateId;
|
||||
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSStateId;
|
||||
typedef uint8_t TSExternalTokenState[16];
|
||||
|
||||
#define ts_builtin_sym_error ((TSSymbol)-1)
|
||||
|
|
@ -40,16 +39,19 @@ typedef enum {
|
|||
|
||||
typedef struct {
|
||||
union {
|
||||
TSStateId to_state;
|
||||
struct {
|
||||
short dynamic_precedence;
|
||||
TSSymbol symbol;
|
||||
unsigned short child_count;
|
||||
TSStateId to_state;
|
||||
bool extra : 1;
|
||||
};
|
||||
} params;
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
uint16_t dynamic_precedence;
|
||||
uint8_t child_count;
|
||||
uint8_t rename_sequence_id : 7;
|
||||
bool fragile : 1;
|
||||
};
|
||||
};
|
||||
TSParseActionType type : 4;
|
||||
bool extra : 1;
|
||||
bool fragile : 1;
|
||||
} TSParseAction;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -60,7 +62,7 @@ typedef struct {
|
|||
typedef union {
|
||||
TSParseAction action;
|
||||
struct {
|
||||
unsigned short count;
|
||||
uint8_t count;
|
||||
bool reusable : 1;
|
||||
bool depends_on_lookahead : 1;
|
||||
};
|
||||
|
|
@ -73,9 +75,11 @@ typedef struct TSLanguage {
|
|||
uint32_t external_token_count;
|
||||
const char **symbol_names;
|
||||
const TSSymbolMetadata *symbol_metadata;
|
||||
const unsigned short *parse_table;
|
||||
const uint16_t *parse_table;
|
||||
const TSParseActionEntry *parse_actions;
|
||||
const TSLexMode *lex_modes;
|
||||
const TSSymbol *rename_sequences;
|
||||
uint16_t max_rename_sequence_length;
|
||||
bool (*lex_fn)(TSLexer *, TSStateId);
|
||||
struct {
|
||||
const bool *states;
|
||||
|
|
@ -127,70 +131,62 @@ typedef struct TSLanguage {
|
|||
#define STATE(id) id
|
||||
#define ACTIONS(id) id
|
||||
|
||||
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, .params = {.to_state = to_state_value } \
|
||||
} \
|
||||
#define SHIFT(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.to_state = to_state_value, \
|
||||
} \
|
||||
}
|
||||
|
||||
#define RECOVER(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeRecover, .params = {.to_state = to_state_value } \
|
||||
} \
|
||||
#define RECOVER(to_state_value) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeRecover, \
|
||||
.to_state = to_state_value \
|
||||
} \
|
||||
}
|
||||
|
||||
#define SHIFT_EXTRA() \
|
||||
{ \
|
||||
{ .type = TSParseActionTypeShift, .extra = true } \
|
||||
#define SHIFT_EXTRA() \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeShift, \
|
||||
.extra = true \
|
||||
} \
|
||||
}
|
||||
|
||||
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.params = { \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
.dynamic_precedence = dynamic_precedence_val, \
|
||||
} \
|
||||
} \
|
||||
#define REDUCE(symbol_val, child_count_val, ...) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
__VA_ARGS__ \
|
||||
} \
|
||||
}
|
||||
|
||||
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
|
||||
{ \
|
||||
{ \
|
||||
.type = TSParseActionTypeReduce, \
|
||||
.fragile = true, \
|
||||
.params = { \
|
||||
.symbol = symbol_val, \
|
||||
.child_count = child_count_val, \
|
||||
.dynamic_precedence = dynamic_precedence_val, \
|
||||
} \
|
||||
} \
|
||||
}
|
||||
|
||||
#define ACCEPT_INPUT() \
|
||||
{ \
|
||||
{ .type = TSParseActionTypeAccept } \
|
||||
}
|
||||
|
||||
#define GET_LANGUAGE(...) \
|
||||
static TSLanguage language = { \
|
||||
.version = LANGUAGE_VERSION, \
|
||||
.symbol_count = SYMBOL_COUNT, \
|
||||
.token_count = TOKEN_COUNT, \
|
||||
.symbol_metadata = ts_symbol_metadata, \
|
||||
.parse_table = (const unsigned short *)ts_parse_table, \
|
||||
.parse_actions = ts_parse_actions, \
|
||||
.lex_modes = ts_lex_modes, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.lex_fn = ts_lex, \
|
||||
.external_token_count = EXTERNAL_TOKEN_COUNT, \
|
||||
.external_scanner = {__VA_ARGS__} \
|
||||
}; \
|
||||
return &language \
|
||||
/*
 * Expands to the definition of a function-local static TSLanguage, filled
 * from the tables and count macros that the generated parser defines at the
 * expansion site (ts_parse_table, ts_parse_actions, ts_lex_modes,
 * ts_symbol_names, ts_symbol_metadata, ts_rename_sequences, ts_lex,
 * SYMBOL_COUNT, TOKEN_COUNT, EXTERNAL_TOKEN_COUNT, LANGUAGE_VERSION,
 * MAX_RENAME_SEQUENCE_LENGTH), followed by `return &language`.
 *
 * The variadic arguments initialize the `external_scanner` member.
 * The expansion deliberately ends without a semicolon; the use site supplies
 * it.
 *
 * The two-dimensional tables are cast to flat element pointers; the
 * parse-table cast uses uint16_t to match the declared type of
 * TSLanguage.parse_table.
 */
#define GET_LANGUAGE(...)                                        \
  static TSLanguage language = {                                 \
    .version = LANGUAGE_VERSION,                                 \
    .symbol_count = SYMBOL_COUNT,                                \
    .token_count = TOKEN_COUNT,                                  \
    .symbol_metadata = ts_symbol_metadata,                       \
    .parse_table = (const uint16_t *)ts_parse_table,             \
    .parse_actions = ts_parse_actions,                           \
    .lex_modes = ts_lex_modes,                                   \
    .symbol_names = ts_symbol_names,                             \
    .rename_sequences = (const TSSymbol *)ts_rename_sequences,   \
    .max_rename_sequence_length = MAX_RENAME_SEQUENCE_LENGTH,    \
    .lex_fn = ts_lex,                                            \
    .external_token_count = EXTERNAL_TOKEN_COUNT,                \
    .external_scanner = {__VA_ARGS__}                            \
  };                                                             \
  return &language
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@
|
|||
'src/compiler/prepare_grammar/prepare_grammar.cc',
|
||||
'src/compiler/prepare_grammar/token_description.cc',
|
||||
'src/compiler/rule.cc',
|
||||
'src/compiler/syntax_grammar.cc',
|
||||
'src/compiler/rules/character_set.cc',
|
||||
'src/compiler/rules/choice.cc',
|
||||
'src/compiler/rules/metadata.cc',
|
||||
|
|
|
|||
|
|
@ -63,21 +63,26 @@ class ParseTableBuilder {
|
|||
processing_recovery_states(false) {}
|
||||
|
||||
pair<ParseTable, CompileError> build() {
|
||||
// Ensure that the empty rename sequence has index 0.
|
||||
parse_table.rename_sequences.push_back({});
|
||||
|
||||
// Ensure that the error state has index 0.
|
||||
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
|
||||
|
||||
// Add the starting state.
|
||||
Symbol start_symbol = grammar.variables.empty() ?
|
||||
Symbol::terminal(0) :
|
||||
Symbol::non_terminal(0);
|
||||
Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0};
|
||||
|
||||
ParseStateId error_state_id = add_parse_state({}, ParseItemSet());
|
||||
add_parse_state({}, ParseItemSet({
|
||||
Production start_production{{{start_symbol, 0, rules::AssociativityNone, ""}}, 0};
|
||||
add_parse_state({}, ParseItemSet{{
|
||||
{
|
||||
ParseItem(rules::START(), start_production, 0),
|
||||
LookaheadSet({END_OF_INPUT()}),
|
||||
},
|
||||
}));
|
||||
}});
|
||||
|
||||
CompileError error = process_part_state_queue();
|
||||
if (error.type != TSCompileErrorTypeNone) return {parse_table, error};
|
||||
if (error) return {parse_table, error};
|
||||
|
||||
compute_unmergable_token_pairs();
|
||||
|
||||
|
|
@ -191,9 +196,14 @@ class ParseTableBuilder {
|
|||
// If the item is finished, immediately add a Reduce or Accept action to
|
||||
// the parse table for each of its lookahead terminals.
|
||||
if (item.is_done()) {
|
||||
ParseAction action = (item.lhs() == rules::START()) ?
|
||||
ParseAction::Accept() :
|
||||
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
|
||||
ParseAction action;
|
||||
|
||||
if (item.lhs() == rules::START()) {
|
||||
action = ParseAction::Accept();
|
||||
} else {
|
||||
action = ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
|
||||
action.rename_sequence_id = get_rename_sequence_id(*item.production);
|
||||
}
|
||||
|
||||
int precedence = item.precedence();
|
||||
lookahead_symbols.for_each([&](Symbol lookahead) {
|
||||
|
|
@ -688,6 +698,27 @@ class ParseTableBuilder {
|
|||
return fragile_productions.find(production) != fragile_productions.end();
|
||||
}
|
||||
|
||||
unsigned get_rename_sequence_id(const Production &production) {
|
||||
RenameSequence rename_sequence;
|
||||
for (unsigned i = 0, n = production.size(); i < n; i++) {
|
||||
auto &step = production.at(i);
|
||||
if (!step.name_replacement.empty()) {
|
||||
rename_sequence.resize(production.size());
|
||||
rename_sequence[i] = step.name_replacement;
|
||||
}
|
||||
}
|
||||
|
||||
auto begin = parse_table.rename_sequences.begin();
|
||||
auto end = parse_table.rename_sequences.end();
|
||||
auto iter = find(begin, end, rename_sequence);
|
||||
if (iter != end) {
|
||||
return iter - begin;
|
||||
} else {
|
||||
parse_table.rename_sequences.push_back(move(rename_sequence));
|
||||
return parse_table.rename_sequences.size() - 1;
|
||||
}
|
||||
}
|
||||
|
||||
SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) {
|
||||
if (!sequence.empty()) {
|
||||
const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back());
|
||||
|
|
|
|||
|
|
@ -27,6 +27,11 @@ bool ParseItem::operator==(const ParseItem &other) const {
|
|||
if (step_index != other.step_index) return false;
|
||||
if (variable_index != other.variable_index) return false;
|
||||
if (production->size() != other.production->size()) return false;
|
||||
for (size_t i = 0; i < step_index; i++) {
|
||||
if (production->at(i).name_replacement != other.production->at(i).name_replacement) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
if (is_done()) {
|
||||
if (!production->empty()) {
|
||||
if (production->back().precedence != other.production->back().precedence) return false;
|
||||
|
|
@ -47,6 +52,10 @@ bool ParseItem::operator<(const ParseItem &other) const {
|
|||
if (other.variable_index < variable_index) return false;
|
||||
if (production->size() < other.production->size()) return true;
|
||||
if (other.production->size() < production->size()) return false;
|
||||
for (size_t i = 0; i < step_index; i++) {
|
||||
if (production->at(i).name_replacement < other.production->at(i).name_replacement) return true;
|
||||
if (other.production->at(i).name_replacement < production->at(i).name_replacement) return false;
|
||||
}
|
||||
if (is_done()) {
|
||||
if (!production->empty()) {
|
||||
if (production->back().precedence < other.production->back().precedence) return true;
|
||||
|
|
@ -106,11 +115,6 @@ Symbol ParseItem::next_symbol() const {
|
|||
return production->at(step_index).symbol;
|
||||
}
|
||||
|
||||
ParseItemSet::ParseItemSet() {}
|
||||
|
||||
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
|
||||
: entries(entries) {}
|
||||
|
||||
bool ParseItemSet::operator==(const ParseItemSet &other) const {
|
||||
return entries == other.entries;
|
||||
}
|
||||
|
|
@ -153,6 +157,9 @@ struct hash<ParseItem> {
|
|||
hash_combine(&result, item.step_index);
|
||||
hash_combine(&result, item.production->dynamic_precedence);
|
||||
hash_combine(&result, item.production->size());
|
||||
for (size_t i = 0; i < item.step_index; i++) {
|
||||
hash_combine(&result, item.production->at(i).name_replacement);
|
||||
}
|
||||
if (item.is_done()) {
|
||||
if (!item.production->empty()) {
|
||||
hash_combine(&result, item.production->back().precedence);
|
||||
|
|
|
|||
|
|
@ -36,9 +36,6 @@ struct ParseItem {
|
|||
};
|
||||
|
||||
struct ParseItemSet {
|
||||
ParseItemSet();
|
||||
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
|
||||
|
||||
bool operator==(const ParseItemSet &) const;
|
||||
void add(const ParseItemSet &);
|
||||
size_t unfinished_item_signature() const;
|
||||
|
|
|
|||
|
|
@ -23,19 +23,22 @@ using rules::Symbol;
|
|||
|
||||
static vector<Production> inline_production(const ParseItem &item, const SyntaxGrammar &grammar) {
|
||||
vector<Production> result;
|
||||
for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) {
|
||||
auto &inlined_step = item.production->at(item.step_index);
|
||||
auto &productions_to_insert = grammar.variables[inlined_step.symbol.index].productions;
|
||||
for (const Production &production_to_insert : productions_to_insert) {
|
||||
auto begin = item.production->steps.begin();
|
||||
auto end = item.production->steps.end();
|
||||
auto step = begin + item.step_index;
|
||||
|
||||
Production production{{begin, step}, item.production->dynamic_precedence};
|
||||
production.steps.insert(
|
||||
production.steps.end(),
|
||||
production_to_insert.steps.begin(),
|
||||
production_to_insert.steps.end()
|
||||
);
|
||||
production.back().precedence = item.precedence();
|
||||
production.back().associativity = item.associativity();
|
||||
for (auto &step : production_to_insert) {
|
||||
production.steps.push_back(step);
|
||||
if (!inlined_step.name_replacement.empty()) {
|
||||
production.steps.back().name_replacement = inlined_step.name_replacement;
|
||||
}
|
||||
}
|
||||
production.back().precedence = inlined_step.precedence;
|
||||
production.back().associativity = inlined_step.associativity;
|
||||
production.steps.insert(
|
||||
production.steps.end(),
|
||||
step + 1,
|
||||
|
|
|
|||
|
|
@ -76,6 +76,7 @@ class CCodeGenerator {
|
|||
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
|
||||
vector<set<Symbol::Index>> external_scanner_states;
|
||||
size_t next_parse_action_list_index;
|
||||
set<string> unique_replacement_names;
|
||||
|
||||
public:
|
||||
CCodeGenerator(string name, const ParseTable &parse_table,
|
||||
|
|
@ -98,6 +99,7 @@ class CCodeGenerator {
|
|||
add_symbol_enum();
|
||||
add_symbol_names_list();
|
||||
add_symbol_metadata_list();
|
||||
add_rename_sequences();
|
||||
add_lex_function();
|
||||
add_lex_modes_list();
|
||||
|
||||
|
|
@ -139,11 +141,22 @@ class CCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
unsigned max_rename_sequence_length = 0;
|
||||
for (const RenameSequence &rename_sequence : parse_table.rename_sequences) {
|
||||
if (rename_sequence.size() > max_rename_sequence_length) {
|
||||
max_rename_sequence_length = rename_sequence.size();
|
||||
}
|
||||
for (const string &name_replacement : rename_sequence) {
|
||||
unique_replacement_names.insert(name_replacement);
|
||||
}
|
||||
}
|
||||
|
||||
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
|
||||
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
|
||||
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
|
||||
line("#define TOKEN_COUNT " + to_string(token_count));
|
||||
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
|
||||
line("#define MAX_RENAME_SEQUENCE_LENGTH " + to_string(max_rename_sequence_length));
|
||||
line();
|
||||
}
|
||||
|
||||
|
|
@ -158,6 +171,11 @@ class CCodeGenerator {
|
|||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
for (const string &replacement_name : unique_replacement_names) {
|
||||
line(rename_id(replacement_name) + " = " + to_string(i) + ",");
|
||||
i++;
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
|
|
@ -166,9 +184,45 @@ class CCodeGenerator {
|
|||
void add_symbol_names_list() {
|
||||
line("static const char *ts_symbol_names[] = {");
|
||||
indent([&]() {
|
||||
for (const auto &entry : parse_table.symbols)
|
||||
line("[" + symbol_id(entry.first) + "] = \"" +
|
||||
sanitize_name_for_string(symbol_name(entry.first)) + "\",");
|
||||
for (const auto &entry : parse_table.symbols) {
|
||||
line(
|
||||
"[" + symbol_id(entry.first) + "] = \"" +
|
||||
sanitize_name_for_string(symbol_name(entry.first)) + "\","
|
||||
);
|
||||
}
|
||||
|
||||
for (const string &replacement_name : unique_replacement_names) {
|
||||
line(
|
||||
"[" + rename_id(replacement_name) + "] = \"" +
|
||||
sanitize_name_for_string(replacement_name) + "\","
|
||||
);
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
}
|
||||
|
||||
void add_rename_sequences() {
|
||||
|
||||
line(
|
||||
"static TSSymbol ts_rename_sequences[" +
|
||||
to_string(parse_table.rename_sequences.size()) +
|
||||
"][MAX_RENAME_SEQUENCE_LENGTH] = {"
|
||||
);
|
||||
|
||||
indent([&]() {
|
||||
for (unsigned i = 1, n = parse_table.rename_sequences.size(); i < n; i++) {
|
||||
const RenameSequence &sequence = parse_table.rename_sequences[i];
|
||||
line("[" + to_string(i) + "] = {");
|
||||
indent([&]() {
|
||||
for (unsigned j = 0, n = sequence.size(); j < n; j++) {
|
||||
if (!sequence[j].empty()) {
|
||||
line("[" + to_string(j) + "] = " + rename_id(sequence[j]) + ",");
|
||||
}
|
||||
}
|
||||
});
|
||||
line("},");
|
||||
}
|
||||
});
|
||||
line("};");
|
||||
line();
|
||||
|
|
@ -332,7 +386,7 @@ class CCodeGenerator {
|
|||
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
|
||||
|
||||
size_t state_id = 0;
|
||||
line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
|
||||
line("static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
|
||||
|
||||
indent([&]() {
|
||||
for (const auto &state : parse_table.states) {
|
||||
|
|
@ -543,17 +597,23 @@ class CCodeGenerator {
|
|||
}
|
||||
break;
|
||||
case ParseActionTypeReduce:
|
||||
if (action.fragile) {
|
||||
add("REDUCE_FRAGILE");
|
||||
} else {
|
||||
add("REDUCE");
|
||||
}
|
||||
|
||||
add("(");
|
||||
add("REDUCE(");
|
||||
add(symbol_id(action.symbol));
|
||||
add(", ");
|
||||
add(to_string(action.consumed_symbol_count));
|
||||
add(", " + to_string(action.dynamic_precedence));
|
||||
|
||||
if (action.fragile) {
|
||||
add(", .fragile = true");
|
||||
}
|
||||
|
||||
if (action.dynamic_precedence != 0) {
|
||||
add(", .dynamic_precedence = " + to_string(action.dynamic_precedence));
|
||||
}
|
||||
|
||||
if (action.rename_sequence_id != 0) {
|
||||
add(", .rename_sequence_id = " + to_string(action.rename_sequence_id));
|
||||
}
|
||||
|
||||
add(")");
|
||||
break;
|
||||
case ParseActionTypeRecover:
|
||||
|
|
@ -605,6 +665,10 @@ class CCodeGenerator {
|
|||
}
|
||||
}
|
||||
|
||||
// Builds the generated-code identifier for a replacement name: the
// sanitized name prefixed with "rename_sym_".
string rename_id(const string &name) {
  string identifier = "rename_sym_";
  identifier += sanitize_name(name);
  return identifier;
}
|
||||
|
||||
string symbol_name(const Symbol &symbol) {
|
||||
if (symbol == rules::END_OF_INPUT())
|
||||
return "END";
|
||||
|
|
|
|||
|
|
@ -198,6 +198,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
|
|||
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
|
||||
}
|
||||
|
||||
if (type == "RENAME") {
|
||||
json_value name_json = rule_json->operator[]("value");
|
||||
if (name_json.type != json_string) {
|
||||
return "Rename value must be a string";
|
||||
}
|
||||
|
||||
json_value content_json = rule_json->operator[]("content");
|
||||
auto result = parse_rule(&content_json);
|
||||
if (!result.error_message.empty()) {
|
||||
return "Invalid rename content: " + result.error_message;
|
||||
}
|
||||
return Rule(Metadata::rename(string(name_json.u.string.ptr), result.rule));
|
||||
}
|
||||
|
||||
return "Unknown rule type: " + type;
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,21 +6,20 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
using std::string;
|
||||
using std::ostream;
|
||||
using std::to_string;
|
||||
using std::set;
|
||||
using std::vector;
|
||||
using std::function;
|
||||
using rules::Symbol;
|
||||
|
||||
ParseAction::ParseAction()
|
||||
: production(nullptr),
|
||||
consumed_symbol_count(0),
|
||||
symbol(rules::NONE()),
|
||||
type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
state_index(-1) {}
|
||||
: production(nullptr),
|
||||
consumed_symbol_count(0),
|
||||
symbol(rules::NONE()),
|
||||
type(ParseActionTypeError),
|
||||
extra(false),
|
||||
fragile(false),
|
||||
state_index(-1),
|
||||
rename_sequence_id(0) {}
|
||||
|
||||
ParseAction ParseAction::Error() {
|
||||
return ParseAction();
|
||||
|
|
@ -65,52 +64,49 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
|
|||
}
|
||||
|
||||
bool ParseAction::operator==(const ParseAction &other) const {
|
||||
return (type == other.type && extra == other.extra &&
|
||||
fragile == other.fragile && symbol == other.symbol &&
|
||||
state_index == other.state_index && production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count);
|
||||
return
|
||||
type == other.type &&
|
||||
extra == other.extra &&
|
||||
fragile == other.fragile &&
|
||||
symbol == other.symbol &&
|
||||
state_index == other.state_index &&
|
||||
production == other.production &&
|
||||
consumed_symbol_count == other.consumed_symbol_count &&
|
||||
rename_sequence_id == other.rename_sequence_id;
|
||||
}
|
||||
|
||||
bool ParseAction::operator<(const ParseAction &other) const {
|
||||
if (type < other.type)
|
||||
return true;
|
||||
if (other.type < type)
|
||||
return false;
|
||||
if (extra && !other.extra)
|
||||
return true;
|
||||
if (other.extra && !extra)
|
||||
return false;
|
||||
if (fragile && !other.fragile)
|
||||
return true;
|
||||
if (other.fragile && !fragile)
|
||||
return false;
|
||||
if (symbol < other.symbol)
|
||||
return true;
|
||||
if (other.symbol < symbol)
|
||||
return false;
|
||||
if (state_index < other.state_index)
|
||||
return true;
|
||||
if (other.state_index < state_index)
|
||||
return false;
|
||||
if (production < other.production)
|
||||
return true;
|
||||
if (other.production < production)
|
||||
return false;
|
||||
return consumed_symbol_count < other.consumed_symbol_count;
|
||||
if (type < other.type) return true;
|
||||
if (other.type < type) return false;
|
||||
if (extra && !other.extra) return true;
|
||||
if (other.extra && !extra) return false;
|
||||
if (fragile && !other.fragile) return true;
|
||||
if (other.fragile && !fragile) return false;
|
||||
if (symbol < other.symbol) return true;
|
||||
if (other.symbol < symbol) return false;
|
||||
if (state_index < other.state_index) return true;
|
||||
if (other.state_index < state_index) return false;
|
||||
if (production < other.production) return true;
|
||||
if (other.production < production) return false;
|
||||
if (consumed_symbol_count < other.consumed_symbol_count) return true;
|
||||
if (other.consumed_symbol_count < consumed_symbol_count) return false;
|
||||
return rename_sequence_id < other.rename_sequence_id;
|
||||
}
|
||||
|
||||
ParseTableEntry::ParseTableEntry()
|
||||
: reusable(true), depends_on_lookahead(false) {}
|
||||
: reusable(true), depends_on_lookahead(false) {}
|
||||
|
||||
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions,
|
||||
bool reusable, bool depends_on_lookahead)
|
||||
: actions(actions),
|
||||
reusable(reusable),
|
||||
depends_on_lookahead(depends_on_lookahead) {}
|
||||
: actions(actions),
|
||||
reusable(reusable),
|
||||
depends_on_lookahead(depends_on_lookahead) {}
|
||||
|
||||
bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
|
||||
return actions == other.actions && reusable == other.reusable &&
|
||||
depends_on_lookahead == other.depends_on_lookahead;
|
||||
return
|
||||
actions == other.actions &&
|
||||
reusable == other.reusable &&
|
||||
depends_on_lookahead == other.depends_on_lookahead;
|
||||
}
|
||||
|
||||
ParseState::ParseState() : lex_state_id(-1) {}
|
||||
|
|
|
|||
|
|
@ -41,6 +41,7 @@ struct ParseAction {
|
|||
bool extra;
|
||||
bool fragile;
|
||||
ParseStateId state_index;
|
||||
unsigned rename_sequence_id;
|
||||
};
|
||||
|
||||
struct ParseTableEntry {
|
||||
|
|
@ -73,12 +74,15 @@ struct ParseTableSymbolMetadata {
|
|||
bool structural;
|
||||
};
|
||||
|
||||
using RenameSequence = std::vector<std::string>;
|
||||
|
||||
struct ParseTable {
|
||||
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
|
||||
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
|
||||
|
||||
std::vector<ParseState> states;
|
||||
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
|
||||
std::vector<RenameSequence> rename_sequences;
|
||||
};
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
#include "compiler/prepare_grammar/flatten_grammar.h"
|
||||
#include <vector>
|
||||
#include <algorithm>
|
||||
#include <cassert>
|
||||
#include <cmath>
|
||||
#include <algorithm>
|
||||
#include <string>
|
||||
#include <vector>
|
||||
#include "compiler/prepare_grammar/extract_choices.h"
|
||||
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
|
||||
#include "compiler/grammar.h"
|
||||
|
|
@ -13,6 +14,7 @@ namespace prepare_grammar {
|
|||
|
||||
using std::find;
|
||||
using std::pair;
|
||||
using std::string;
|
||||
using std::vector;
|
||||
using rules::Rule;
|
||||
|
||||
|
|
@ -20,6 +22,7 @@ class FlattenRule {
|
|||
private:
|
||||
vector<int> precedence_stack;
|
||||
vector<rules::Associativity> associativity_stack;
|
||||
vector<string> name_replacement_stack;
|
||||
Production production;
|
||||
|
||||
void apply(const Rule &rule, bool at_end) {
|
||||
|
|
@ -28,7 +31,8 @@ class FlattenRule {
|
|||
production.steps.push_back(ProductionStep{
|
||||
symbol,
|
||||
precedence_stack.back(),
|
||||
associativity_stack.back()
|
||||
associativity_stack.back(),
|
||||
name_replacement_stack.back()
|
||||
});
|
||||
},
|
||||
|
||||
|
|
@ -41,6 +45,10 @@ class FlattenRule {
|
|||
associativity_stack.push_back(metadata.params.associativity);
|
||||
}
|
||||
|
||||
if (!metadata.params.name_replacement.empty()) {
|
||||
name_replacement_stack.push_back(metadata.params.name_replacement);
|
||||
}
|
||||
|
||||
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
|
||||
production.dynamic_precedence = metadata.params.dynamic_precedence;
|
||||
}
|
||||
|
|
@ -56,6 +64,10 @@ class FlattenRule {
|
|||
associativity_stack.pop_back();
|
||||
if (!at_end) production.back().associativity = associativity_stack.back();
|
||||
}
|
||||
|
||||
if (!metadata.params.name_replacement.empty()) {
|
||||
name_replacement_stack.pop_back();
|
||||
}
|
||||
},
|
||||
|
||||
[&](const rules::Seq &sequence) {
|
||||
|
|
@ -72,7 +84,10 @@ class FlattenRule {
|
|||
}
|
||||
|
||||
public:
|
||||
FlattenRule() : precedence_stack({ 0 }), associativity_stack({ rules::AssociativityNone }) {}
|
||||
FlattenRule() :
|
||||
precedence_stack({0}),
|
||||
associativity_stack({rules::AssociativityNone}),
|
||||
name_replacement_stack({""}) {}
|
||||
|
||||
Production flatten(const Rule &rule) {
|
||||
apply(rule, true);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,14 @@
|
|||
#include "compiler/rules/metadata.h"
|
||||
#include <climits>
|
||||
#include <string>
|
||||
#include "compiler/rule.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
||||
using std::move;
|
||||
using std::string;
|
||||
|
||||
Metadata::Metadata(const Rule &rule, MetadataParams params) :
|
||||
rule(std::make_shared<Rule>(rule)), params(params) {}
|
||||
|
||||
|
|
@ -73,5 +77,11 @@ Metadata Metadata::main_token(const Rule &rule) {
|
|||
return Metadata{rule, params};
|
||||
}
|
||||
|
||||
// Wraps `rule` in a Metadata node whose only non-default parameter is the
// replacement name. `name` is moved from.
Metadata Metadata::rename(string &&name, const Rule &rule) {
  MetadataParams rename_params;
  rename_params.name_replacement = move(name);
  return Metadata{rule, rename_params};
}
|
||||
|
||||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
#ifndef COMPILER_RULES_METADATA_H_
|
||||
#define COMPILER_RULES_METADATA_H_
|
||||
|
||||
#include <string>
|
||||
#include <memory>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
|
@ -22,6 +23,7 @@ struct MetadataParams {
|
|||
bool is_string;
|
||||
bool is_active;
|
||||
bool is_main_token;
|
||||
std::string name_replacement;
|
||||
|
||||
inline MetadataParams() :
|
||||
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
|
||||
|
|
@ -38,7 +40,8 @@ struct MetadataParams {
|
|||
is_token == other.is_token &&
|
||||
is_string == other.is_string &&
|
||||
is_active == other.is_active &&
|
||||
is_main_token == other.is_main_token
|
||||
is_main_token == other.is_main_token &&
|
||||
name_replacement == other.name_replacement
|
||||
);
|
||||
}
|
||||
};
|
||||
|
|
@ -59,6 +62,7 @@ struct Metadata {
|
|||
static Metadata prec_dynamic(int precedence, const Rule &rule);
|
||||
static Metadata separator(const Rule &rule);
|
||||
static Metadata main_token(const Rule &rule);
|
||||
static Metadata rename(std::string &&name, const Rule &rule);
|
||||
|
||||
bool operator==(const Metadata &other) const;
|
||||
};
|
||||
|
|
|
|||
36
src/compiler/syntax_grammar.cc
Normal file
36
src/compiler/syntax_grammar.cc
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
#include "compiler/syntax_grammar.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
bool ProductionStep::operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol &&
|
||||
precedence == other.precedence &&
|
||||
associativity == other.associativity &&
|
||||
name_replacement == other.name_replacement;
|
||||
}
|
||||
|
||||
bool ProductionStep::operator!=(const ProductionStep &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
bool ProductionStep::operator<(const ProductionStep &other) const {
|
||||
if (symbol < other.symbol) return true;
|
||||
if (other.symbol < symbol) return false;
|
||||
if (precedence < other.precedence) return true;
|
||||
if (other.precedence < precedence) return false;
|
||||
if (associativity < other.associativity) return true;
|
||||
if (other.associativity < associativity) return false;
|
||||
return name_replacement < other.name_replacement;
|
||||
}
|
||||
|
||||
bool Production::operator==(const Production &other) const {
|
||||
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
|
||||
}
|
||||
|
||||
bool ExternalToken::operator==(const ExternalToken &other) const {
|
||||
return name == other.name &&
|
||||
type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
@ -10,43 +10,29 @@
|
|||
namespace tree_sitter {
|
||||
|
||||
struct ProductionStep {
|
||||
inline bool operator==(const ProductionStep &other) const {
|
||||
return symbol == other.symbol &&
|
||||
precedence == other.precedence &&
|
||||
associativity == other.associativity;
|
||||
}
|
||||
|
||||
inline bool operator!=(const ProductionStep &other) const {
|
||||
return !operator==(other);
|
||||
}
|
||||
|
||||
inline bool operator<(const ProductionStep &other) const {
|
||||
if (symbol < other.symbol) return true;
|
||||
if (other.symbol < symbol) return false;
|
||||
if (precedence < other.precedence) return true;
|
||||
if (other.precedence < precedence) return false;
|
||||
return associativity < other.associativity;
|
||||
}
|
||||
|
||||
rules::Symbol symbol;
|
||||
int precedence;
|
||||
rules::Associativity associativity;
|
||||
std::string name_replacement;
|
||||
|
||||
bool operator==(const ProductionStep &) const;
|
||||
bool operator!=(const ProductionStep &) const;
|
||||
bool operator<(const ProductionStep &) const;
|
||||
};
|
||||
|
||||
struct Production {
|
||||
std::vector<ProductionStep> steps;
|
||||
int dynamic_precedence = 0;
|
||||
|
||||
inline bool operator==(const Production &other) const {
|
||||
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
|
||||
}
|
||||
|
||||
bool operator==(const Production &) const;
|
||||
inline ProductionStep &back() { return steps.back(); }
|
||||
inline const ProductionStep &back() const { return steps.back(); }
|
||||
inline bool empty() const { return steps.empty(); }
|
||||
inline size_t size() const { return steps.size(); }
|
||||
inline const ProductionStep &operator[](int i) const { return steps[i]; }
|
||||
inline const ProductionStep &at(int i) const { return steps[i]; }
|
||||
inline std::vector<ProductionStep>::const_iterator begin() const { return steps.begin(); }
|
||||
inline std::vector<ProductionStep>::const_iterator end() const { return steps.end(); }
|
||||
};
|
||||
|
||||
struct SyntaxVariable {
|
||||
|
|
@ -55,24 +41,18 @@ struct SyntaxVariable {
|
|||
std::vector<Production> productions;
|
||||
};
|
||||
|
||||
using ConflictSet = std::set<rules::Symbol>;
|
||||
|
||||
struct ExternalToken {
|
||||
std::string name;
|
||||
VariableType type;
|
||||
rules::Symbol corresponding_internal_token;
|
||||
|
||||
inline bool operator==(const ExternalToken &other) const {
|
||||
return name == other.name &&
|
||||
type == other.type &&
|
||||
corresponding_internal_token == other.corresponding_internal_token;
|
||||
}
|
||||
bool operator==(const ExternalToken &) const;
|
||||
};
|
||||
|
||||
struct SyntaxGrammar {
|
||||
std::vector<SyntaxVariable> variables;
|
||||
std::set<rules::Symbol> extra_tokens;
|
||||
std::set<ConflictSet> expected_conflicts;
|
||||
std::set<std::set<rules::Symbol>> expected_conflicts;
|
||||
std::vector<ExternalToken> external_tokens;
|
||||
std::set<rules::Symbol> variables_to_inline;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -3,7 +3,8 @@
|
|||
#include "runtime/error_costs.h"
|
||||
|
||||
static const TSParseAction SHIFT_ERROR = {
|
||||
.type = TSParseActionTypeShift, .params = {.to_state = ERROR_STATE}
|
||||
.type = TSParseActionTypeShift,
|
||||
.to_state = ERROR_STATE,
|
||||
};
|
||||
|
||||
void ts_language_table_entry(const TSLanguage *self, TSStateId state,
|
||||
|
|
|
|||
|
|
@ -44,7 +44,7 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
|
|||
if (count > 0) {
|
||||
TSParseAction action = actions[count - 1];
|
||||
if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) {
|
||||
return action.params.to_state;
|
||||
return action.to_state;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
|
|
|
|||
|
|
@ -288,7 +288,8 @@ void ts_symbol_iterator_next(TSSymbolIterator *self) {
|
|||
}
|
||||
|
||||
const char *ts_node_type(TSNode self, const TSDocument *document) {
|
||||
TSSymbol symbol = ts_node__tree(self)->symbol;
|
||||
const Tree *tree = ts_node__tree(self);
|
||||
TSSymbol symbol = tree->context.rename_symbol ? tree->context.rename_symbol : tree->symbol;
|
||||
return ts_language_symbol_name(document->parser.language, symbol);
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -558,8 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree,
|
|||
|
||||
static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
||||
TSSymbol symbol, unsigned count,
|
||||
bool fragile, int dynamic_precedence,
|
||||
bool allow_skipping) {
|
||||
int dynamic_precedence, unsigned short rename_sequence_id,
|
||||
bool fragile, bool allow_skipping) {
|
||||
uint32_t initial_version_count = ts_stack_version_count(self->stack);
|
||||
|
||||
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
|
||||
|
|
@ -603,6 +603,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
|
|||
}
|
||||
|
||||
parent->dynamic_precedence += dynamic_precedence;
|
||||
parent->rename_sequence_id = rename_sequence_id;
|
||||
|
||||
TSStateId state = ts_stack_top_state(self->stack, slice.version);
|
||||
TSStateId next_state = ts_language_next_state(language, state, symbol);
|
||||
|
|
@ -699,12 +700,12 @@ static const TSParseAction *parser__reductions_after_sequence(
|
|||
(*count)--;
|
||||
}
|
||||
|
||||
while (*count > 0 && actions[0].params.child_count < child_count) {
|
||||
while (*count > 0 && actions[0].child_count < child_count) {
|
||||
actions++;
|
||||
(*count)--;
|
||||
}
|
||||
|
||||
while (*count > 0 && actions[*count - 1].params.child_count > child_count) {
|
||||
while (*count > 0 && actions[*count - 1].child_count > child_count) {
|
||||
(*count)--;
|
||||
}
|
||||
|
||||
|
|
@ -756,7 +757,7 @@ static StackIterateAction parser__repair_error_callback(void *payload, TSStateId
|
|||
}
|
||||
|
||||
for (uint32_t j = 0; j < repair_reduction_count; j++) {
|
||||
if (repair_reductions[j].params.symbol == repair->symbol) {
|
||||
if (repair_reductions[j].symbol == repair->symbol) {
|
||||
result |= StackIteratePop;
|
||||
session->found_repair = true;
|
||||
session->best_repair = *repair;
|
||||
|
|
@ -788,8 +789,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
|
|||
array_clear(&self->reduce_actions);
|
||||
for (uint32_t i = 0; i < entry.action_count; i++) {
|
||||
if (entry.actions[i].type == TSParseActionTypeReduce) {
|
||||
TSSymbol symbol = entry.actions[i].params.symbol;
|
||||
uint32_t child_count = entry.actions[i].params.child_count;
|
||||
TSSymbol symbol = entry.actions[i].symbol;
|
||||
uint32_t child_count = entry.actions[i].child_count;
|
||||
if ((child_count > session.tree_count_above_error) ||
|
||||
(child_count == session.tree_count_above_error &&
|
||||
!ts_language_symbol_metadata(self->language, symbol).visible))
|
||||
|
|
@ -942,11 +943,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
|
|||
has_shift_action = true;
|
||||
break;
|
||||
case TSParseActionTypeReduce:
|
||||
if (action.params.child_count > 0)
|
||||
if (action.child_count > 0)
|
||||
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
|
||||
.symbol = action.params.symbol,
|
||||
.count = action.params.child_count,
|
||||
.dynamic_precedence = action.params.dynamic_precedence
|
||||
.symbol = action.symbol,
|
||||
.count = action.child_count,
|
||||
.dynamic_precedence = action.dynamic_precedence,
|
||||
.rename_sequence_id = action.rename_sequence_id,
|
||||
});
|
||||
default:
|
||||
break;
|
||||
|
|
@ -958,8 +960,9 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
|
|||
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
|
||||
ReduceAction action = self->reduce_actions.contents[i];
|
||||
StackPopResult reduction = parser__reduce(
|
||||
self, version, action.symbol, action.count, true,
|
||||
action.dynamic_precedence, false
|
||||
self, version, action.symbol, action.count,
|
||||
action.dynamic_precedence, action.rename_sequence_id,
|
||||
true, false
|
||||
);
|
||||
if (reduction.stopped_at_error) {
|
||||
ts_tree_array_delete(&reduction.slices.contents[0].trees);
|
||||
|
|
@ -1168,7 +1171,7 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
next_state = state;
|
||||
LOG("shift_extra");
|
||||
} else {
|
||||
next_state = action.params.to_state;
|
||||
next_state = action.to_state;
|
||||
LOG("shift state:%u", next_state);
|
||||
}
|
||||
|
||||
|
|
@ -1195,18 +1198,14 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
}
|
||||
|
||||
case TSParseActionTypeReduce: {
|
||||
if (reduction_stopped_at_error)
|
||||
continue;
|
||||
if (reduction_stopped_at_error) continue;
|
||||
|
||||
unsigned child_count = action.params.child_count;
|
||||
TSSymbol symbol = action.params.symbol;
|
||||
unsigned dynamic_precedence = action.params.dynamic_precedence;
|
||||
bool fragile = action.fragile;
|
||||
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
|
||||
|
||||
StackPopResult reduction =
|
||||
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
|
||||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), action.child_count);
|
||||
StackPopResult reduction = parser__reduce(
|
||||
self, version, action.symbol, action.child_count,
|
||||
action.dynamic_precedence, action.rename_sequence_id,
|
||||
action.fragile, true
|
||||
);
|
||||
StackSlice slice = *array_front(&reduction.slices);
|
||||
if (reduction.stopped_at_error) {
|
||||
reduction_stopped_at_error = true;
|
||||
|
|
@ -1237,7 +1236,7 @@ static void parser__advance(Parser *self, StackVersion version,
|
|||
ts_tree_retain(lookahead);
|
||||
}
|
||||
|
||||
parser__recover(self, version, action.params.to_state, lookahead);
|
||||
parser__recover(self, version, action.to_state, lookahead);
|
||||
if (lookahead == reusable_node->tree) {
|
||||
reusable_node_pop(reusable_node);
|
||||
}
|
||||
|
|
@ -1355,6 +1354,6 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
|
|||
LOG_TREE();
|
||||
ts_stack_clear(self->stack);
|
||||
parser__clear_cached_token(self);
|
||||
ts_tree_assign_parents(self->finished_tree, &self->tree_path1);
|
||||
ts_tree_assign_parents(self->finished_tree, &self->tree_path1, self->language);
|
||||
return self->finished_tree;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ typedef struct {
|
|||
uint32_t count;
|
||||
TSSymbol symbol;
|
||||
int dynamic_precedence;
|
||||
unsigned short rename_sequence_id;
|
||||
} ReduceAction;
|
||||
|
||||
typedef Array(ReduceAction) ReduceActionSet;
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
|
|||
.visible_child_count = 0,
|
||||
.named_child_count = 0,
|
||||
.children = NULL,
|
||||
.rename_sequence_id = 0,
|
||||
.padding = padding,
|
||||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
|
|
@ -120,18 +121,23 @@ Tree *ts_tree_make_copy(Tree *self) {
|
|||
return result;
|
||||
}
|
||||
|
||||
void ts_tree_assign_parents(Tree *self, TreePath *path) {
|
||||
void ts_tree_assign_parents(Tree *self, TreePath *path, const TSLanguage *language) {
|
||||
array_clear(path);
|
||||
array_push(path, ((TreePathEntry){self, length_zero(), 0}));
|
||||
while (path->size > 0) {
|
||||
Tree *tree = array_pop(path).tree;
|
||||
Length offset = length_zero();
|
||||
const TSSymbol *rename_symbols = language->rename_sequences +
|
||||
tree->rename_sequence_id * language->max_rename_sequence_length;
|
||||
for (uint32_t i = 0; i < tree->child_count; i++) {
|
||||
Tree *child = tree->children[i];
|
||||
if (child->context.parent != tree || child->context.index != i) {
|
||||
child->context.parent = tree;
|
||||
child->context.index = i;
|
||||
child->context.offset = offset;
|
||||
if (tree->rename_sequence_id && rename_symbols[i] != 0) {
|
||||
child->context.rename_symbol = rename_symbols[i];
|
||||
}
|
||||
array_push(path, ((TreePathEntry){child, length_zero(), 0}));
|
||||
}
|
||||
offset = length_add(offset, ts_tree_total_size(child));
|
||||
|
|
@ -472,36 +478,32 @@ static size_t ts_tree__write_to_string(const Tree *self,
|
|||
const TSLanguage *language, char *string,
|
||||
size_t limit, bool is_root,
|
||||
bool include_all) {
|
||||
if (!self)
|
||||
return snprintf(string, limit, "(NULL)");
|
||||
if (!self) return snprintf(string, limit, "(NULL)");
|
||||
|
||||
char *cursor = string;
|
||||
char **writer = (limit > 0) ? &cursor : &string;
|
||||
bool visible = include_all || is_root || (self->visible && self->named);
|
||||
|
||||
if (visible && !is_root)
|
||||
if (visible && !is_root) {
|
||||
cursor += snprintf(*writer, limit, " ");
|
||||
}
|
||||
|
||||
if (visible) {
|
||||
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 &&
|
||||
self->size.chars > 0) {
|
||||
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) {
|
||||
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
|
||||
cursor +=
|
||||
ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
|
||||
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
|
||||
} else {
|
||||
cursor += snprintf(*writer, limit, "(%s",
|
||||
ts_language_symbol_name(language, self->symbol));
|
||||
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
|
||||
cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol));
|
||||
}
|
||||
}
|
||||
|
||||
for (uint32_t i = 0; i < self->child_count; i++) {
|
||||
Tree *child = self->children[i];
|
||||
cursor += ts_tree__write_to_string(child, language, *writer, limit, false,
|
||||
include_all);
|
||||
cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all);
|
||||
}
|
||||
|
||||
if (visible)
|
||||
cursor += snprintf(*writer, limit, ")");
|
||||
if (visible) cursor += snprintf(*writer, limit, ")");
|
||||
|
||||
return cursor - string;
|
||||
}
|
||||
|
|
@ -518,8 +520,8 @@ char *ts_tree_string(const Tree *self, const TSLanguage *language,
|
|||
|
||||
void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset,
|
||||
const TSLanguage *language, FILE *f) {
|
||||
fprintf(f, "tree_%p [label=\"%s\"", self,
|
||||
ts_language_symbol_name(language, self->symbol));
|
||||
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
|
||||
fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol));
|
||||
|
||||
if (self->child_count == 0)
|
||||
fprintf(f, ", shape=plaintext");
|
||||
|
|
|
|||
|
|
@ -19,6 +19,7 @@ typedef struct Tree {
|
|||
struct Tree *parent;
|
||||
uint32_t index;
|
||||
Length offset;
|
||||
TSSymbol rename_symbol;
|
||||
} context;
|
||||
|
||||
uint32_t child_count;
|
||||
|
|
@ -26,6 +27,7 @@ typedef struct Tree {
|
|||
struct {
|
||||
uint32_t visible_child_count;
|
||||
uint32_t named_child_count;
|
||||
unsigned short rename_sequence_id;
|
||||
struct Tree **children;
|
||||
};
|
||||
TSExternalTokenState external_token_state;
|
||||
|
|
@ -85,7 +87,7 @@ int ts_tree_compare(const Tree *tree1, const Tree *tree2);
|
|||
uint32_t ts_tree_start_column(const Tree *self);
|
||||
uint32_t ts_tree_end_column(const Tree *self);
|
||||
void ts_tree_set_children(Tree *, uint32_t, Tree **);
|
||||
void ts_tree_assign_parents(Tree *, TreePath *);
|
||||
void ts_tree_assign_parents(Tree *, TreePath *, const TSLanguage *);
|
||||
void ts_tree_edit(Tree *, const TSInputEdit *edit);
|
||||
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
|
||||
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);
|
||||
|
|
|
|||
|
|
@ -12,6 +12,7 @@ START_TEST
|
|||
|
||||
describe("ParseItemSetBuilder", []() {
|
||||
vector<LexicalVariable> lexical_variables;
|
||||
|
||||
for (size_t i = 0; i < 20; i++) {
|
||||
lexical_variables.push_back({
|
||||
"token_" + to_string(i),
|
||||
|
|
@ -27,23 +28,23 @@ describe("ParseItemSetBuilder", []() {
|
|||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
|
||||
{Symbol::terminal(11), 0, AssociativityNone, ""},
|
||||
}, 0},
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production{{
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
{Symbol::terminal(12), 0, AssociativityNone, ""},
|
||||
{Symbol::terminal(13), 0, AssociativityNone, ""},
|
||||
}, 0},
|
||||
Production{{
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(2), 0, AssociativityNone, ""},
|
||||
}, 0}
|
||||
}},
|
||||
SyntaxVariable{"rule2", VariableTypeNamed, {
|
||||
Production{{
|
||||
{Symbol::terminal(14), 0, AssociativityNone},
|
||||
{Symbol::terminal(15), 0, AssociativityNone},
|
||||
{Symbol::terminal(14), 0, AssociativityNone, ""},
|
||||
{Symbol::terminal(15), 0, AssociativityNone, ""},
|
||||
}, 0}
|
||||
}},
|
||||
}, {}, {}, {}, {}};
|
||||
|
|
@ -52,21 +53,21 @@ describe("ParseItemSetBuilder", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set({
|
||||
ParseItemSet item_set{{
|
||||
{
|
||||
ParseItem(rules::START(), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) }),
|
||||
}
|
||||
});
|
||||
}};
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
AssertThat(item_set, Equals(ParseItemSet{{
|
||||
{
|
||||
ParseItem(rules::START(), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) })
|
||||
},
|
||||
},
|
||||
{
|
||||
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
|
|
@ -79,21 +80,21 @@ describe("ParseItemSetBuilder", []() {
|
|||
ParseItem(Symbol::non_terminal(2), production(2, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
})));
|
||||
}}));
|
||||
});
|
||||
|
||||
it("handles rules with empty productions", [&]() {
|
||||
SyntaxGrammar grammar{{
|
||||
SyntaxVariable{"rule0", VariableTypeNamed, {
|
||||
Production{{
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone},
|
||||
{Symbol::terminal(11), 0, AssociativityNone},
|
||||
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
|
||||
{Symbol::terminal(11), 0, AssociativityNone, ""},
|
||||
}, 0},
|
||||
}},
|
||||
SyntaxVariable{"rule1", VariableTypeNamed, {
|
||||
Production{{
|
||||
{Symbol::terminal(12), 0, AssociativityNone},
|
||||
{Symbol::terminal(13), 0, AssociativityNone},
|
||||
{Symbol::terminal(12), 0, AssociativityNone, ""},
|
||||
{Symbol::terminal(13), 0, AssociativityNone, ""},
|
||||
}, 0},
|
||||
Production{{}, 0}
|
||||
}},
|
||||
|
|
@ -103,17 +104,17 @@ describe("ParseItemSetBuilder", []() {
|
|||
return grammar.variables[variable_index].productions[production_index];
|
||||
};
|
||||
|
||||
ParseItemSet item_set({
|
||||
ParseItemSet item_set{{
|
||||
{
|
||||
ParseItem(rules::START(), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) }),
|
||||
}
|
||||
});
|
||||
}};
|
||||
|
||||
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
|
||||
item_set_builder.apply_transitive_closure(&item_set);
|
||||
|
||||
AssertThat(item_set, Equals(ParseItemSet({
|
||||
AssertThat(item_set, Equals(ParseItemSet{{
|
||||
{
|
||||
ParseItem(rules::START(), production(0, 0), 0),
|
||||
LookaheadSet({ Symbol::terminal(10) })
|
||||
|
|
@ -126,7 +127,7 @@ describe("ParseItemSetBuilder", []() {
|
|||
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
|
||||
LookaheadSet({ Symbol::terminal(11) })
|
||||
},
|
||||
})));
|
||||
}}));
|
||||
});
|
||||
});
|
||||
|
||||
|
|
|
|||
18
test/fixtures/test_grammars/renamed_rules/corpus.txt
vendored
Normal file
18
test/fixtures/test_grammars/renamed_rules/corpus.txt
vendored
Normal file
|
|
@ -0,0 +1,18 @@
|
|||
======================================
|
||||
Method calls
|
||||
======================================
|
||||
|
||||
a.b(c(d.e));
|
||||
|
||||
---
|
||||
|
||||
(statement
|
||||
(call_expression
|
||||
(member_expression
|
||||
(variable_name)
|
||||
(property_name))
|
||||
(call_expression
|
||||
(variable_name)
|
||||
(member_expression
|
||||
(variable_name)
|
||||
(property_name)))))
|
||||
69
test/fixtures/test_grammars/renamed_rules/grammar.json
vendored
Normal file
69
test/fixtures/test_grammars/renamed_rules/grammar.json
vendored
Normal file
|
|
@ -0,0 +1,69 @@
|
|||
{
|
||||
"name": "renamed_rules",
|
||||
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s"}
|
||||
],
|
||||
|
||||
"rules": {
|
||||
"statement": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": ";"}
|
||||
]
|
||||
},
|
||||
|
||||
"_expression": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "call_expression"},
|
||||
{"type": "SYMBOL", "name": "member_expression"},
|
||||
{
|
||||
"type": "RENAME",
|
||||
"value": "variable_name",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
|
||||
"call_expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 0,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": "("},
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": ")"}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"member_expression": {
|
||||
"type": "PREC_LEFT",
|
||||
"value": 1,
|
||||
"content": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "SYMBOL", "name": "_expression"},
|
||||
{"type": "STRING", "value": "."},
|
||||
{
|
||||
"type": "RENAME",
|
||||
"value": "property_name",
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "identifier"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
|
||||
"identifier": {"type": "PATTERN", "value": "\\a+"}
|
||||
}
|
||||
}
|
||||
Loading…
Add table
Add a link
Reference in a new issue