Merge pull request #90 from tree-sitter/rename-rules

Add an API for renaming nodes based on their context
This commit is contained in:
Max Brunsfeld 2017-07-14 10:51:22 -07:00 committed by GitHub
commit 9c9311ccd7
36 changed files with 805 additions and 368 deletions

View file

@ -38,6 +38,14 @@
}
},
"inline": {
"type": "array",
"items": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*$"
}
},
"conflicts": {
"type": "array",
"items": {
@ -135,6 +143,23 @@
"required": ["type", "members"]
},
"rename-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^RENAME$"
},
"value": {
"type": "string"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"repeat-rule": {
"type": "object",
"properties": {
@ -202,6 +227,7 @@
{ "$ref": "#/definitions/symbol-rule" },
{ "$ref": "#/definitions/seq-rule" },
{ "$ref": "#/definitions/choice-rule" },
{ "$ref": "#/definitions/rename-rule" },
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },

View file

@ -9,9 +9,8 @@ extern "C" {
#include <stdint.h>
#include <stdlib.h>
typedef unsigned short TSSymbol;
typedef unsigned short TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSStateId;
typedef uint8_t TSExternalTokenState[16];
#define ts_builtin_sym_error ((TSSymbol)-1)
@ -40,16 +39,19 @@ typedef enum {
typedef struct {
union {
TSStateId to_state;
struct {
short dynamic_precedence;
TSSymbol symbol;
unsigned short child_count;
TSStateId to_state;
bool extra : 1;
};
} params;
struct {
TSSymbol symbol;
uint16_t dynamic_precedence;
uint8_t child_count;
uint8_t rename_sequence_id : 7;
bool fragile : 1;
};
};
TSParseActionType type : 4;
bool extra : 1;
bool fragile : 1;
} TSParseAction;
typedef struct {
@ -60,7 +62,7 @@ typedef struct {
typedef union {
TSParseAction action;
struct {
unsigned short count;
uint8_t count;
bool reusable : 1;
bool depends_on_lookahead : 1;
};
@ -73,9 +75,11 @@ typedef struct TSLanguage {
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const unsigned short *parse_table;
const uint16_t *parse_table;
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
const TSSymbol *rename_sequences;
uint16_t max_rename_sequence_length;
bool (*lex_fn)(TSLexer *, TSStateId);
struct {
const bool *states;
@ -127,70 +131,62 @@ typedef struct TSLanguage {
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(to_state_value) \
{ \
{ \
.type = TSParseActionTypeShift, .params = {.to_state = to_state_value } \
} \
#define SHIFT(to_state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.to_state = to_state_value, \
} \
}
#define RECOVER(to_state_value) \
{ \
{ \
.type = TSParseActionTypeRecover, .params = {.to_state = to_state_value } \
} \
#define RECOVER(to_state_value) \
{ \
{ \
.type = TSParseActionTypeRecover, \
.to_state = to_state_value \
} \
}
#define SHIFT_EXTRA() \
{ \
{ .type = TSParseActionTypeShift, .extra = true } \
#define SHIFT_EXTRA() \
{ \
{ \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
#define REDUCE(symbol_val, child_count_val, ...) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.fragile = true, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define ACCEPT_INPUT() \
{ \
{ .type = TSParseActionTypeAccept } \
}
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.rename_sequences = (const TSSymbol *)ts_rename_sequences, \
.max_rename_sequence_length = MAX_RENAME_SEQUENCE_LENGTH, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \
#ifdef __cplusplus
}

View file

@ -38,6 +38,7 @@
'src/compiler/prepare_grammar/prepare_grammar.cc',
'src/compiler/prepare_grammar/token_description.cc',
'src/compiler/rule.cc',
'src/compiler/syntax_grammar.cc',
'src/compiler/rules/character_set.cc',
'src/compiler/rules/choice.cc',
'src/compiler/rules/metadata.cc',

View file

@ -63,21 +63,26 @@ class ParseTableBuilder {
processing_recovery_states(false) {}
pair<ParseTable, CompileError> build() {
// Ensure that the empty rename sequence has index 0.
parse_table.rename_sequences.push_back({});
// Ensure that the error state has index 0.
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
// Add the starting state.
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0};
ParseStateId error_state_id = add_parse_state({}, ParseItemSet());
add_parse_state({}, ParseItemSet({
Production start_production{{{start_symbol, 0, rules::AssociativityNone, ""}}, 0};
add_parse_state({}, ParseItemSet{{
{
ParseItem(rules::START(), start_production, 0),
LookaheadSet({END_OF_INPUT()}),
},
}));
}});
CompileError error = process_part_state_queue();
if (error.type != TSCompileErrorTypeNone) return {parse_table, error};
if (error) return {parse_table, error};
compute_unmergable_token_pairs();
@ -191,9 +196,14 @@ class ParseTableBuilder {
// If the item is finished, immediately add a Reduce or Accept action to
// the parse table for each of its lookahead terminals.
if (item.is_done()) {
ParseAction action = (item.lhs() == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
ParseAction action;
if (item.lhs() == rules::START()) {
action = ParseAction::Accept();
} else {
action = ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
action.rename_sequence_id = get_rename_sequence_id(*item.production);
}
int precedence = item.precedence();
lookahead_symbols.for_each([&](Symbol lookahead) {
@ -208,7 +218,7 @@ class ParseTableBuilder {
if (existing_action.type == ParseActionTypeAccept || processing_recovery_states) {
entry.actions.push_back(action);
} else {
int existing_precedence = existing_action.precedence();
int existing_precedence = existing_action.production->back().precedence;
if (precedence > existing_precedence) {
for (const ParseAction &old_action : entry.actions)
fragile_productions.insert(old_action.production);
@ -472,7 +482,7 @@ class ParseTableBuilder {
string handle_conflict(const ParseItemSet &item_set, const SymbolSequence &preceding_symbols,
ParseStateId state_id, Symbol lookahead) {
ParseTableEntry &entry = parse_table.states[state_id].terminal_entries[lookahead];
int reduction_precedence = entry.actions.front().precedence();
int reduction_precedence = entry.actions.front().production->back().precedence;
set<ParseItem> shift_items;
bool considered_associativity = false;
@ -524,7 +534,7 @@ class ParseTableBuilder {
bool has_right_associative_reductions = false;
for (const ParseAction &action : entry.actions) {
if (action.type != ParseActionTypeReduce) break;
switch (action.associativity()) {
switch (action.production->back().associativity) {
case rules::AssociativityLeft:
has_left_associative_reductions = true;
break;
@ -688,6 +698,27 @@ class ParseTableBuilder {
return fragile_productions.find(production) != fragile_productions.end();
}
unsigned get_rename_sequence_id(const Production &production) {
RenameSequence rename_sequence;
for (unsigned i = 0, n = production.size(); i < n; i++) {
auto &step = production.at(i);
if (!step.name_replacement.empty()) {
rename_sequence.resize(production.size());
rename_sequence[i] = step.name_replacement;
}
}
auto begin = parse_table.rename_sequences.begin();
auto end = parse_table.rename_sequences.end();
auto iter = find(begin, end, rename_sequence);
if (iter != end) {
return iter - begin;
} else {
parse_table.rename_sequences.push_back(move(rename_sequence));
return parse_table.rename_sequences.size() - 1;
}
}
SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) {
if (!sequence.empty()) {
const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back());

View file

@ -27,6 +27,11 @@ bool ParseItem::operator==(const ParseItem &other) const {
if (step_index != other.step_index) return false;
if (variable_index != other.variable_index) return false;
if (production->size() != other.production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement != other.production->at(i).name_replacement) {
return false;
}
}
if (is_done()) {
if (!production->empty()) {
if (production->back().precedence != other.production->back().precedence) return false;
@ -47,6 +52,10 @@ bool ParseItem::operator<(const ParseItem &other) const {
if (other.variable_index < variable_index) return false;
if (production->size() < other.production->size()) return true;
if (other.production->size() < production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement < other.production->at(i).name_replacement) return true;
if (other.production->at(i).name_replacement < production->at(i).name_replacement) return false;
}
if (is_done()) {
if (!production->empty()) {
if (production->back().precedence < other.production->back().precedence) return true;
@ -106,11 +115,6 @@ Symbol ParseItem::next_symbol() const {
return production->at(step_index).symbol;
}
ParseItemSet::ParseItemSet() {}
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
: entries(entries) {}
bool ParseItemSet::operator==(const ParseItemSet &other) const {
return entries == other.entries;
}
@ -153,13 +157,16 @@ struct hash<ParseItem> {
hash_combine(&result, item.step_index);
hash_combine(&result, item.production->dynamic_precedence);
hash_combine(&result, item.production->size());
for (size_t i = 0; i < item.step_index; i++) {
hash_combine(&result, item.production->at(i).name_replacement);
}
if (item.is_done()) {
if (!item.production->empty()) {
hash_combine(&result, item.production->back().precedence);
hash_combine<unsigned>(&result, item.production->back().associativity);
}
} else {
for (size_t i = 0, n = item.production->size(); i < n; i++) {
for (size_t i = item.step_index, n = item.production->size(); i < n; i++) {
auto &step = item.production->at(i);
hash_combine(&result, step.symbol);
hash_combine(&result, step.precedence);

View file

@ -36,9 +36,6 @@ struct ParseItem {
};
struct ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
bool operator==(const ParseItemSet &) const;
void add(const ParseItemSet &);
size_t unfinished_item_signature() const;

View file

@ -23,19 +23,22 @@ using rules::Symbol;
static vector<Production> inline_production(const ParseItem &item, const SyntaxGrammar &grammar) {
vector<Production> result;
for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) {
auto &inlined_step = item.production->at(item.step_index);
auto &productions_to_insert = grammar.variables[inlined_step.symbol.index].productions;
for (const Production &production_to_insert : productions_to_insert) {
auto begin = item.production->steps.begin();
auto end = item.production->steps.end();
auto step = begin + item.step_index;
Production production{{begin, step}, item.production->dynamic_precedence};
production.steps.insert(
production.steps.end(),
production_to_insert.steps.begin(),
production_to_insert.steps.end()
);
production.back().precedence = item.precedence();
production.back().associativity = item.associativity();
for (auto &step : production_to_insert) {
production.steps.push_back(step);
if (!inlined_step.name_replacement.empty()) {
production.steps.back().name_replacement = inlined_step.name_replacement;
}
}
production.back().precedence = inlined_step.precedence;
production.back().associativity = inlined_step.associativity;
production.steps.insert(
production.steps.end(),
step + 1,

View file

@ -76,6 +76,7 @@ class CCodeGenerator {
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
set<string> unique_replacement_names;
public:
CCodeGenerator(string name, const ParseTable &parse_table,
@ -98,6 +99,7 @@ class CCodeGenerator {
add_symbol_enum();
add_symbol_names_list();
add_symbol_metadata_list();
add_rename_sequences();
add_lex_function();
add_lex_modes_list();
@ -139,11 +141,22 @@ class CCodeGenerator {
}
}
unsigned max_rename_sequence_length = 0;
for (const RenameSequence &rename_sequence : parse_table.rename_sequences) {
if (rename_sequence.size() > max_rename_sequence_length) {
max_rename_sequence_length = rename_sequence.size();
}
for (const string &name_replacement : rename_sequence) {
unique_replacement_names.insert(name_replacement);
}
}
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line("#define TOKEN_COUNT " + to_string(token_count));
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
line("#define MAX_RENAME_SEQUENCE_LENGTH " + to_string(max_rename_sequence_length));
line();
}
@ -158,6 +171,11 @@ class CCodeGenerator {
i++;
}
}
for (const string &replacement_name : unique_replacement_names) {
line(rename_id(replacement_name) + " = " + to_string(i) + ",");
i++;
}
});
line("};");
line();
@ -166,9 +184,45 @@ class CCodeGenerator {
void add_symbol_names_list() {
line("static const char *ts_symbol_names[] = {");
indent([&]() {
for (const auto &entry : parse_table.symbols)
line("[" + symbol_id(entry.first) + "] = \"" +
sanitize_name_for_string(symbol_name(entry.first)) + "\",");
for (const auto &entry : parse_table.symbols) {
line(
"[" + symbol_id(entry.first) + "] = \"" +
sanitize_name_for_string(symbol_name(entry.first)) + "\","
);
}
for (const string &replacement_name : unique_replacement_names) {
line(
"[" + rename_id(replacement_name) + "] = \"" +
sanitize_name_for_string(replacement_name) + "\","
);
}
});
line("};");
line();
}
void add_rename_sequences() {
line(
"static TSSymbol ts_rename_sequences[" +
to_string(parse_table.rename_sequences.size()) +
"][MAX_RENAME_SEQUENCE_LENGTH] = {"
);
indent([&]() {
for (unsigned i = 1, n = parse_table.rename_sequences.size(); i < n; i++) {
const RenameSequence &sequence = parse_table.rename_sequences[i];
line("[" + to_string(i) + "] = {");
indent([&]() {
for (unsigned j = 0, n = sequence.size(); j < n; j++) {
if (!sequence[j].empty()) {
line("[" + to_string(j) + "] = " + rename_id(sequence[j]) + ",");
}
}
});
line("},");
}
});
line("};");
line();
@ -332,7 +386,7 @@ class CCodeGenerator {
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
size_t state_id = 0;
line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
line("static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
indent([&]() {
for (const auto &state : parse_table.states) {
@ -543,17 +597,23 @@ class CCodeGenerator {
}
break;
case ParseActionTypeReduce:
if (action.fragile) {
add("REDUCE_FRAGILE");
} else {
add("REDUCE");
}
add("(");
add("REDUCE(");
add(symbol_id(action.symbol));
add(", ");
add(to_string(action.consumed_symbol_count));
add(", " + to_string(action.dynamic_precedence));
if (action.fragile) {
add(", .fragile = true");
}
if (action.dynamic_precedence != 0) {
add(", .dynamic_precedence = " + to_string(action.dynamic_precedence));
}
if (action.rename_sequence_id != 0) {
add(", .rename_sequence_id = " + to_string(action.rename_sequence_id));
}
add(")");
break;
case ParseActionTypeRecover:
@ -605,6 +665,10 @@ class CCodeGenerator {
}
}
// Builds the identifier emitted for a replacement name in the generated
// code: the fixed prefix `rename_sym_` followed by the sanitized name.
string rename_id(const string &name) {
  string identifier = "rename_sym_";
  identifier += sanitize_name(name);
  return identifier;
}
string symbol_name(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return "END";

View file

@ -198,6 +198,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
}
if (type == "RENAME") {
json_value name_json = rule_json->operator[]("value");
if (name_json.type != json_string) {
return "Rename value must be a string";
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid rename content: " + result.error_message;
}
return Rule(Metadata::rename(string(name_json.u.string.ptr), result.rule));
}
return "Unknown rule type: " + type;
}

View file

@ -6,21 +6,20 @@
namespace tree_sitter {
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction()
: production(nullptr),
consumed_symbol_count(0),
symbol(rules::NONE()),
type(ParseActionTypeError),
extra(false),
fragile(false),
state_index(-1) {}
: production(nullptr),
consumed_symbol_count(0),
symbol(rules::NONE()),
type(ParseActionTypeError),
extra(false),
fragile(false),
state_index(-1),
rename_sequence_id(0) {}
ParseAction ParseAction::Error() {
return ParseAction();
@ -64,77 +63,50 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
return result;
}
int ParseAction::precedence() const {
if (consumed_symbol_count >= production->size()) {
if (production->empty()) {
return 0;
} else {
return production->back().precedence;
}
} else {
return production->at(consumed_symbol_count).precedence;
}
}
rules::Associativity ParseAction::associativity() const {
if (consumed_symbol_count >= production->size()) {
if (production->empty()) {
return rules::AssociativityNone;
} else {
return production->back().associativity;
}
} else {
return production->at(consumed_symbol_count).associativity;
}
}
bool ParseAction::operator==(const ParseAction &other) const {
return (type == other.type && extra == other.extra &&
fragile == other.fragile && symbol == other.symbol &&
state_index == other.state_index && production == other.production &&
consumed_symbol_count == other.consumed_symbol_count);
return
type == other.type &&
extra == other.extra &&
fragile == other.fragile &&
symbol == other.symbol &&
state_index == other.state_index &&
production == other.production &&
consumed_symbol_count == other.consumed_symbol_count &&
rename_sequence_id == other.rename_sequence_id;
}
bool ParseAction::operator<(const ParseAction &other) const {
if (type < other.type)
return true;
if (other.type < type)
return false;
if (extra && !other.extra)
return true;
if (other.extra && !extra)
return false;
if (fragile && !other.fragile)
return true;
if (other.fragile && !fragile)
return false;
if (symbol < other.symbol)
return true;
if (other.symbol < symbol)
return false;
if (state_index < other.state_index)
return true;
if (other.state_index < state_index)
return false;
if (production < other.production)
return true;
if (other.production < production)
return false;
return consumed_symbol_count < other.consumed_symbol_count;
if (type < other.type) return true;
if (other.type < type) return false;
if (extra && !other.extra) return true;
if (other.extra && !extra) return false;
if (fragile && !other.fragile) return true;
if (other.fragile && !fragile) return false;
if (symbol < other.symbol) return true;
if (other.symbol < symbol) return false;
if (state_index < other.state_index) return true;
if (other.state_index < state_index) return false;
if (production < other.production) return true;
if (other.production < production) return false;
if (consumed_symbol_count < other.consumed_symbol_count) return true;
if (other.consumed_symbol_count < consumed_symbol_count) return false;
return rename_sequence_id < other.rename_sequence_id;
}
ParseTableEntry::ParseTableEntry()
: reusable(true), depends_on_lookahead(false) {}
: reusable(true), depends_on_lookahead(false) {}
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions,
bool reusable, bool depends_on_lookahead)
: actions(actions),
reusable(reusable),
depends_on_lookahead(depends_on_lookahead) {}
: actions(actions),
reusable(reusable),
depends_on_lookahead(depends_on_lookahead) {}
bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
return actions == other.actions && reusable == other.reusable &&
depends_on_lookahead == other.depends_on_lookahead;
return
actions == other.actions &&
reusable == other.reusable &&
depends_on_lookahead == other.depends_on_lookahead;
}
ParseState::ParseState() : lex_state_id(-1) {}

View file

@ -28,13 +28,10 @@ struct ParseAction {
static ParseAction Error();
static ParseAction Shift(ParseStateId state_index);
static ParseAction Recover(ParseStateId state_index);
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count,
const Production &);
static ParseAction Reduce(rules::Symbol symbol, size_t child_count, const Production &);
static ParseAction ShiftExtra();
bool operator==(const ParseAction &) const;
bool operator<(const ParseAction &) const;
rules::Associativity associativity() const;
int precedence() const;
const Production *production;
size_t consumed_symbol_count;
@ -44,6 +41,7 @@ struct ParseAction {
bool extra;
bool fragile;
ParseStateId state_index;
unsigned rename_sequence_id;
};
struct ParseTableEntry {
@ -76,12 +74,15 @@ struct ParseTableSymbolMetadata {
bool structural;
};
using RenameSequence = std::vector<std::string>;
struct ParseTable {
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::vector<RenameSequence> rename_sequences;
};
} // namespace tree_sitter

View file

@ -1,8 +1,9 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include <vector>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <algorithm>
#include <string>
#include <vector>
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/grammar.h"
@ -13,6 +14,7 @@ namespace prepare_grammar {
using std::find;
using std::pair;
using std::string;
using std::vector;
using rules::Rule;
@ -20,17 +22,17 @@ class FlattenRule {
private:
vector<int> precedence_stack;
vector<rules::Associativity> associativity_stack;
int last_precedence;
rules::Associativity last_associativity;
vector<string> name_replacement_stack;
Production production;
void apply(const Rule &rule) {
void apply(const Rule &rule, bool at_end) {
rule.match(
[&](const rules::Symbol &symbol) {
production.steps.push_back(ProductionStep{
symbol,
precedence_stack.back(),
associativity_stack.back()
associativity_stack.back(),
name_replacement_stack.back()
});
},
@ -43,30 +45,34 @@ class FlattenRule {
associativity_stack.push_back(metadata.params.associativity);
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.push_back(metadata.params.name_replacement);
}
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
production.dynamic_precedence = metadata.params.dynamic_precedence;
}
apply(*metadata.rule);
apply(*metadata.rule, at_end);
if (metadata.params.has_precedence) {
last_precedence = precedence_stack.back();
precedence_stack.pop_back();
production.back().precedence = precedence_stack.back();
if (!at_end) production.back().precedence = precedence_stack.back();
}
if (metadata.params.has_associativity) {
last_associativity = associativity_stack.back();
associativity_stack.pop_back();
production.back().associativity = associativity_stack.back();
if (!at_end) production.back().associativity = associativity_stack.back();
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.pop_back();
}
},
[&](const rules::Seq &sequence) {
apply(*sequence.left);
last_precedence = 0;
last_associativity = rules::AssociativityNone;
apply(*sequence.right);
apply(*sequence.left, false);
apply(*sequence.right, at_end);
},
[&](const rules::Blank &blank) {},
@ -78,18 +84,13 @@ class FlattenRule {
}
public:
FlattenRule()
: precedence_stack({ 0 }),
associativity_stack({ rules::AssociativityNone }),
last_precedence(0),
last_associativity(rules::AssociativityNone) {}
FlattenRule() :
precedence_stack({0}),
associativity_stack({rules::AssociativityNone}),
name_replacement_stack({""}) {}
Production flatten(const Rule &rule) {
apply(rule);
if (!production.empty()) {
production.back().precedence = last_precedence;
production.back().associativity = last_associativity;
}
apply(rule, true);
return production;
}
};

View file

@ -1,10 +1,14 @@
#include "compiler/rules/metadata.h"
#include <climits>
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::move;
using std::string;
Metadata::Metadata(const Rule &rule, MetadataParams params) :
rule(std::make_shared<Rule>(rule)), params(params) {}
@ -73,5 +77,11 @@ Metadata Metadata::main_token(const Rule &rule) {
return Metadata{rule, params};
}
// Wraps `rule` in a Metadata node whose only non-default parameter is the
// replacement name. The name is moved out of the caller's string.
Metadata Metadata::rename(string &&name, const Rule &rule) {
  MetadataParams rename_params;
  rename_params.name_replacement = move(name);
  return Metadata{rule, rename_params};
}
} // namespace rules
} // namespace tree_sitter

View file

@ -1,6 +1,7 @@
#ifndef COMPILER_RULES_METADATA_H_
#define COMPILER_RULES_METADATA_H_
#include <string>
#include <memory>
namespace tree_sitter {
@ -22,6 +23,7 @@ struct MetadataParams {
bool is_string;
bool is_active;
bool is_main_token;
std::string name_replacement;
inline MetadataParams() :
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
@ -38,7 +40,8 @@ struct MetadataParams {
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
is_main_token == other.is_main_token
is_main_token == other.is_main_token &&
name_replacement == other.name_replacement
);
}
};
@ -59,6 +62,7 @@ struct Metadata {
static Metadata prec_dynamic(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
static Metadata rename(std::string &&name, const Rule &rule);
bool operator==(const Metadata &other) const;
};

View file

@ -0,0 +1,36 @@
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
// Two steps are equal when their symbol, precedence, associativity and
// name replacement all match.
bool ProductionStep::operator==(const ProductionStep &other) const {
  if (!(symbol == other.symbol)) return false;
  if (!(precedence == other.precedence)) return false;
  if (!(associativity == other.associativity)) return false;
  return name_replacement == other.name_replacement;
}
// Negation of operator==.
bool ProductionStep::operator!=(const ProductionStep &other) const {
  const bool same = operator==(other);
  return !same;
}
// Lexicographic ordering over (symbol, precedence, associativity,
// name_replacement): the first field that differs decides the result.
bool ProductionStep::operator<(const ProductionStep &other) const {
  // Only operator< is used for the symbol comparison.
  if (symbol < other.symbol) {
    return true;
  } else if (other.symbol < symbol) {
    return false;
  }

  if (precedence < other.precedence) {
    return true;
  } else if (other.precedence < precedence) {
    return false;
  }

  if (associativity < other.associativity) {
    return true;
  } else if (other.associativity < associativity) {
    return false;
  }

  return name_replacement < other.name_replacement;
}
// Productions are equal when they have the same steps and the same
// dynamic precedence.
bool Production::operator==(const Production &other) const {
  if (!(steps == other.steps)) return false;
  return dynamic_precedence == other.dynamic_precedence;
}
// External tokens are equal when their name, type and corresponding
// internal token all match.
bool ExternalToken::operator==(const ExternalToken &other) const {
  if (!(name == other.name)) return false;
  if (!(type == other.type)) return false;
  return corresponding_internal_token == other.corresponding_internal_token;
}
} // namespace tree_sitter

View file

@ -10,43 +10,29 @@
namespace tree_sitter {
struct ProductionStep {
inline bool operator==(const ProductionStep &other) const {
return symbol == other.symbol &&
precedence == other.precedence &&
associativity == other.associativity;
}
inline bool operator!=(const ProductionStep &other) const {
return !operator==(other);
}
inline bool operator<(const ProductionStep &other) const {
if (symbol < other.symbol) return true;
if (other.symbol < symbol) return false;
if (precedence < other.precedence) return true;
if (other.precedence < precedence) return false;
return associativity < other.associativity;
}
rules::Symbol symbol;
int precedence;
rules::Associativity associativity;
std::string name_replacement;
bool operator==(const ProductionStep &) const;
bool operator!=(const ProductionStep &) const;
bool operator<(const ProductionStep &) const;
};
struct Production {
std::vector<ProductionStep> steps;
int dynamic_precedence = 0;
inline bool operator==(const Production &other) const {
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
}
bool operator==(const Production &) const;
inline ProductionStep &back() { return steps.back(); }
inline const ProductionStep &back() const { return steps.back(); }
inline bool empty() const { return steps.empty(); }
inline size_t size() const { return steps.size(); }
inline const ProductionStep &operator[](int i) const { return steps[i]; }
inline const ProductionStep &at(int i) const { return steps[i]; }
inline std::vector<ProductionStep>::const_iterator begin() const { return steps.begin(); }
inline std::vector<ProductionStep>::const_iterator end() const { return steps.end(); }
};
struct SyntaxVariable {
@ -55,24 +41,18 @@ struct SyntaxVariable {
std::vector<Production> productions;
};
using ConflictSet = std::set<rules::Symbol>;
struct ExternalToken {
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;
inline bool operator==(const ExternalToken &other) const {
return name == other.name &&
type == other.type &&
corresponding_internal_token == other.corresponding_internal_token;
}
bool operator==(const ExternalToken &) const;
};
struct SyntaxGrammar {
std::vector<SyntaxVariable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<ExternalToken> external_tokens;
std::set<rules::Symbol> variables_to_inline;
};

View file

@ -3,7 +3,8 @@
#include "runtime/error_costs.h"
static const TSParseAction SHIFT_ERROR = {
.type = TSParseActionTypeShift, .params = {.to_state = ERROR_STATE}
.type = TSParseActionTypeShift,
.to_state = ERROR_STATE,
};
void ts_language_table_entry(const TSLanguage *self, TSStateId state,

View file

@ -44,7 +44,7 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
if (count > 0) {
TSParseAction action = actions[count - 1];
if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) {
return action.params.to_state;
return action.to_state;
}
}
return 0;
@ -63,6 +63,13 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
}
}
// Looks up a rename sequence by id in the language's flat
// `rename_sequences` table, where each row is `max_rename_sequence_length`
// symbols long. Id 0 yields NULL; any other id yields a pointer to the
// start of row `id`.
static inline const TSSymbol *
ts_language_rename_sequence(const TSLanguage *self, unsigned id) {
  if (id == 0) {
    return NULL;
  }
  return self->rename_sequences + id * self->max_rename_sequence_length;
}
#ifdef __cplusplus
}
#endif

View file

@ -288,7 +288,8 @@ void ts_symbol_iterator_next(TSSymbolIterator *self) {
}
const char *ts_node_type(TSNode self, const TSDocument *document) {
TSSymbol symbol = ts_node__tree(self)->symbol;
const Tree *tree = ts_node__tree(self);
TSSymbol symbol = tree->context.rename_symbol ? tree->context.rename_symbol : tree->symbol;
return ts_language_symbol_name(document->parser.language, symbol);
}
@ -303,7 +304,8 @@ bool ts_node_eq(TSNode self, TSNode other) {
}
bool ts_node_is_named(TSNode self) {
return ts_node__tree(self)->named;
const Tree *tree = ts_node__tree(self);
return tree->named || tree->context.rename_symbol != 0;
}
bool ts_node_has_changes(TSNode self) {

View file

@ -352,7 +352,10 @@ static Tree *parser__lex(Parser *self, StackVersion version) {
if (found_external_token) {
result->has_external_tokens = true;
memset(result->external_token_state, 0, sizeof(TSExternalTokenState));
self->language->external_scanner.serialize(self->external_scanner_payload, result->external_token_state);
self->language->external_scanner.serialize(
self->external_scanner_payload,
result->external_token_state
);
ts_lexer_set_last_external_token(&self->lexer, result);
}
}
@ -537,11 +540,15 @@ static void parser__shift(Parser *self, StackVersion version, TSStateId state,
ts_tree_release(lookahead);
}
static bool parser__switch_children(Parser *self, Tree *tree,
Tree **children, uint32_t count) {
static bool parser__replace_children(Parser *self, Tree *tree, Tree **children, uint32_t count) {
self->scratch_tree.symbol = tree->symbol;
self->scratch_tree.child_count = 0;
ts_tree_set_children(&self->scratch_tree, count, children);
ts_tree_set_children(
&self->scratch_tree,
count,
children,
ts_language_rename_sequence(self->language, tree->rename_sequence_id)
);
if (parser__select_tree(self, tree, &self->scratch_tree)) {
tree->size = self->scratch_tree.size;
tree->padding = self->scratch_tree.padding;
@ -557,17 +564,16 @@ static bool parser__switch_children(Parser *self, Tree *tree,
}
static StackPopResult parser__reduce(Parser *self, StackVersion version,
TSSymbol symbol, unsigned count,
bool fragile, int dynamic_precedence,
bool allow_skipping) {
TSSymbol symbol, uint32_t count,
int dynamic_precedence, uint16_t rename_sequence_id,
bool fragile, bool allow_skipping) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
if (pop.stopped_at_error)
return pop;
if (pop.stopped_at_error) return pop;
const TSLanguage *language = self->language;
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
TSSymbolMetadata metadata = ts_language_symbol_metadata(self->language, symbol);
const TSSymbol *rename_sequence = ts_language_rename_sequence(self->language, rename_sequence_id);
for (uint32_t i = 0; i < pop.slices.size; i++) {
StackSlice slice = pop.slices.contents[i];
@ -576,10 +582,13 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
// node. They will be re-pushed onto the stack after the parent node is
// created and pushed.
uint32_t child_count = slice.trees.size;
while (child_count > 0 && slice.trees.contents[child_count - 1]->extra)
while (child_count > 0 && slice.trees.contents[child_count - 1]->extra) {
child_count--;
}
Tree *parent = ts_tree_make_node(symbol, child_count, slice.trees.contents, metadata);
Tree *parent = ts_tree_make_node(
symbol, child_count, slice.trees.contents, metadata, rename_sequence
);
// This pop operation may have caused multiple stack versions to collapse
// into one, because they all diverged from a common state. In that case,
@ -591,10 +600,11 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
i++;
uint32_t child_count = next_slice.trees.size;
while (child_count > 0 && next_slice.trees.contents[child_count - 1]->extra)
while (child_count > 0 && next_slice.trees.contents[child_count - 1]->extra) {
child_count--;
}
if (parser__switch_children(self, parent, next_slice.trees.contents, child_count)) {
if (parser__replace_children(self, parent, next_slice.trees.contents, child_count)) {
ts_tree_array_delete(&slice.trees);
slice = next_slice;
} else {
@ -603,9 +613,10 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
}
parent->dynamic_precedence += dynamic_precedence;
parent->rename_sequence_id = rename_sequence_id;
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(language, state, symbol);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
if (fragile || self->is_split || pop.slices.size > 1 || initial_version_count > 1) {
parent->fragile_left = true;
parent->fragile_right = true;
@ -699,12 +710,12 @@ static const TSParseAction *parser__reductions_after_sequence(
(*count)--;
}
while (*count > 0 && actions[0].params.child_count < child_count) {
while (*count > 0 && actions[0].child_count < child_count) {
actions++;
(*count)--;
}
while (*count > 0 && actions[*count - 1].params.child_count > child_count) {
while (*count > 0 && actions[*count - 1].child_count > child_count) {
(*count)--;
}
@ -756,7 +767,7 @@ static StackIterateAction parser__repair_error_callback(void *payload, TSStateId
}
for (uint32_t j = 0; j < repair_reduction_count; j++) {
if (repair_reductions[j].params.symbol == repair->symbol) {
if (repair_reductions[j].symbol == repair->symbol) {
result |= StackIteratePop;
session->found_repair = true;
session->best_repair = *repair;
@ -788,8 +799,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
array_clear(&self->reduce_actions);
for (uint32_t i = 0; i < entry.action_count; i++) {
if (entry.actions[i].type == TSParseActionTypeReduce) {
TSSymbol symbol = entry.actions[i].params.symbol;
uint32_t child_count = entry.actions[i].params.child_count;
TSSymbol symbol = entry.actions[i].symbol;
uint32_t child_count = entry.actions[i].child_count;
if ((child_count > session.tree_count_above_error) ||
(child_count == session.tree_count_above_error &&
!ts_language_symbol_metadata(self->language, symbol).visible))
@ -838,9 +849,11 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
array_push(&children, slice.trees.contents[i]);
array_delete(&slice.trees);
Tree *parent =
ts_tree_make_node(symbol, children.size, children.contents,
ts_language_symbol_metadata(self->language, symbol));
Tree *parent = ts_tree_make_node(
symbol, children.size, children.contents,
ts_language_symbol_metadata(self->language, symbol),
NULL
);
parser__push(self, slice.version, parent, next_state);
ts_stack_decrease_push_count(self->stack, slice.version, error->child_count);
@ -898,7 +911,11 @@ static void parser__accept(Parser *self, StackVersion version,
for (uint32_t k = 0; k < child->child_count; k++)
ts_tree_retain(child->children[k]);
array_splice(&trees, j, 1, child->child_count, child->children);
ts_tree_set_children(root, trees.size, trees.contents);
const TSSymbol *rename_sequence = ts_language_rename_sequence(
self->language,
root->rename_sequence_id
);
ts_tree_set_children(root, trees.size, trees.contents, rename_sequence);
ts_tree_release(child);
break;
}
@ -942,11 +959,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
has_shift_action = true;
break;
case TSParseActionTypeReduce:
if (action.params.child_count > 0)
if (action.child_count > 0)
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence
.symbol = action.symbol,
.count = action.child_count,
.dynamic_precedence = action.dynamic_precedence,
.rename_sequence_id = action.rename_sequence_id,
});
default:
break;
@ -958,8 +976,9 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
ReduceAction action = self->reduce_actions.contents[i];
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.count, true,
action.dynamic_precedence, false
self, version, action.symbol, action.count,
action.dynamic_precedence, action.rename_sequence_id,
true, false
);
if (reduction.stopped_at_error) {
ts_tree_array_delete(&reduction.slices.contents[0].trees);
@ -1168,7 +1187,7 @@ static void parser__advance(Parser *self, StackVersion version,
next_state = state;
LOG("shift_extra");
} else {
next_state = action.params.to_state;
next_state = action.to_state;
LOG("shift state:%u", next_state);
}
@ -1195,18 +1214,14 @@ static void parser__advance(Parser *self, StackVersion version,
}
case TSParseActionTypeReduce: {
if (reduction_stopped_at_error)
continue;
if (reduction_stopped_at_error) continue;
unsigned child_count = action.params.child_count;
TSSymbol symbol = action.params.symbol;
unsigned dynamic_precedence = action.params.dynamic_precedence;
bool fragile = action.fragile;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
StackPopResult reduction =
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), action.child_count);
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.child_count,
action.dynamic_precedence, action.rename_sequence_id,
action.fragile, true
);
StackSlice slice = *array_front(&reduction.slices);
if (reduction.stopped_at_error) {
reduction_stopped_at_error = true;
@ -1237,7 +1252,7 @@ static void parser__advance(Parser *self, StackVersion version,
ts_tree_retain(lookahead);
}
parser__recover(self, version, action.params.to_state, lookahead);
parser__recover(self, version, action.to_state, lookahead);
if (lookahead == reusable_node->tree) {
reusable_node_pop(reusable_node);
}
@ -1355,6 +1370,6 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
LOG_TREE();
ts_stack_clear(self->stack);
parser__clear_cached_token(self);
ts_tree_assign_parents(self->finished_tree, &self->tree_path1);
ts_tree_assign_parents(self->finished_tree, &self->tree_path1, self->language);
return self->finished_tree;
}

View file

@ -12,6 +12,7 @@ typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short rename_sequence_id;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -7,6 +7,7 @@
#include "runtime/alloc.h"
#include "runtime/tree.h"
#include "runtime/length.h"
#include "runtime/language.h"
#include "runtime/error_costs.h"
TSStateId TS_TREE_STATE_NONE = USHRT_MAX;
@ -22,6 +23,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
.visible_child_count = 0,
.named_child_count = 0,
.children = NULL,
.rename_sequence_id = 0,
.padding = padding,
.visible = metadata.visible,
.named = metadata.named,
@ -120,18 +122,22 @@ Tree *ts_tree_make_copy(Tree *self) {
return result;
}
void ts_tree_assign_parents(Tree *self, TreePath *path) {
void ts_tree_assign_parents(Tree *self, TreePath *path, const TSLanguage *language) {
array_clear(path);
array_push(path, ((TreePathEntry){self, length_zero(), 0}));
while (path->size > 0) {
Tree *tree = array_pop(path).tree;
Length offset = length_zero();
const TSSymbol *rename_sequence = ts_language_rename_sequence(language, tree->rename_sequence_id);
for (uint32_t i = 0; i < tree->child_count; i++) {
Tree *child = tree->children[i];
if (child->context.parent != tree || child->context.index != i) {
child->context.parent = tree;
child->context.index = i;
child->context.offset = offset;
if (rename_sequence && rename_sequence[i] != 0) {
child->context.rename_symbol = rename_sequence[i];
}
array_push(path, ((TreePathEntry){child, length_zero(), 0}));
}
offset = length_add(offset, ts_tree_total_size(child));
@ -140,7 +146,8 @@ void ts_tree_assign_parents(Tree *self, TreePath *path) {
}
void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children,
const TSSymbol *rename_sequence) {
if (self->child_count > 0)
ts_free(self->children);
@ -170,8 +177,9 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
if (child->visible) {
self->visible_child_count++;
if (child->named)
if (child->named || (rename_sequence && rename_sequence[i] != 0)) {
self->named_child_count++;
}
} else if (child->child_count > 0) {
self->visible_child_count += child->visible_child_count;
self->named_child_count += child->named_child_count;
@ -202,11 +210,11 @@ void ts_tree_set_children(Tree *self, uint32_t child_count, Tree **children) {
}
}
Tree *ts_tree_make_node(TSSymbol symbol, uint32_t child_count,
Tree **children, TSSymbolMetadata metadata) {
Tree *ts_tree_make_node(TSSymbol symbol, uint32_t child_count, Tree **children,
TSSymbolMetadata metadata, const TSSymbol *rename_sequence) {
Tree *result =
ts_tree_make_leaf(symbol, length_zero(), length_zero(), metadata);
ts_tree_set_children(result, child_count, children);
ts_tree_set_children(result, child_count, children, rename_sequence);
return result;
}
@ -224,7 +232,7 @@ Tree *ts_tree_make_error_node(TreeArray *children) {
Tree *result = ts_tree_make_node(
ts_builtin_sym_error, children->size, children->contents,
(TSSymbolMetadata){.extra = false, .visible = true, .named = true });
(TSSymbolMetadata){.extra = false, .visible = true, .named = true }, NULL);
result->fragile_left = true;
result->fragile_right = true;
@ -472,36 +480,36 @@ static size_t ts_tree__write_to_string(const Tree *self,
const TSLanguage *language, char *string,
size_t limit, bool is_root,
bool include_all) {
if (!self)
return snprintf(string, limit, "(NULL)");
if (!self) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible = include_all || is_root || (self->visible && self->named);
bool visible =
include_all ||
is_root ||
(self->visible && self->named) ||
self->context.rename_symbol != 0;
if (visible && !is_root)
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 &&
self->size.chars > 0) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor +=
ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
} else {
cursor += snprintf(*writer, limit, "(%s",
ts_language_symbol_name(language, self->symbol));
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol));
}
}
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
cursor += ts_tree__write_to_string(child, language, *writer, limit, false,
include_all);
cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all);
}
if (visible)
cursor += snprintf(*writer, limit, ")");
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
@ -518,8 +526,8 @@ char *ts_tree_string(const Tree *self, const TSLanguage *language,
void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset,
const TSLanguage *language, FILE *f) {
fprintf(f, "tree_%p [label=\"%s\"", self,
ts_language_symbol_name(language, self->symbol));
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol));
if (self->child_count == 0)
fprintf(f, ", shape=plaintext");

View file

@ -19,6 +19,7 @@ typedef struct Tree {
struct Tree *parent;
uint32_t index;
Length offset;
TSSymbol rename_symbol;
} context;
uint32_t child_count;
@ -26,6 +27,7 @@ typedef struct Tree {
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
unsigned short rename_sequence_id;
struct Tree **children;
};
TSExternalTokenState external_token_state;
@ -73,7 +75,7 @@ TreeArray ts_tree_array_remove_last_n(TreeArray *, uint32_t);
TreeArray ts_tree_array_remove_trailing_extras(TreeArray *);
Tree *ts_tree_make_leaf(TSSymbol, Length, Length, TSSymbolMetadata);
Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, TSSymbolMetadata);
Tree *ts_tree_make_node(TSSymbol, uint32_t, Tree **, TSSymbolMetadata, const TSSymbol *);
Tree *ts_tree_make_copy(Tree *child);
Tree *ts_tree_make_error_node(TreeArray *);
Tree *ts_tree_make_error(Length, Length, int32_t);
@ -84,8 +86,8 @@ int ts_tree_compare(const Tree *tree1, const Tree *tree2);
uint32_t ts_tree_start_column(const Tree *self);
uint32_t ts_tree_end_column(const Tree *self);
void ts_tree_set_children(Tree *, uint32_t, Tree **);
void ts_tree_assign_parents(Tree *, TreePath *);
void ts_tree_set_children(Tree *, uint32_t, Tree **, const TSSymbol *);
void ts_tree_assign_parents(Tree *, TreePath *, const TSLanguage *);
void ts_tree_edit(Tree *, const TSInputEdit *edit);
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);

View file

@ -12,6 +12,7 @@ START_TEST
describe("ParseItemSetBuilder", []() {
vector<LexicalVariable> lexical_variables;
for (size_t i = 0; i < 20; i++) {
lexical_variables.push_back({
"token_" + to_string(i),
@ -27,23 +28,23 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::terminal(11), 0, AssociativityNone, ""},
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone, ""},
{Symbol::terminal(13), 0, AssociativityNone, ""},
}, 0},
Production{{
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone, ""},
}, 0}
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production{{
{Symbol::terminal(14), 0, AssociativityNone},
{Symbol::terminal(15), 0, AssociativityNone},
{Symbol::terminal(14), 0, AssociativityNone, ""},
{Symbol::terminal(15), 0, AssociativityNone, ""},
}, 0}
}},
}, {}, {}, {}, {}};
@ -52,21 +53,21 @@ describe("ParseItemSetBuilder", []() {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet item_set({
ParseItemSet item_set{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
}};
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
AssertThat(item_set, Equals(ParseItemSet({
AssertThat(item_set, Equals(ParseItemSet{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
},
},
{
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
@ -79,21 +80,21 @@ describe("ParseItemSetBuilder", []() {
ParseItem(Symbol::non_terminal(2), production(2, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
}}));
});
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::terminal(11), 0, AssociativityNone, ""},
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone, ""},
{Symbol::terminal(13), 0, AssociativityNone, ""},
}, 0},
Production{{}, 0}
}},
@ -103,17 +104,17 @@ describe("ParseItemSetBuilder", []() {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet item_set({
ParseItemSet item_set{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
}};
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
AssertThat(item_set, Equals(ParseItemSet({
AssertThat(item_set, Equals(ParseItemSet{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
@ -126,7 +127,7 @@ describe("ParseItemSetBuilder", []() {
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
}}));
});
});

View file

@ -35,19 +35,19 @@ describe("flatten_grammar", []() {
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(3), 102, AssociativityRight},
{Symbol::non_terminal(4), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::non_terminal(2), 101, AssociativityLeft, ""},
{Symbol::non_terminal(3), 102, AssociativityRight, ""},
{Symbol::non_terminal(4), 101, AssociativityLeft, ""},
{Symbol::non_terminal(6), 0, AssociativityNone, ""},
{Symbol::non_terminal(7), 0, AssociativityNone, ""},
}, 0},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(5), 101, AssociativityLeft},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::non_terminal(2), 101, AssociativityLeft, ""},
{Symbol::non_terminal(5), 101, AssociativityLeft, ""},
{Symbol::non_terminal(6), 0, AssociativityNone, ""},
{Symbol::non_terminal(7), 0, AssociativityNone, ""},
}, 0}
})));
});
@ -77,19 +77,19 @@ describe("flatten_grammar", []() {
AssertThat(result.type, Equals(VariableTypeNamed));
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(3), 0, AssociativityNone},
{Symbol::non_terminal(4), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::non_terminal(2), 0, AssociativityNone, ""},
{Symbol::non_terminal(3), 0, AssociativityNone, ""},
{Symbol::non_terminal(4), 0, AssociativityNone, ""},
{Symbol::non_terminal(6), 0, AssociativityNone, ""},
{Symbol::non_terminal(7), 0, AssociativityNone, ""},
}, 102},
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(5), 0, AssociativityNone},
{Symbol::non_terminal(6), 0, AssociativityNone},
{Symbol::non_terminal(7), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::non_terminal(2), 0, AssociativityNone, ""},
{Symbol::non_terminal(5), 0, AssociativityNone, ""},
{Symbol::non_terminal(6), 0, AssociativityNone, ""},
{Symbol::non_terminal(7), 0, AssociativityNone, ""},
}, 101}
})));
});
@ -106,8 +106,8 @@ describe("flatten_grammar", []() {
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
{Symbol::non_terminal(2), 101, AssociativityLeft},
{Symbol::non_terminal(1), 101, AssociativityLeft, ""},
{Symbol::non_terminal(2), 101, AssociativityLeft, ""},
}, 0}
})));
@ -121,7 +121,7 @@ describe("flatten_grammar", []() {
AssertThat(result.productions, Equals(vector<Production>({
Production{{
{Symbol::non_terminal(1), 101, AssociativityLeft},
{Symbol::non_terminal(1), 101, AssociativityLeft, ""},
}, 0}
})));
});

View file

@ -0,0 +1,18 @@
======================================
Method calls
======================================
a.b(c(d.e));
---
(statement
(call_expression
(member_expression
(variable_name)
(property_name))
(call_expression
(variable_name)
(member_expression
(variable_name)
(property_name)))))

View file

@ -0,0 +1,73 @@
{
"name": "inlined_renamed_rules",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"inline": [
"expression"
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ";"}
]
},
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "call_expression"},
{"type": "SYMBOL", "name": "member_expression"},
{
"type": "RENAME",
"value": "variable_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"call_expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": ")"}
]
}
},
"member_expression": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "."},
{
"type": "RENAME",
"value": "property_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
}
},
"identifier": {"type": "PATTERN", "value": "\\a+"}
}
}

View file

@ -0,0 +1 @@
This grammar shows that a rule marked as `inline` can *contain* a `RENAME` rule.

View file

@ -0,0 +1,13 @@
=========================
OK
=========================
a.b.c
---
(expression (member_expression
(expression (member_expression
(expression (variable_name))
(property_name)))
(property_name)))

View file

@ -0,0 +1,57 @@
{
"name": "renamed_inlined_rules",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"inline": [
"identifier"
],
"rules": {
"expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "member_expression"},
{
"type": "RENAME",
"value": "variable_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"member_expression": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "expression"},
{"type": "STRING", "value": "."},
{
"type": "RENAME",
"value": "property_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
}
},
"identifier": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "a"},
{"type": "STRING", "value": "b"},
{"type": "STRING", "value": "c"}
]
}
}
}

View file

@ -0,0 +1 @@
This grammar shows that `RENAME` rules can *contain* a rule that is marked as `inline`. It also shows that you can rename a rule that would otherwise be anonymous, and it will then appear as a named node.

View file

@ -0,0 +1,18 @@
======================================
Method calls
======================================
a.b(c(d.e));
---
(statement
(call_expression
(member_expression
(variable_name)
(property_name))
(call_expression
(variable_name)
(member_expression
(variable_name)
(property_name)))))

View file

@ -0,0 +1,69 @@
{
"name": "renamed_rules",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": ";"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "call_expression"},
{"type": "SYMBOL", "name": "member_expression"},
{
"type": "RENAME",
"value": "variable_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"call_expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": ")"}
]
}
},
"member_expression": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": "."},
{
"type": "RENAME",
"value": "property_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
}
},
"identifier": {"type": "PATTERN", "value": "\\a+"}
}
}

View file

@ -53,11 +53,11 @@ bool operator==(const std::vector<Tree *> &vec, const TreeArray &array) {
void assert_consistent_tree_sizes(TSNode node) {
size_t child_count = ts_node_child_count(node);
size_t named_child_count = ts_node_named_child_count(node);
size_t start_byte = ts_node_start_byte(node);
size_t end_byte = ts_node_end_byte(node);
TSPoint start_point = ts_node_start_point(node);
TSPoint end_point = ts_node_end_point(node);
bool some_child_has_changes = false;
AssertThat(start_byte, !IsGreaterThan(end_byte));
AssertThat(start_point, !IsGreaterThan(end_point));
@ -65,6 +65,8 @@ void assert_consistent_tree_sizes(TSNode node) {
size_t last_child_end_byte = start_byte;
TSPoint last_child_end_point = start_point;
bool some_child_has_changes = false;
size_t actual_named_child_count = 0;
for (size_t i = 0; i < child_count; i++) {
TSNode child = ts_node_child(node, i);
size_t child_start_byte = ts_node_start_byte(child);
@ -73,13 +75,16 @@ void assert_consistent_tree_sizes(TSNode node) {
AssertThat(child_start_byte, !IsLessThan(last_child_end_byte));
AssertThat(child_start_point, !IsLessThan(last_child_end_point));
assert_consistent_tree_sizes(child);
if (ts_node_has_changes(child))
some_child_has_changes = true;
if (ts_node_has_changes(child)) some_child_has_changes = true;
if (ts_node_is_named(child)) actual_named_child_count++;
last_child_end_byte = ts_node_end_byte(child);
last_child_end_point = ts_node_end_point(child);
}
AssertThat(actual_named_child_count, Equals(named_child_count));
if (child_count > 0) {
AssertThat(end_byte, !IsLessThan(last_child_end_byte));
AssertThat(end_point, !IsLessThan(last_child_end_point));

View file

@ -3,6 +3,7 @@
#include "helpers/load_language.h"
#include "helpers/stderr_logger.h"
#include "helpers/file_helpers.h"
#include "helpers/tree_helpers.h"
#include "runtime/alloc.h"
START_TEST
@ -54,6 +55,7 @@ for (auto &language_name : test_languages) {
ts_document_parse(document);
TSNode root_node = ts_document_root_node(document);
assert_consistent_tree_sizes(root_node);
const char *node_string = ts_node_string(root_node, document);
string result(node_string);
ts_free((void *)node_string);

View file

@ -72,7 +72,7 @@ describe("Tree", []() {
parent1 = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
}), visible, nullptr);
});
after_each([&]() {
@ -103,7 +103,7 @@ describe("Tree", []() {
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
}), visible, nullptr);
});
after_each([&]() {
@ -127,7 +127,7 @@ describe("Tree", []() {
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
}), visible, nullptr);
});
after_each([&]() {
@ -151,7 +151,7 @@ describe("Tree", []() {
parent = ts_tree_make_node(symbol3, 2, tree_array({
tree1,
tree2,
}), visible);
}), visible, nullptr);
});
after_each([&]() {
@ -173,7 +173,7 @@ describe("Tree", []() {
ts_tree_make_leaf(symbol2, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(symbol3, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
ts_tree_make_leaf(symbol4, {2, 2, {0, 2}}, {3, 3, {0, 3}}, visible),
}), visible);
}), visible, nullptr);
AssertThat(tree->padding, Equals<Length>({2, 2, {0, 2}}));
AssertThat(tree->size, Equals<Length>({13, 13, {0, 13}}));
@ -350,14 +350,14 @@ describe("Tree", []() {
Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf_copy,
}), visible);
}), visible, nullptr);
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
Tree *parent_copy = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf_copy,
}), visible);
}), visible, nullptr);
ts_tree_retain(leaf);
ts_tree_retain(leaf_copy);
@ -401,14 +401,14 @@ describe("Tree", []() {
Tree *parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf,
leaf2,
}), visible);
}), visible, nullptr);
ts_tree_retain(leaf);
ts_tree_retain(leaf2);
Tree *different_parent = ts_tree_make_node(symbol2, 2, tree_array({
leaf2,
leaf,
}), visible);
}), visible, nullptr);
ts_tree_retain(leaf2);
ts_tree_retain(leaf);
@ -438,14 +438,14 @@ describe("Tree", []() {
(tree3 = make_external(ts_tree_make_leaf(symbol3, padding, size, visible))),
(tree4 = ts_tree_make_leaf(symbol4, padding, size, visible)),
(tree5 = ts_tree_make_leaf(symbol5, padding, size, visible)),
}), visible)),
}), visible, nullptr)),
(tree6 = ts_tree_make_node(symbol6, 2, tree_array({
(tree7 = ts_tree_make_node(symbol7, 1, tree_array({
(tree8 = ts_tree_make_leaf(symbol8, padding, size, visible)),
}), visible)),
}), visible, nullptr)),
(tree9 = ts_tree_make_leaf(symbol9, padding, size, visible)),
}), visible)),
}), visible);
}), visible, nullptr)),
}), visible, nullptr);
auto token = ts_tree_last_external_token(tree1);
AssertThat(token, Equals(tree3));