Introduce RENAME rule type

This commit is contained in:
Max Brunsfeld 2017-07-13 17:17:22 -07:00
parent 0b94e9d814
commit b3a72954ff
26 changed files with 516 additions and 246 deletions

View file

@ -135,6 +135,23 @@
"required": ["type", "members"]
},
"rename-rule": {
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^RENAME$"
},
"value": {
"type": "string"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["type", "content", "value"]
},
"repeat-rule": {
"type": "object",
"properties": {
@ -202,6 +219,7 @@
{ "$ref": "#/definitions/symbol-rule" },
{ "$ref": "#/definitions/seq-rule" },
{ "$ref": "#/definitions/choice-rule" },
{ "$ref": "#/definitions/rename-rule" },
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },

View file

@ -9,9 +9,8 @@ extern "C" {
#include <stdint.h>
#include <stdlib.h>
typedef unsigned short TSSymbol;
typedef unsigned short TSStateId;
typedef uint16_t TSSymbol;
typedef uint16_t TSStateId;
typedef uint8_t TSExternalTokenState[16];
#define ts_builtin_sym_error ((TSSymbol)-1)
@ -40,16 +39,19 @@ typedef enum {
typedef struct {
union {
TSStateId to_state;
struct {
short dynamic_precedence;
TSSymbol symbol;
unsigned short child_count;
TSStateId to_state;
bool extra : 1;
};
} params;
/* Payload of a reduce action. */
struct {
  /* Symbol produced by the reduction. */
  TSSymbol symbol;
  /*
   * Signed on purpose: the value is handed to the runtime as an `int` and
   * added to the parent node's dynamic precedence, so a negative precedence
   * must not wrap around to a large positive number.
   */
  int16_t dynamic_precedence;
  /* Number of child nodes consumed by the reduction. */
  uint8_t child_count;
  /* Index into the language's rename sequences; 0 is the empty sequence. */
  uint8_t rename_sequence_id : 7;
  bool fragile : 1;
};
};
TSParseActionType type : 4;
bool extra : 1;
bool fragile : 1;
} TSParseAction;
typedef struct {
@ -60,7 +62,7 @@ typedef struct {
typedef union {
TSParseAction action;
struct {
unsigned short count;
uint8_t count;
bool reusable : 1;
bool depends_on_lookahead : 1;
};
@ -73,9 +75,11 @@ typedef struct TSLanguage {
uint32_t external_token_count;
const char **symbol_names;
const TSSymbolMetadata *symbol_metadata;
const unsigned short *parse_table;
const uint16_t *parse_table;
const TSParseActionEntry *parse_actions;
const TSLexMode *lex_modes;
const TSSymbol *rename_sequences;
uint16_t max_rename_sequence_length;
bool (*lex_fn)(TSLexer *, TSStateId);
struct {
const bool *states;
@ -127,70 +131,62 @@ typedef struct TSLanguage {
#define STATE(id) id
#define ACTIONS(id) id
#define SHIFT(to_state_value) \
{ \
{ \
.type = TSParseActionTypeShift, .params = {.to_state = to_state_value } \
} \
#define SHIFT(to_state_value) \
{ \
{ \
.type = TSParseActionTypeShift, \
.to_state = to_state_value, \
} \
}
#define RECOVER(to_state_value) \
{ \
{ \
.type = TSParseActionTypeRecover, .params = {.to_state = to_state_value } \
} \
#define RECOVER(to_state_value) \
{ \
{ \
.type = TSParseActionTypeRecover, \
.to_state = to_state_value \
} \
}
#define SHIFT_EXTRA() \
{ \
{ .type = TSParseActionTypeShift, .extra = true } \
#define SHIFT_EXTRA() \
{ \
{ \
.type = TSParseActionTypeShift, \
.extra = true \
} \
}
#define REDUCE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
#define REDUCE(symbol_val, child_count_val, ...) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.symbol = symbol_val, \
.child_count = child_count_val, \
__VA_ARGS__ \
} \
}
#define REDUCE_FRAGILE(symbol_val, child_count_val, dynamic_precedence_val) \
{ \
{ \
.type = TSParseActionTypeReduce, \
.fragile = true, \
.params = { \
.symbol = symbol_val, \
.child_count = child_count_val, \
.dynamic_precedence = dynamic_precedence_val, \
} \
} \
}
#define ACCEPT_INPUT() \
{ \
{ .type = TSParseActionTypeAccept } \
}
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \
#define GET_LANGUAGE(...) \
static TSLanguage language = { \
.version = LANGUAGE_VERSION, \
.symbol_count = SYMBOL_COUNT, \
.token_count = TOKEN_COUNT, \
.symbol_metadata = ts_symbol_metadata, \
.parse_table = (const unsigned short *)ts_parse_table, \
.parse_actions = ts_parse_actions, \
.lex_modes = ts_lex_modes, \
.symbol_names = ts_symbol_names, \
.rename_sequences = (const TSSymbol *)ts_rename_sequences, \
.max_rename_sequence_length = MAX_RENAME_SEQUENCE_LENGTH, \
.lex_fn = ts_lex, \
.external_token_count = EXTERNAL_TOKEN_COUNT, \
.external_scanner = {__VA_ARGS__} \
}; \
return &language \
#ifdef __cplusplus
}

View file

@ -38,6 +38,7 @@
'src/compiler/prepare_grammar/prepare_grammar.cc',
'src/compiler/prepare_grammar/token_description.cc',
'src/compiler/rule.cc',
'src/compiler/syntax_grammar.cc',
'src/compiler/rules/character_set.cc',
'src/compiler/rules/choice.cc',
'src/compiler/rules/metadata.cc',

View file

@ -63,21 +63,26 @@ class ParseTableBuilder {
processing_recovery_states(false) {}
pair<ParseTable, CompileError> build() {
// Ensure that the empty rename sequence has index 0.
parse_table.rename_sequences.push_back({});
// Ensure that the error state has index 0.
ParseStateId error_state_id = add_parse_state({}, ParseItemSet{});
// Add the starting state.
Symbol start_symbol = grammar.variables.empty() ?
Symbol::terminal(0) :
Symbol::non_terminal(0);
Production start_production{{{start_symbol, 0, rules::AssociativityNone}}, 0};
ParseStateId error_state_id = add_parse_state({}, ParseItemSet());
add_parse_state({}, ParseItemSet({
Production start_production{{{start_symbol, 0, rules::AssociativityNone, ""}}, 0};
add_parse_state({}, ParseItemSet{{
{
ParseItem(rules::START(), start_production, 0),
LookaheadSet({END_OF_INPUT()}),
},
}));
}});
CompileError error = process_part_state_queue();
if (error.type != TSCompileErrorTypeNone) return {parse_table, error};
if (error) return {parse_table, error};
compute_unmergable_token_pairs();
@ -191,9 +196,14 @@ class ParseTableBuilder {
// If the item is finished, immediately add a Reduce or Accept action to
// the parse table for each of its lookahead terminals.
if (item.is_done()) {
ParseAction action = (item.lhs() == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
ParseAction action;
if (item.lhs() == rules::START()) {
action = ParseAction::Accept();
} else {
action = ParseAction::Reduce(item.lhs(), item.step_index, *item.production);
action.rename_sequence_id = get_rename_sequence_id(*item.production);
}
int precedence = item.precedence();
lookahead_symbols.for_each([&](Symbol lookahead) {
@ -688,6 +698,27 @@ class ParseTableBuilder {
return fragile_productions.find(production) != fragile_productions.end();
}
unsigned get_rename_sequence_id(const Production &production) {
RenameSequence rename_sequence;
for (unsigned i = 0, n = production.size(); i < n; i++) {
auto &step = production.at(i);
if (!step.name_replacement.empty()) {
rename_sequence.resize(production.size());
rename_sequence[i] = step.name_replacement;
}
}
auto begin = parse_table.rename_sequences.begin();
auto end = parse_table.rename_sequences.end();
auto iter = find(begin, end, rename_sequence);
if (iter != end) {
return iter - begin;
} else {
parse_table.rename_sequences.push_back(move(rename_sequence));
return parse_table.rename_sequences.size() - 1;
}
}
SymbolSequence append_symbol(const SymbolSequence &sequence, const Symbol &symbol) {
if (!sequence.empty()) {
const LookaheadSet &left_tokens = item_set_builder.get_last_set(sequence.back());

View file

@ -27,6 +27,11 @@ bool ParseItem::operator==(const ParseItem &other) const {
if (step_index != other.step_index) return false;
if (variable_index != other.variable_index) return false;
if (production->size() != other.production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement != other.production->at(i).name_replacement) {
return false;
}
}
if (is_done()) {
if (!production->empty()) {
if (production->back().precedence != other.production->back().precedence) return false;
@ -47,6 +52,10 @@ bool ParseItem::operator<(const ParseItem &other) const {
if (other.variable_index < variable_index) return false;
if (production->size() < other.production->size()) return true;
if (other.production->size() < production->size()) return false;
for (size_t i = 0; i < step_index; i++) {
if (production->at(i).name_replacement < other.production->at(i).name_replacement) return true;
if (other.production->at(i).name_replacement < production->at(i).name_replacement) return false;
}
if (is_done()) {
if (!production->empty()) {
if (production->back().precedence < other.production->back().precedence) return true;
@ -106,11 +115,6 @@ Symbol ParseItem::next_symbol() const {
return production->at(step_index).symbol;
}
ParseItemSet::ParseItemSet() {}
ParseItemSet::ParseItemSet(const map<ParseItem, LookaheadSet> &entries)
: entries(entries) {}
bool ParseItemSet::operator==(const ParseItemSet &other) const {
return entries == other.entries;
}
@ -153,6 +157,9 @@ struct hash<ParseItem> {
hash_combine(&result, item.step_index);
hash_combine(&result, item.production->dynamic_precedence);
hash_combine(&result, item.production->size());
for (size_t i = 0; i < item.step_index; i++) {
hash_combine(&result, item.production->at(i).name_replacement);
}
if (item.is_done()) {
if (!item.production->empty()) {
hash_combine(&result, item.production->back().precedence);

View file

@ -36,9 +36,6 @@ struct ParseItem {
};
struct ParseItemSet {
ParseItemSet();
explicit ParseItemSet(const std::map<ParseItem, LookaheadSet> &);
bool operator==(const ParseItemSet &) const;
void add(const ParseItemSet &);
size_t unfinished_item_signature() const;

View file

@ -23,19 +23,22 @@ using rules::Symbol;
static vector<Production> inline_production(const ParseItem &item, const SyntaxGrammar &grammar) {
vector<Production> result;
for (const Production &production_to_insert : grammar.variables[item.next_symbol().index].productions) {
auto &inlined_step = item.production->at(item.step_index);
auto &productions_to_insert = grammar.variables[inlined_step.symbol.index].productions;
for (const Production &production_to_insert : productions_to_insert) {
auto begin = item.production->steps.begin();
auto end = item.production->steps.end();
auto step = begin + item.step_index;
Production production{{begin, step}, item.production->dynamic_precedence};
production.steps.insert(
production.steps.end(),
production_to_insert.steps.begin(),
production_to_insert.steps.end()
);
production.back().precedence = item.precedence();
production.back().associativity = item.associativity();
for (auto &step : production_to_insert) {
production.steps.push_back(step);
if (!inlined_step.name_replacement.empty()) {
production.steps.back().name_replacement = inlined_step.name_replacement;
}
}
production.back().precedence = inlined_step.precedence;
production.back().associativity = inlined_step.associativity;
production.steps.insert(
production.steps.end(),
step + 1,

View file

@ -76,6 +76,7 @@ class CCodeGenerator {
vector<pair<size_t, ParseTableEntry>> parse_table_entries;
vector<set<Symbol::Index>> external_scanner_states;
size_t next_parse_action_list_index;
set<string> unique_replacement_names;
public:
CCodeGenerator(string name, const ParseTable &parse_table,
@ -98,6 +99,7 @@ class CCodeGenerator {
add_symbol_enum();
add_symbol_names_list();
add_symbol_metadata_list();
add_rename_sequences();
add_lex_function();
add_lex_modes_list();
@ -139,11 +141,22 @@ class CCodeGenerator {
}
}
// Find the longest rename sequence and collect the distinct replacement names
// that need a symbol of their own in the generated code.
unsigned max_rename_sequence_length = 0;
for (const RenameSequence &rename_sequence : parse_table.rename_sequences) {
  if (rename_sequence.size() > max_rename_sequence_length) {
    max_rename_sequence_length = rename_sequence.size();
  }
  for (const string &name_replacement : rename_sequence) {
    // Rename sequences use an empty string for steps that are not renamed.
    // Those placeholders must not be registered as names, or the generated
    // symbol enum and symbol name table would gain a bogus empty entry.
    if (!name_replacement.empty()) {
      unique_replacement_names.insert(name_replacement);
    }
  }
}
line("#define LANGUAGE_VERSION " + to_string(TREE_SITTER_LANGUAGE_VERSION));
line("#define STATE_COUNT " + to_string(parse_table.states.size()));
line("#define SYMBOL_COUNT " + to_string(parse_table.symbols.size()));
line("#define TOKEN_COUNT " + to_string(token_count));
line("#define EXTERNAL_TOKEN_COUNT " + to_string(syntax_grammar.external_tokens.size()));
line("#define MAX_RENAME_SEQUENCE_LENGTH " + to_string(max_rename_sequence_length));
line();
}
@ -158,6 +171,11 @@ class CCodeGenerator {
i++;
}
}
for (const string &replacement_name : unique_replacement_names) {
line(rename_id(replacement_name) + " = " + to_string(i) + ",");
i++;
}
});
line("};");
line();
@ -166,9 +184,45 @@ class CCodeGenerator {
void add_symbol_names_list() {
line("static const char *ts_symbol_names[] = {");
indent([&]() {
for (const auto &entry : parse_table.symbols)
line("[" + symbol_id(entry.first) + "] = \"" +
sanitize_name_for_string(symbol_name(entry.first)) + "\",");
for (const auto &entry : parse_table.symbols) {
line(
"[" + symbol_id(entry.first) + "] = \"" +
sanitize_name_for_string(symbol_name(entry.first)) + "\","
);
}
for (const string &replacement_name : unique_replacement_names) {
line(
"[" + rename_id(replacement_name) + "] = \"" +
sanitize_name_for_string(replacement_name) + "\","
);
}
});
line("};");
line();
}
void add_rename_sequences() {
line(
"static TSSymbol ts_rename_sequences[" +
to_string(parse_table.rename_sequences.size()) +
"][MAX_RENAME_SEQUENCE_LENGTH] = {"
);
indent([&]() {
for (unsigned i = 1, n = parse_table.rename_sequences.size(); i < n; i++) {
const RenameSequence &sequence = parse_table.rename_sequences[i];
line("[" + to_string(i) + "] = {");
indent([&]() {
for (unsigned j = 0, n = sequence.size(); j < n; j++) {
if (!sequence[j].empty()) {
line("[" + to_string(j) + "] = " + rename_id(sequence[j]) + ",");
}
}
});
line("},");
}
});
line("};");
line();
@ -332,7 +386,7 @@ class CCodeGenerator {
add_parse_action_list_id(ParseTableEntry{ {}, false, false });
size_t state_id = 0;
line("static unsigned short ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
line("static uint16_t ts_parse_table[STATE_COUNT][SYMBOL_COUNT] = {");
indent([&]() {
for (const auto &state : parse_table.states) {
@ -543,17 +597,23 @@ class CCodeGenerator {
}
break;
case ParseActionTypeReduce:
if (action.fragile) {
add("REDUCE_FRAGILE");
} else {
add("REDUCE");
}
add("(");
add("REDUCE(");
add(symbol_id(action.symbol));
add(", ");
add(to_string(action.consumed_symbol_count));
add(", " + to_string(action.dynamic_precedence));
if (action.fragile) {
add(", .fragile = true");
}
if (action.dynamic_precedence != 0) {
add(", .dynamic_precedence = " + to_string(action.dynamic_precedence));
}
if (action.rename_sequence_id != 0) {
add(", .rename_sequence_id = " + to_string(action.rename_sequence_id));
}
add(")");
break;
case ParseActionTypeRecover:
@ -605,6 +665,10 @@ class CCodeGenerator {
}
}
// Builds the C identifier used in generated code for a replacement name:
// the "rename_sym_" prefix followed by the sanitized form of `name`.
string rename_id(const string &name) {
  string identifier("rename_sym_");
  identifier += sanitize_name(name);
  return identifier;
}
string symbol_name(const Symbol &symbol) {
if (symbol == rules::END_OF_INPUT())
return "END";

View file

@ -198,6 +198,20 @@ ParseRuleResult parse_rule(json_value *rule_json) {
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, result.rule));
}
// RENAME rule: wraps the rule found under "content" and attaches the string
// found under "value" to it as a replacement name.
if (type == "RENAME") {
// The replacement name must be a JSON string; anything else is rejected.
json_value name_json = rule_json->operator[]("value");
if (name_json.type != json_string) {
return "Rename value must be a string";
}
// Parse the wrapped rule recursively and propagate its error, with context.
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid rename content: " + result.error_message;
}
return Rule(Metadata::rename(string(name_json.u.string.ptr), result.rule));
}
return "Unknown rule type: " + type;
}

View file

@ -6,21 +6,20 @@
namespace tree_sitter {
using std::string;
using std::ostream;
using std::to_string;
using std::set;
using std::vector;
using std::function;
using rules::Symbol;
ParseAction::ParseAction()
: production(nullptr),
consumed_symbol_count(0),
symbol(rules::NONE()),
type(ParseActionTypeError),
extra(false),
fragile(false),
state_index(-1) {}
: production(nullptr),
consumed_symbol_count(0),
symbol(rules::NONE()),
type(ParseActionTypeError),
extra(false),
fragile(false),
state_index(-1),
rename_sequence_id(0) {}
ParseAction ParseAction::Error() {
return ParseAction();
@ -65,52 +64,49 @@ ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count,
}
bool ParseAction::operator==(const ParseAction &other) const {
return (type == other.type && extra == other.extra &&
fragile == other.fragile && symbol == other.symbol &&
state_index == other.state_index && production == other.production &&
consumed_symbol_count == other.consumed_symbol_count);
return
type == other.type &&
extra == other.extra &&
fragile == other.fragile &&
symbol == other.symbol &&
state_index == other.state_index &&
production == other.production &&
consumed_symbol_count == other.consumed_symbol_count &&
rename_sequence_id == other.rename_sequence_id;
}
bool ParseAction::operator<(const ParseAction &other) const {
if (type < other.type)
return true;
if (other.type < type)
return false;
if (extra && !other.extra)
return true;
if (other.extra && !extra)
return false;
if (fragile && !other.fragile)
return true;
if (other.fragile && !fragile)
return false;
if (symbol < other.symbol)
return true;
if (other.symbol < symbol)
return false;
if (state_index < other.state_index)
return true;
if (other.state_index < state_index)
return false;
if (production < other.production)
return true;
if (other.production < production)
return false;
return consumed_symbol_count < other.consumed_symbol_count;
if (type < other.type) return true;
if (other.type < type) return false;
if (extra && !other.extra) return true;
if (other.extra && !extra) return false;
if (fragile && !other.fragile) return true;
if (other.fragile && !fragile) return false;
if (symbol < other.symbol) return true;
if (other.symbol < symbol) return false;
if (state_index < other.state_index) return true;
if (other.state_index < state_index) return false;
if (production < other.production) return true;
if (other.production < production) return false;
if (consumed_symbol_count < other.consumed_symbol_count) return true;
if (other.consumed_symbol_count < consumed_symbol_count) return false;
return rename_sequence_id < other.rename_sequence_id;
}
ParseTableEntry::ParseTableEntry()
: reusable(true), depends_on_lookahead(false) {}
: reusable(true), depends_on_lookahead(false) {}
ParseTableEntry::ParseTableEntry(const vector<ParseAction> &actions,
bool reusable, bool depends_on_lookahead)
: actions(actions),
reusable(reusable),
depends_on_lookahead(depends_on_lookahead) {}
: actions(actions),
reusable(reusable),
depends_on_lookahead(depends_on_lookahead) {}
bool ParseTableEntry::operator==(const ParseTableEntry &other) const {
return actions == other.actions && reusable == other.reusable &&
depends_on_lookahead == other.depends_on_lookahead;
return
actions == other.actions &&
reusable == other.reusable &&
depends_on_lookahead == other.depends_on_lookahead;
}
ParseState::ParseState() : lex_state_id(-1) {}

View file

@ -41,6 +41,7 @@ struct ParseAction {
bool extra;
bool fragile;
ParseStateId state_index;
unsigned rename_sequence_id;
};
struct ParseTableEntry {
@ -73,12 +74,15 @@ struct ParseTableSymbolMetadata {
bool structural;
};
using RenameSequence = std::vector<std::string>;
struct ParseTable {
ParseAction &add_terminal_action(ParseStateId state_id, rules::Symbol, ParseAction);
void set_nonterminal_action(ParseStateId, rules::Symbol::Index, ParseStateId);
std::vector<ParseState> states;
std::map<rules::Symbol, ParseTableSymbolMetadata> symbols;
std::vector<RenameSequence> rename_sequences;
};
} // namespace tree_sitter

View file

@ -1,8 +1,9 @@
#include "compiler/prepare_grammar/flatten_grammar.h"
#include <vector>
#include <algorithm>
#include <cassert>
#include <cmath>
#include <algorithm>
#include <string>
#include <vector>
#include "compiler/prepare_grammar/extract_choices.h"
#include "compiler/prepare_grammar/initial_syntax_grammar.h"
#include "compiler/grammar.h"
@ -13,6 +14,7 @@ namespace prepare_grammar {
using std::find;
using std::pair;
using std::string;
using std::vector;
using rules::Rule;
@ -20,6 +22,7 @@ class FlattenRule {
private:
vector<int> precedence_stack;
vector<rules::Associativity> associativity_stack;
vector<string> name_replacement_stack;
Production production;
void apply(const Rule &rule, bool at_end) {
@ -28,7 +31,8 @@ class FlattenRule {
production.steps.push_back(ProductionStep{
symbol,
precedence_stack.back(),
associativity_stack.back()
associativity_stack.back(),
name_replacement_stack.back()
});
},
@ -41,6 +45,10 @@ class FlattenRule {
associativity_stack.push_back(metadata.params.associativity);
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.push_back(metadata.params.name_replacement);
}
if (abs(metadata.params.dynamic_precedence) > abs(production.dynamic_precedence)) {
production.dynamic_precedence = metadata.params.dynamic_precedence;
}
@ -56,6 +64,10 @@ class FlattenRule {
associativity_stack.pop_back();
if (!at_end) production.back().associativity = associativity_stack.back();
}
if (!metadata.params.name_replacement.empty()) {
name_replacement_stack.pop_back();
}
},
[&](const rules::Seq &sequence) {
@ -72,7 +84,10 @@ class FlattenRule {
}
public:
FlattenRule() : precedence_stack({ 0 }), associativity_stack({ rules::AssociativityNone }) {}
FlattenRule() :
precedence_stack({0}),
associativity_stack({rules::AssociativityNone}),
name_replacement_stack({""}) {}
Production flatten(const Rule &rule) {
apply(rule, true);

View file

@ -1,10 +1,14 @@
#include "compiler/rules/metadata.h"
#include <climits>
#include <string>
#include "compiler/rule.h"
namespace tree_sitter {
namespace rules {
using std::move;
using std::string;
Metadata::Metadata(const Rule &rule, MetadataParams params) :
rule(std::make_shared<Rule>(rule)), params(params) {}
@ -73,5 +77,11 @@ Metadata Metadata::main_token(const Rule &rule) {
return Metadata{rule, params};
}
// Wraps `rule` in metadata whose only non-default parameter is the
// replacement name. `name` is moved from.
Metadata Metadata::rename(string &&name, const Rule &rule) {
  MetadataParams rename_params;
  rename_params.name_replacement = move(name);
  return Metadata{rule, rename_params};
}
} // namespace rules
} // namespace tree_sitter

View file

@ -1,6 +1,7 @@
#ifndef COMPILER_RULES_METADATA_H_
#define COMPILER_RULES_METADATA_H_
#include <string>
#include <memory>
namespace tree_sitter {
@ -22,6 +23,7 @@ struct MetadataParams {
bool is_string;
bool is_active;
bool is_main_token;
std::string name_replacement;
inline MetadataParams() :
precedence{0}, dynamic_precedence{0}, associativity{AssociativityNone},
@ -38,7 +40,8 @@ struct MetadataParams {
is_token == other.is_token &&
is_string == other.is_string &&
is_active == other.is_active &&
is_main_token == other.is_main_token
is_main_token == other.is_main_token &&
name_replacement == other.name_replacement
);
}
};
@ -59,6 +62,7 @@ struct Metadata {
static Metadata prec_dynamic(int precedence, const Rule &rule);
static Metadata separator(const Rule &rule);
static Metadata main_token(const Rule &rule);
static Metadata rename(std::string &&name, const Rule &rule);
bool operator==(const Metadata &other) const;
};

View file

@ -0,0 +1,36 @@
#include "compiler/syntax_grammar.h"
namespace tree_sitter {
// Two steps are equal when their symbol, precedence, associativity and
// replacement name all match.
bool ProductionStep::operator==(const ProductionStep &other) const {
  if (!(symbol == other.symbol)) return false;
  if (!(precedence == other.precedence)) return false;
  if (!(associativity == other.associativity)) return false;
  return name_replacement == other.name_replacement;
}
// Negation of operator==.
bool ProductionStep::operator!=(const ProductionStep &other) const {
  return !(*this == other);
}
// Lexicographic ordering over (symbol, precedence, associativity,
// name_replacement): the first field that differs decides the result.
bool ProductionStep::operator<(const ProductionStep &other) const {
  const bool same_symbol = !(symbol < other.symbol) && !(other.symbol < symbol);
  if (!same_symbol) return symbol < other.symbol;
  if (precedence != other.precedence) return precedence < other.precedence;
  if (associativity != other.associativity) return associativity < other.associativity;
  return name_replacement < other.name_replacement;
}
// Productions are equal when they have the same dynamic precedence and the
// same sequence of steps.
bool Production::operator==(const Production &other) const {
  if (dynamic_precedence != other.dynamic_precedence) return false;
  return steps == other.steps;
}
// External tokens are equal when their name, type and corresponding internal
// token all match.
bool ExternalToken::operator==(const ExternalToken &other) const {
  if (!(name == other.name)) return false;
  if (!(type == other.type)) return false;
  return corresponding_internal_token == other.corresponding_internal_token;
}
} // namespace tree_sitter

View file

@ -10,43 +10,29 @@
namespace tree_sitter {
struct ProductionStep {
inline bool operator==(const ProductionStep &other) const {
return symbol == other.symbol &&
precedence == other.precedence &&
associativity == other.associativity;
}
inline bool operator!=(const ProductionStep &other) const {
return !operator==(other);
}
inline bool operator<(const ProductionStep &other) const {
if (symbol < other.symbol) return true;
if (other.symbol < symbol) return false;
if (precedence < other.precedence) return true;
if (other.precedence < precedence) return false;
return associativity < other.associativity;
}
rules::Symbol symbol;
int precedence;
rules::Associativity associativity;
std::string name_replacement;
bool operator==(const ProductionStep &) const;
bool operator!=(const ProductionStep &) const;
bool operator<(const ProductionStep &) const;
};
struct Production {
std::vector<ProductionStep> steps;
int dynamic_precedence = 0;
inline bool operator==(const Production &other) const {
return steps == other.steps && dynamic_precedence == other.dynamic_precedence;
}
bool operator==(const Production &) const;
inline ProductionStep &back() { return steps.back(); }
inline const ProductionStep &back() const { return steps.back(); }
inline bool empty() const { return steps.empty(); }
inline size_t size() const { return steps.size(); }
inline const ProductionStep &operator[](int i) const { return steps[i]; }
inline const ProductionStep &at(int i) const { return steps[i]; }
inline std::vector<ProductionStep>::const_iterator begin() const { return steps.begin(); }
inline std::vector<ProductionStep>::const_iterator end() const { return steps.end(); }
};
struct SyntaxVariable {
@ -55,24 +41,18 @@ struct SyntaxVariable {
std::vector<Production> productions;
};
using ConflictSet = std::set<rules::Symbol>;
struct ExternalToken {
std::string name;
VariableType type;
rules::Symbol corresponding_internal_token;
inline bool operator==(const ExternalToken &other) const {
return name == other.name &&
type == other.type &&
corresponding_internal_token == other.corresponding_internal_token;
}
bool operator==(const ExternalToken &) const;
};
struct SyntaxGrammar {
std::vector<SyntaxVariable> variables;
std::set<rules::Symbol> extra_tokens;
std::set<ConflictSet> expected_conflicts;
std::set<std::set<rules::Symbol>> expected_conflicts;
std::vector<ExternalToken> external_tokens;
std::set<rules::Symbol> variables_to_inline;
};

View file

@ -3,7 +3,8 @@
#include "runtime/error_costs.h"
static const TSParseAction SHIFT_ERROR = {
.type = TSParseActionTypeShift, .params = {.to_state = ERROR_STATE}
.type = TSParseActionTypeShift,
.to_state = ERROR_STATE,
};
void ts_language_table_entry(const TSLanguage *self, TSStateId state,

View file

@ -44,7 +44,7 @@ static inline TSStateId ts_language_next_state(const TSLanguage *self,
if (count > 0) {
TSParseAction action = actions[count - 1];
if (action.type == TSParseActionTypeShift || action.type == TSParseActionTypeRecover) {
return action.params.to_state;
return action.to_state;
}
}
return 0;

View file

@ -288,7 +288,8 @@ void ts_symbol_iterator_next(TSSymbolIterator *self) {
}
const char *ts_node_type(TSNode self, const TSDocument *document) {
TSSymbol symbol = ts_node__tree(self)->symbol;
const Tree *tree = ts_node__tree(self);
TSSymbol symbol = tree->context.rename_symbol ? tree->context.rename_symbol : tree->symbol;
return ts_language_symbol_name(document->parser.language, symbol);
}

View file

@ -558,8 +558,8 @@ static bool parser__switch_children(Parser *self, Tree *tree,
static StackPopResult parser__reduce(Parser *self, StackVersion version,
TSSymbol symbol, unsigned count,
bool fragile, int dynamic_precedence,
bool allow_skipping) {
int dynamic_precedence, unsigned short rename_sequence_id,
bool fragile, bool allow_skipping) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
StackPopResult pop = ts_stack_pop_count(self->stack, version, count);
@ -603,6 +603,7 @@ static StackPopResult parser__reduce(Parser *self, StackVersion version,
}
parent->dynamic_precedence += dynamic_precedence;
parent->rename_sequence_id = rename_sequence_id;
TSStateId state = ts_stack_top_state(self->stack, slice.version);
TSStateId next_state = ts_language_next_state(language, state, symbol);
@ -699,12 +700,12 @@ static const TSParseAction *parser__reductions_after_sequence(
(*count)--;
}
while (*count > 0 && actions[0].params.child_count < child_count) {
while (*count > 0 && actions[0].child_count < child_count) {
actions++;
(*count)--;
}
while (*count > 0 && actions[*count - 1].params.child_count > child_count) {
while (*count > 0 && actions[*count - 1].child_count > child_count) {
(*count)--;
}
@ -756,7 +757,7 @@ static StackIterateAction parser__repair_error_callback(void *payload, TSStateId
}
for (uint32_t j = 0; j < repair_reduction_count; j++) {
if (repair_reductions[j].params.symbol == repair->symbol) {
if (repair_reductions[j].symbol == repair->symbol) {
result |= StackIteratePop;
session->found_repair = true;
session->best_repair = *repair;
@ -788,8 +789,8 @@ static bool parser__repair_error(Parser *self, StackSlice slice,
array_clear(&self->reduce_actions);
for (uint32_t i = 0; i < entry.action_count; i++) {
if (entry.actions[i].type == TSParseActionTypeReduce) {
TSSymbol symbol = entry.actions[i].params.symbol;
uint32_t child_count = entry.actions[i].params.child_count;
TSSymbol symbol = entry.actions[i].symbol;
uint32_t child_count = entry.actions[i].child_count;
if ((child_count > session.tree_count_above_error) ||
(child_count == session.tree_count_above_error &&
!ts_language_symbol_metadata(self->language, symbol).visible))
@ -942,11 +943,12 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
has_shift_action = true;
break;
case TSParseActionTypeReduce:
if (action.params.child_count > 0)
if (action.child_count > 0)
ts_reduce_action_set_add(&self->reduce_actions, (ReduceAction){
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence
.symbol = action.symbol,
.count = action.child_count,
.dynamic_precedence = action.dynamic_precedence,
.rename_sequence_id = action.rename_sequence_id,
});
default:
break;
@ -958,8 +960,9 @@ static bool parser__do_potential_reductions(Parser *self, StackVersion version)
for (uint32_t i = 0; i < self->reduce_actions.size; i++) {
ReduceAction action = self->reduce_actions.contents[i];
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.count, true,
action.dynamic_precedence, false
self, version, action.symbol, action.count,
action.dynamic_precedence, action.rename_sequence_id,
true, false
);
if (reduction.stopped_at_error) {
ts_tree_array_delete(&reduction.slices.contents[0].trees);
@ -1168,7 +1171,7 @@ static void parser__advance(Parser *self, StackVersion version,
next_state = state;
LOG("shift_extra");
} else {
next_state = action.params.to_state;
next_state = action.to_state;
LOG("shift state:%u", next_state);
}
@ -1195,18 +1198,14 @@ static void parser__advance(Parser *self, StackVersion version,
}
case TSParseActionTypeReduce: {
if (reduction_stopped_at_error)
continue;
if (reduction_stopped_at_error) continue;
unsigned child_count = action.params.child_count;
TSSymbol symbol = action.params.symbol;
unsigned dynamic_precedence = action.params.dynamic_precedence;
bool fragile = action.fragile;
LOG("reduce sym:%s, child_count:%u", SYM_NAME(symbol), child_count);
StackPopResult reduction =
parser__reduce(self, version, symbol, child_count, fragile, dynamic_precedence, true);
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.symbol), action.child_count);
StackPopResult reduction = parser__reduce(
self, version, action.symbol, action.child_count,
action.dynamic_precedence, action.rename_sequence_id,
action.fragile, true
);
StackSlice slice = *array_front(&reduction.slices);
if (reduction.stopped_at_error) {
reduction_stopped_at_error = true;
@ -1237,7 +1236,7 @@ static void parser__advance(Parser *self, StackVersion version,
ts_tree_retain(lookahead);
}
parser__recover(self, version, action.params.to_state, lookahead);
parser__recover(self, version, action.to_state, lookahead);
if (lookahead == reusable_node->tree) {
reusable_node_pop(reusable_node);
}
@ -1355,6 +1354,6 @@ Tree *parser_parse(Parser *self, TSInput input, Tree *old_tree, bool halt_on_err
LOG_TREE();
ts_stack_clear(self->stack);
parser__clear_cached_token(self);
ts_tree_assign_parents(self->finished_tree, &self->tree_path1);
ts_tree_assign_parents(self->finished_tree, &self->tree_path1, self->language);
return self->finished_tree;
}

View file

@ -12,6 +12,7 @@ typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short rename_sequence_id;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -22,6 +22,7 @@ Tree *ts_tree_make_leaf(TSSymbol sym, Length padding, Length size,
.visible_child_count = 0,
.named_child_count = 0,
.children = NULL,
.rename_sequence_id = 0,
.padding = padding,
.visible = metadata.visible,
.named = metadata.named,
@ -120,18 +121,23 @@ Tree *ts_tree_make_copy(Tree *self) {
return result;
}
void ts_tree_assign_parents(Tree *self, TreePath *path) {
void ts_tree_assign_parents(Tree *self, TreePath *path, const TSLanguage *language) {
array_clear(path);
array_push(path, ((TreePathEntry){self, length_zero(), 0}));
while (path->size > 0) {
Tree *tree = array_pop(path).tree;
Length offset = length_zero();
const TSSymbol *rename_symbols = language->rename_sequences +
tree->rename_sequence_id * language->max_rename_sequence_length;
for (uint32_t i = 0; i < tree->child_count; i++) {
Tree *child = tree->children[i];
if (child->context.parent != tree || child->context.index != i) {
child->context.parent = tree;
child->context.index = i;
child->context.offset = offset;
if (tree->rename_sequence_id && rename_symbols[i] != 0) {
child->context.rename_symbol = rename_symbols[i];
}
array_push(path, ((TreePathEntry){child, length_zero(), 0}));
}
offset = length_add(offset, ts_tree_total_size(child));
@ -472,36 +478,32 @@ static size_t ts_tree__write_to_string(const Tree *self,
const TSLanguage *language, char *string,
size_t limit, bool is_root,
bool include_all) {
if (!self)
return snprintf(string, limit, "(NULL)");
if (!self) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible = include_all || is_root || (self->visible && self->named);
if (visible && !is_root)
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
if (visible) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 &&
self->size.chars > 0) {
if (self->symbol == ts_builtin_sym_error && self->child_count == 0 && self->size.chars > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor +=
ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
cursor += ts_tree__write_char_to_string(*writer, limit, self->lookahead_char);
} else {
cursor += snprintf(*writer, limit, "(%s",
ts_language_symbol_name(language, self->symbol));
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
cursor += snprintf(*writer, limit, "(%s", ts_language_symbol_name(language, symbol));
}
}
for (uint32_t i = 0; i < self->child_count; i++) {
Tree *child = self->children[i];
cursor += ts_tree__write_to_string(child, language, *writer, limit, false,
include_all);
cursor += ts_tree__write_to_string(child, language, *writer, limit, false, include_all);
}
if (visible)
cursor += snprintf(*writer, limit, ")");
if (visible) cursor += snprintf(*writer, limit, ")");
return cursor - string;
}
@ -518,8 +520,8 @@ char *ts_tree_string(const Tree *self, const TSLanguage *language,
void ts_tree__print_dot_graph(const Tree *self, uint32_t byte_offset,
const TSLanguage *language, FILE *f) {
fprintf(f, "tree_%p [label=\"%s\"", self,
ts_language_symbol_name(language, self->symbol));
TSSymbol symbol = self->context.rename_symbol ? self->context.rename_symbol : self->symbol;
fprintf(f, "tree_%p [label=\"%s\"", self, ts_language_symbol_name(language, symbol));
if (self->child_count == 0)
fprintf(f, ", shape=plaintext");

View file

@ -19,6 +19,7 @@ typedef struct Tree {
struct Tree *parent;
uint32_t index;
Length offset;
TSSymbol rename_symbol;
} context;
uint32_t child_count;
@ -26,6 +27,7 @@ typedef struct Tree {
struct {
uint32_t visible_child_count;
uint32_t named_child_count;
unsigned short rename_sequence_id;
struct Tree **children;
};
TSExternalTokenState external_token_state;
@ -85,7 +87,7 @@ int ts_tree_compare(const Tree *tree1, const Tree *tree2);
uint32_t ts_tree_start_column(const Tree *self);
uint32_t ts_tree_end_column(const Tree *self);
void ts_tree_set_children(Tree *, uint32_t, Tree **);
void ts_tree_assign_parents(Tree *, TreePath *);
void ts_tree_assign_parents(Tree *, TreePath *, const TSLanguage *);
void ts_tree_edit(Tree *, const TSInputEdit *edit);
char *ts_tree_string(const Tree *, const TSLanguage *, bool include_all);
void ts_tree_print_dot_graph(const Tree *, const TSLanguage *, FILE *);

View file

@ -12,6 +12,7 @@ START_TEST
describe("ParseItemSetBuilder", []() {
vector<LexicalVariable> lexical_variables;
for (size_t i = 0; i < 20; i++) {
lexical_variables.push_back({
"token_" + to_string(i),
@ -27,23 +28,23 @@ describe("ParseItemSetBuilder", []() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::terminal(11), 0, AssociativityNone, ""},
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone, ""},
{Symbol::terminal(13), 0, AssociativityNone, ""},
}, 0},
Production{{
{Symbol::non_terminal(2), 0, AssociativityNone},
{Symbol::non_terminal(2), 0, AssociativityNone, ""},
}, 0}
}},
SyntaxVariable{"rule2", VariableTypeNamed, {
Production{{
{Symbol::terminal(14), 0, AssociativityNone},
{Symbol::terminal(15), 0, AssociativityNone},
{Symbol::terminal(14), 0, AssociativityNone, ""},
{Symbol::terminal(15), 0, AssociativityNone, ""},
}, 0}
}},
}, {}, {}, {}, {}};
@ -52,21 +53,21 @@ describe("ParseItemSetBuilder", []() {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet item_set({
ParseItemSet item_set{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
}};
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
AssertThat(item_set, Equals(ParseItemSet({
AssertThat(item_set, Equals(ParseItemSet{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
},
},
{
ParseItem(Symbol::non_terminal(1), production(1, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
@ -79,21 +80,21 @@ describe("ParseItemSetBuilder", []() {
ParseItem(Symbol::non_terminal(2), production(2, 0), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
}}));
});
it("handles rules with empty productions", [&]() {
SyntaxGrammar grammar{{
SyntaxVariable{"rule0", VariableTypeNamed, {
Production{{
{Symbol::non_terminal(1), 0, AssociativityNone},
{Symbol::terminal(11), 0, AssociativityNone},
{Symbol::non_terminal(1), 0, AssociativityNone, ""},
{Symbol::terminal(11), 0, AssociativityNone, ""},
}, 0},
}},
SyntaxVariable{"rule1", VariableTypeNamed, {
Production{{
{Symbol::terminal(12), 0, AssociativityNone},
{Symbol::terminal(13), 0, AssociativityNone},
{Symbol::terminal(12), 0, AssociativityNone, ""},
{Symbol::terminal(13), 0, AssociativityNone, ""},
}, 0},
Production{{}, 0}
}},
@ -103,17 +104,17 @@ describe("ParseItemSetBuilder", []() {
return grammar.variables[variable_index].productions[production_index];
};
ParseItemSet item_set({
ParseItemSet item_set{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) }),
}
});
}};
ParseItemSetBuilder item_set_builder(grammar, lexical_grammar);
item_set_builder.apply_transitive_closure(&item_set);
AssertThat(item_set, Equals(ParseItemSet({
AssertThat(item_set, Equals(ParseItemSet{{
{
ParseItem(rules::START(), production(0, 0), 0),
LookaheadSet({ Symbol::terminal(10) })
@ -126,7 +127,7 @@ describe("ParseItemSetBuilder", []() {
ParseItem(Symbol::non_terminal(1), production(1, 1), 0),
LookaheadSet({ Symbol::terminal(11) })
},
})));
}}));
});
});

View file

@ -0,0 +1,18 @@
======================================
Method calls
======================================
a.b(c(d.e));
---
(statement
(call_expression
(member_expression
(variable_name)
(property_name))
(call_expression
(variable_name)
(member_expression
(variable_name)
(property_name)))))

View file

@ -0,0 +1,69 @@
{
"name": "renamed_rules",
"extras": [
{"type": "PATTERN", "value": "\\s"}
],
"rules": {
"statement": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": ";"}
]
},
"_expression": {
"type": "CHOICE",
"members": [
{"type": "SYMBOL", "name": "call_expression"},
{"type": "SYMBOL", "name": "member_expression"},
{
"type": "RENAME",
"value": "variable_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
},
"call_expression": {
"type": "PREC_LEFT",
"value": 0,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": "("},
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": ")"}
]
}
},
"member_expression": {
"type": "PREC_LEFT",
"value": 1,
"content": {
"type": "SEQ",
"members": [
{"type": "SYMBOL", "name": "_expression"},
{"type": "STRING", "value": "."},
{
"type": "RENAME",
"value": "property_name",
"content": {
"type": "SYMBOL",
"name": "identifier"
}
}
]
}
},
"identifier": {"type": "PATTERN", "value": "\\a+"}
}
}