Introduce 'ubiquitous_tokens' concept, for parsing comments and such
This commit is contained in:
parent
b010e1667e
commit
4700e33746
24 changed files with 18321 additions and 19057 deletions
|
|
@ -3,6 +3,7 @@
|
|||
|
||||
namespace tree_sitter_examples {
|
||||
using tree_sitter::Grammar;
|
||||
using tree_sitter::GrammarOptions;
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
extern const Grammar golang({
|
||||
|
|
@ -112,5 +113,8 @@ namespace tree_sitter_examples {
|
|||
{ "type_name", sym("_identifier") },
|
||||
{ "_identifier", pattern("\\a[\\w_]*") },
|
||||
{ "number", pattern("\\d+(\\.\\d+)?") },
|
||||
});
|
||||
{ "comment", pattern("//[^\n]*") },
|
||||
}, GrammarOptions({
|
||||
{ "comment" },
|
||||
}));
|
||||
}
|
||||
|
|
|
|||
|
|
@ -10,7 +10,6 @@ namespace tree_sitter_examples {
|
|||
|
||||
// Statements
|
||||
{ "statement", choice({
|
||||
sym("comment"),
|
||||
sym("statement_block"),
|
||||
sym("if_statement"),
|
||||
sym("switch_statement"),
|
||||
|
|
@ -158,13 +157,10 @@ namespace tree_sitter_examples {
|
|||
str("*/") }),
|
||||
pattern("//[^\n]*") })) },
|
||||
{ "object", in_braces(comma_sep(err(seq({
|
||||
optional(sym("comment")),
|
||||
choice({ sym("string"), sym("identifier") }),
|
||||
str(":"),
|
||||
sym("expression") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(seq({
|
||||
optional(sym("comment")),
|
||||
sym("expression") })))) },
|
||||
{ "array", in_brackets(comma_sep(err(sym("expression")))) },
|
||||
{ "_terminator", pattern("[;\n]") },
|
||||
{ "regex", token(delimited("/")) },
|
||||
{ "string", token(choice({
|
||||
|
|
@ -175,5 +171,8 @@ namespace tree_sitter_examples {
|
|||
{ "null", keyword("null") },
|
||||
{ "true", keyword("true") },
|
||||
{ "false", keyword("false") },
|
||||
}, {
|
||||
// ubiquitous_tokens
|
||||
{ "comment" }
|
||||
});
|
||||
}
|
||||
|
|
|
|||
|
|
@ -43,6 +43,9 @@ SYMBOL_NAMES = {
|
|||
[ts_aux_sym_token6] = "')'",
|
||||
};
|
||||
|
||||
UBIQUITOUS_SYMBOLS = {
|
||||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
[ts_aux_sym_token0] = 1,
|
||||
[ts_aux_sym_token1] = 1,
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
File diff suppressed because it is too large
Load diff
|
|
@ -43,6 +43,9 @@ SYMBOL_NAMES = {
|
|||
[ts_aux_sym_token5] = "']'",
|
||||
};
|
||||
|
||||
UBIQUITOUS_SYMBOLS = {
|
||||
};
|
||||
|
||||
HIDDEN_SYMBOLS = {
|
||||
[ts_aux_sym_object_repeat0] = 1,
|
||||
[ts_aux_sym_array_repeat0] = 1,
|
||||
|
|
|
|||
|
|
@ -25,13 +25,19 @@ namespace tree_sitter {
|
|||
rule_ptr token(rule_ptr rule);
|
||||
}
|
||||
|
||||
struct GrammarOptions {
|
||||
std::vector<std::string> ubiquitous_tokens;
|
||||
};
|
||||
|
||||
class Grammar {
|
||||
public:
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
|
||||
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules, GrammarOptions options);
|
||||
bool operator==(const Grammar &other) const;
|
||||
std::string start_rule_name() const;
|
||||
const rules::rule_ptr rule(const std::string &name) const;
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> rules;
|
||||
const GrammarOptions options;
|
||||
};
|
||||
|
||||
struct Conflict {
|
||||
|
|
|
|||
|
|
@ -30,6 +30,9 @@ static const char *ts_symbol_names[]
|
|||
#define HIDDEN_SYMBOLS \
|
||||
static const int hidden_symbol_flags[SYMBOL_COUNT]
|
||||
|
||||
#define UBIQUITOUS_SYMBOLS \
|
||||
static const int ubiquitous_symbol_flags[SYMBOL_COUNT]
|
||||
|
||||
#define LEX_STATES \
|
||||
static state_id ts_lex_states[STATE_COUNT]
|
||||
|
||||
|
|
@ -64,9 +67,15 @@ static const ts_parse_action ts_parse_actions[STATE_COUNT][SYMBOL_COUNT]
|
|||
ts_parser constructor_name() { \
|
||||
return (ts_parser) { \
|
||||
.parse_fn = ts_parse, \
|
||||
.free_fn = NULL, \
|
||||
.symbol_names = ts_symbol_names, \
|
||||
.data = ts_lr_parser_make(SYMBOL_COUNT, (const ts_parse_action *)ts_parse_actions, ts_lex_states, hidden_symbol_flags), \
|
||||
.free_fn = NULL \
|
||||
.data = ts_lr_parser_make( \
|
||||
SYMBOL_COUNT, \
|
||||
(const ts_parse_action *)ts_parse_actions, \
|
||||
ts_lex_states, \
|
||||
hidden_symbol_flags, \
|
||||
ubiquitous_symbol_flags \
|
||||
) \
|
||||
}; \
|
||||
}
|
||||
|
||||
|
|
@ -94,7 +103,7 @@ typedef struct {
|
|||
|
||||
ts_stack ts_stack_make();
|
||||
ts_tree * ts_stack_root(const ts_stack *stack);
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *collapse_flags);
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *hidden_symbol_flags, const int *ubiquitous_symbol_flags);
|
||||
void ts_stack_shrink(ts_stack *stack, size_t new_size);
|
||||
void ts_stack_push(ts_stack *stack, state_id state, ts_tree *node);
|
||||
state_id ts_stack_top_state(const ts_stack *stack);
|
||||
|
|
@ -202,6 +211,7 @@ typedef struct {
|
|||
ts_lexer lexer;
|
||||
ts_stack stack;
|
||||
const int *hidden_symbol_flags;
|
||||
const int *ubiquitous_symbol_flags;
|
||||
ts_tree *lookahead;
|
||||
ts_tree *next_lookahead;
|
||||
const ts_parse_action *parse_table;
|
||||
|
|
@ -209,7 +219,12 @@ typedef struct {
|
|||
size_t symbol_count;
|
||||
} ts_lr_parser;
|
||||
|
||||
static ts_lr_parser * ts_lr_parser_make(size_t symbol_count, const ts_parse_action *parse_table, const state_id *lex_states, const int *hidden_symbol_flags) {
|
||||
static ts_lr_parser *
|
||||
ts_lr_parser_make(size_t symbol_count,
|
||||
const ts_parse_action *parse_table,
|
||||
const state_id *lex_states,
|
||||
const int *hidden_symbol_flags,
|
||||
const int *ubiquitous_symbol_flags) {
|
||||
ts_lr_parser *result = malloc(sizeof(ts_lr_parser));
|
||||
result->lexer = ts_lexer_make();
|
||||
result->stack = ts_stack_make();
|
||||
|
|
@ -217,6 +232,7 @@ static ts_lr_parser * ts_lr_parser_make(size_t symbol_count, const ts_parse_acti
|
|||
result->parse_table = parse_table;
|
||||
result->lex_states = lex_states;
|
||||
result->hidden_symbol_flags = hidden_symbol_flags;
|
||||
result->ubiquitous_symbol_flags = ubiquitous_symbol_flags;
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -279,7 +295,7 @@ static void ts_lr_parser_shift(ts_lr_parser *parser, state_id parse_state) {
|
|||
|
||||
static void ts_lr_parser_reduce(ts_lr_parser *parser, ts_symbol symbol, int child_count) {
|
||||
parser->next_lookahead = parser->lookahead;
|
||||
parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count, parser->hidden_symbol_flags);
|
||||
parser->lookahead = ts_stack_reduce(&parser->stack, symbol, child_count, parser->hidden_symbol_flags, parser->ubiquitous_symbol_flags);
|
||||
}
|
||||
|
||||
static ts_symbol * ts_lr_parser_expected_symbols(ts_lr_parser *parser, size_t *count) {
|
||||
|
|
|
|||
|
|
@ -14,41 +14,53 @@ describe("building parse tables", []() {
|
|||
{ "rule0", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "rule1", i_token(0) },
|
||||
{ "rule2", i_token(1) },
|
||||
}, {});
|
||||
|
||||
}, {}, PreparedGrammarOptions({
|
||||
// ubiquitous_tokens
|
||||
{ Symbol(2, SymbolOptionToken) }
|
||||
}));
|
||||
|
||||
PreparedGrammar lex_grammar({
|
||||
{ "token0", pattern("[a-c]") },
|
||||
{ "token1", pattern("[b-d]") },
|
||||
}, {});
|
||||
|
||||
|
||||
it("first looks for the start rule and its item set closure", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
||||
|
||||
AssertThat(result.first.states[0].actions, Equals(map<Symbol, ParseAction>({
|
||||
// start item
|
||||
{ Symbol(0), ParseAction::Shift(1, { 0 }) },
|
||||
|
||||
|
||||
// expanded from the item set closure of the start item
|
||||
{ Symbol(1), ParseAction::Shift(2, { 0 }) },
|
||||
{ Symbol(2), ParseAction::Shift(2, { 0 }) },
|
||||
{ Symbol(0, SymbolOptionToken), ParseAction::Shift(3, { 0 }) },
|
||||
{ Symbol(1, SymbolOptionToken), ParseAction::Shift(4, { 0 }) },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), ParseAction::Shift(0, { 0 }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("accepts the input when EOF occurs after the start rule", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
||||
AssertThat(result.first.states[1].actions, Equals(map<Symbol, ParseAction>({
|
||||
{ END_OF_INPUT(), ParseAction::Accept() },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), ParseAction::Shift(1, { 0 }) },
|
||||
})));
|
||||
});
|
||||
|
||||
|
||||
it("reduces a rule once it has been consumed", [&]() {
|
||||
auto result = build_parse_table(parse_grammar, lex_grammar);
|
||||
|
||||
|
||||
AssertThat(result.first.states[2].actions, Equals(map<Symbol, ParseAction>({
|
||||
{ END_OF_INPUT(), ParseAction::Reduce(Symbol(0), 1, 0) },
|
||||
|
||||
// for the ubiquitous_token 'token2'
|
||||
{ Symbol(2, SymbolOptionToken), ParseAction::Shift(2, { 0 }) },
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
|
|
|||
|
|
@ -4,6 +4,7 @@
|
|||
#include <iostream>
|
||||
#include <set>
|
||||
#include <unordered_map>
|
||||
#include <map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
|
|
|
|||
|
|
@ -111,6 +111,20 @@ describe("extracting tokens from a grammar", []() {
|
|||
{ "rule0", str("ab") },
|
||||
}, {})));
|
||||
});
|
||||
|
||||
it("updates the grammar's ubiquitous_tokens", [&]() {
|
||||
auto result = extract_tokens(PreparedGrammar({
|
||||
{ "rule0", str("ab") },
|
||||
{ "rule1", i_sym(0) },
|
||||
{ "rule2", i_sym(1) },
|
||||
}, {}, PreparedGrammarOptions({
|
||||
{ Symbol(0) }
|
||||
})));
|
||||
|
||||
AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
{ Symbol(0, SymbolOptionToken) }
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -19,7 +19,7 @@ describe("interning symbols in a grammar", []() {
|
|||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat((bool)result.second, IsFalse());
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first, Equals(PreparedGrammar({
|
||||
{ "x", choice({ i_sym(1), i_sym(2) }) },
|
||||
{ "y", i_sym(2) },
|
||||
|
|
@ -38,6 +38,23 @@ describe("interning symbols in a grammar", []() {
|
|||
AssertThat(result.second->message, Equals("Undefined rule 'y'"));
|
||||
});
|
||||
});
|
||||
|
||||
it("translates the grammar's optional 'ubiquitous_tokens' to numerical symbols", [&]() {
|
||||
Grammar grammar({
|
||||
{ "x", choice({ sym("y"), sym("z") }) },
|
||||
{ "y", sym("z") },
|
||||
{ "z", str("stuff") }
|
||||
}, {
|
||||
{ "z" }
|
||||
});
|
||||
|
||||
auto result = intern_symbols(grammar);
|
||||
|
||||
AssertThat(result.second, Equals((GrammarError *)nullptr));
|
||||
AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
|
||||
Symbol(2)
|
||||
})));
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
|
|
@ -13,6 +13,20 @@ func z() {}
|
|||
(var_declaration (var_name) (number))
|
||||
(func_declaration (var_name) (statement_block)))
|
||||
|
||||
==========================================
|
||||
parses comments
|
||||
==========================================
|
||||
package trivial
|
||||
|
||||
func main() {
|
||||
// do stuff
|
||||
}
|
||||
---
|
||||
(program
|
||||
(package_directive (package_name))
|
||||
(func_declaration (var_name) (statement_block
|
||||
(comment))))
|
||||
|
||||
==========================================
|
||||
parses complex types
|
||||
==========================================
|
||||
|
|
|
|||
|
|
@ -97,14 +97,25 @@ print(object[propertyName()]);
|
|||
==========================================
|
||||
parses comments
|
||||
==========================================
|
||||
// this is another comment
|
||||
stuff(); // this is a comment
|
||||
/* this is a third comment */
|
||||
var thing = {
|
||||
|
||||
// this is a property.
|
||||
// its value is a function.
|
||||
key: function(x /* this is a parameter */) {
|
||||
|
||||
// this is a statement
|
||||
doStuff();
|
||||
}
|
||||
};
|
||||
---
|
||||
(program
|
||||
(program (var_declaration (assignment (identifier) (object
|
||||
(comment)
|
||||
(expression_statement (function_call (identifier))) (comment)
|
||||
(comment))
|
||||
(comment)
|
||||
(identifier) (function_expression
|
||||
(formal_parameters (identifier) (comment))
|
||||
(statement_block
|
||||
(comment)
|
||||
(expression_statement (function_call (identifier)))))))))
|
||||
|
||||
======================================
|
||||
parses real code
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ namespace tree_sitter {
|
|||
LexConflictManager conflict_manager;
|
||||
unordered_map<const LexItemSet, LexStateId> lex_state_ids;
|
||||
LexTable lex_table;
|
||||
|
||||
|
||||
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
|
||||
LexItemSet result;
|
||||
for (const auto &symbol : symbols) {
|
||||
|
|
@ -43,7 +43,7 @@ namespace tree_sitter {
|
|||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
|
||||
LexStateId add_lex_state(const LexItemSet &item_set) {
|
||||
auto pair = lex_state_ids.find(item_set);
|
||||
if (pair == lex_state_ids.end()) {
|
||||
|
|
@ -57,7 +57,7 @@ namespace tree_sitter {
|
|||
return pair->second;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void add_error_lex_state() {
|
||||
LexItemSet item_set = build_lex_item_set(parse_table->symbols);
|
||||
add_advance_actions(item_set, LexTable::ERROR_STATE_ID);
|
||||
|
|
@ -84,7 +84,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
||||
for (const auto &item : item_set)
|
||||
if (item.is_token_start())
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ namespace tree_sitter {
|
|||
unordered_map<const ParseItemSet, ParseStateId> parse_state_ids;
|
||||
SymTransitions sym_transitions;
|
||||
ParseTable parse_table;
|
||||
|
||||
|
||||
ParseStateId add_parse_state(const ParseItemSet &item_set) {
|
||||
auto pair = parse_state_ids.find(item_set);
|
||||
if (pair == parse_state_ids.end()) {
|
||||
|
|
@ -46,18 +46,24 @@ namespace tree_sitter {
|
|||
for (const auto &transition : sym_transitions(item_set, grammar)) {
|
||||
const Symbol &symbol = transition.first;
|
||||
const ParseItemSet &item_set = transition.second;
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(symbol);
|
||||
|
||||
auto &actions = parse_table.states[state_id].actions;
|
||||
auto current_action = actions.find(symbol);
|
||||
|
||||
set<int> precedence_values = precedence_values_for_item_set(item_set);
|
||||
if (current_action == current_actions.end() ||
|
||||
if (current_action == actions.end() ||
|
||||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
|
||||
ParseStateId new_state_id = add_parse_state(item_set);
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
|
||||
}
|
||||
}
|
||||
|
||||
for (const Symbol &symbol : grammar.options.ubiquitous_tokens) {
|
||||
auto &actions = parse_table.states[state_id].actions;
|
||||
if (actions.find(symbol) == actions.end())
|
||||
parse_table.add_action(state_id, symbol, ParseAction::Shift(state_id, { 0 }));
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void add_reduce_actions(const ParseItemSet &item_set, ParseStateId state_id) {
|
||||
for (const ParseItem &item : item_set) {
|
||||
if (item.is_done()) {
|
||||
|
|
@ -66,7 +72,7 @@ namespace tree_sitter {
|
|||
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
|
||||
auto current_actions = parse_table.states[state_id].actions;
|
||||
auto current_action = current_actions.find(item.lookahead_sym);
|
||||
|
||||
|
||||
if (current_action == current_actions.end() ||
|
||||
conflict_manager.resolve_parse_action(item.lookahead_sym, current_action->second, action)) {
|
||||
parse_table.add_action(state_id, item.lookahead_sym, action);
|
||||
|
|
@ -74,7 +80,7 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
|
||||
set<int> result;
|
||||
for (const auto &item : item_set)
|
||||
|
|
|
|||
|
|
@ -75,6 +75,7 @@ namespace tree_sitter {
|
|||
state_and_symbol_counts(),
|
||||
symbol_enum(),
|
||||
symbol_names_list(),
|
||||
ubiquitous_symbols_list(),
|
||||
hidden_symbols_list(),
|
||||
lex_function(),
|
||||
lex_states_list(),
|
||||
|
|
@ -221,6 +222,13 @@ namespace tree_sitter {
|
|||
return result + "};";
|
||||
}
|
||||
|
||||
string ubiquitous_symbols_list() {
|
||||
string result = "UBIQUITOUS_SYMBOLS = {\n";
|
||||
for (auto &symbol : syntax_grammar.options.ubiquitous_tokens)
|
||||
result += indent("[" + symbol_id(symbol) + "] = 1,") + "\n";
|
||||
return result + "};";
|
||||
}
|
||||
|
||||
string hidden_symbols_list() {
|
||||
string result = "HIDDEN_SYMBOLS = {\n";
|
||||
for (auto &symbol : parse_table.symbols)
|
||||
|
|
|
|||
|
|
@ -7,7 +7,12 @@ namespace tree_sitter {
|
|||
using rules::rule_ptr;
|
||||
|
||||
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules) :
|
||||
rules(rules) {}
|
||||
rules(rules),
|
||||
options({}) {}
|
||||
|
||||
Grammar::Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules, GrammarOptions options) :
|
||||
rules(rules),
|
||||
options(options) {}
|
||||
|
||||
bool Grammar::operator==(const Grammar &other) const {
|
||||
if (other.rules.size() != rules.size()) return false;
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ namespace tree_sitter {
|
|||
aux_rules.insert(aux_rules.end(), expander.aux_rules.begin(), expander.aux_rules.end());
|
||||
}
|
||||
|
||||
return PreparedGrammar(rules, aux_rules);
|
||||
return PreparedGrammar(rules, aux_rules, grammar.options);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -46,16 +46,20 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
rule_ptr apply_to(const Symbol *rule) {
|
||||
auto replacement_pair = replacements.find(*rule);
|
||||
if (replacement_pair != replacements.end())
|
||||
return replacement_pair->second.copy();
|
||||
else if (rule->is_built_in())
|
||||
return rule->copy();
|
||||
else
|
||||
return make_shared<Symbol>(new_index_for_symbol(*rule), rule->options);
|
||||
return replace_symbol(*rule).copy();
|
||||
}
|
||||
|
||||
public:
|
||||
Symbol replace_symbol(const Symbol &rule) {
|
||||
auto replacement_pair = replacements.find(rule);
|
||||
if (replacement_pair != replacements.end())
|
||||
return replacement_pair->second;
|
||||
else if (rule.is_built_in())
|
||||
return rule;
|
||||
else
|
||||
return Symbol(new_index_for_symbol(rule), rule.options);
|
||||
}
|
||||
|
||||
SymbolInliner(const map<Symbol, Symbol> &replacements, size_t rule_count, size_t aux_rule_count) :
|
||||
replacements(replacements)
|
||||
{}
|
||||
|
|
@ -131,13 +135,21 @@ namespace tree_sitter {
|
|||
aux_tokens.insert(aux_tokens.end(), extractor.tokens.begin(), extractor.tokens.end());
|
||||
|
||||
SymbolInliner inliner(symbol_replacements, input_grammar.rules.size(), input_grammar.aux_rules.size());
|
||||
|
||||
vector<Symbol> ubiquitous_tokens;
|
||||
for (auto &pair : rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &pair : aux_rules)
|
||||
pair.second = inliner.apply(pair.second);
|
||||
for (auto &symbol : input_grammar.options.ubiquitous_tokens) {
|
||||
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
|
||||
}
|
||||
|
||||
PreparedGrammarOptions parse_options(input_grammar.options);
|
||||
parse_options.ubiquitous_tokens = ubiquitous_tokens;
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, aux_rules),
|
||||
PreparedGrammar(rules, aux_rules, parse_options),
|
||||
PreparedGrammar(tokens, aux_tokens)
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,19 +19,32 @@ namespace tree_sitter {
|
|||
using rules::IdentityRuleFn::apply_to;
|
||||
|
||||
rule_ptr apply_to(const rules::NamedSymbol *rule) {
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule->name)
|
||||
return make_shared<rules::Symbol>(i);
|
||||
missing_rule_name = rule->name;
|
||||
return rule_ptr();
|
||||
auto result = symbol_for_rule_name(rule->name);
|
||||
if (!result.get()) missing_rule_name = rule->name;
|
||||
return result;
|
||||
}
|
||||
|
||||
public:
|
||||
std::shared_ptr<rules::Symbol> symbol_for_rule_name(string rule_name) {
|
||||
for (size_t i = 0; i < grammar.rules.size(); i++)
|
||||
if (grammar.rules[i].first == rule_name)
|
||||
return make_shared<rules::Symbol>(i);
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
explicit InternSymbols(const Grammar &grammar) : grammar(grammar) {}
|
||||
const Grammar grammar;
|
||||
string missing_rule_name;
|
||||
};
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> missing_rule_error(string rule_name) {
|
||||
return {
|
||||
PreparedGrammar({}, {}),
|
||||
new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + rule_name + "'")
|
||||
};
|
||||
}
|
||||
|
||||
pair<PreparedGrammar, const GrammarError *> intern_symbols(const Grammar &grammar) {
|
||||
InternSymbols interner(grammar);
|
||||
vector<pair<string, rule_ptr>> rules;
|
||||
|
|
@ -39,15 +52,22 @@ namespace tree_sitter {
|
|||
for (auto &pair : grammar.rules) {
|
||||
auto new_rule = interner.apply(pair.second);
|
||||
if (!interner.missing_rule_name.empty())
|
||||
return {
|
||||
PreparedGrammar({}, {}),
|
||||
new GrammarError(GrammarErrorTypeUndefinedSymbol,
|
||||
"Undefined rule '" + interner.missing_rule_name + "'")
|
||||
};
|
||||
return missing_rule_error(interner.missing_rule_name);
|
||||
rules.push_back({ pair.first, new_rule });
|
||||
}
|
||||
|
||||
return { PreparedGrammar(rules, {}), nullptr };
|
||||
vector<rules::Symbol> ubiquitous_tokens;
|
||||
for (auto &name : grammar.options.ubiquitous_tokens) {
|
||||
auto token = interner.symbol_for_rule_name(name);
|
||||
if (!token.get())
|
||||
return missing_rule_error(name);
|
||||
ubiquitous_tokens.push_back(*token);
|
||||
}
|
||||
|
||||
return {
|
||||
PreparedGrammar(rules, {}, PreparedGrammarOptions({ ubiquitous_tokens })),
|
||||
nullptr
|
||||
};
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -14,7 +14,15 @@ namespace tree_sitter {
|
|||
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules) :
|
||||
Grammar(rules),
|
||||
aux_rules(aux_rules) {}
|
||||
aux_rules(aux_rules),
|
||||
options({}) {}
|
||||
|
||||
PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
PreparedGrammarOptions options) :
|
||||
Grammar(rules),
|
||||
aux_rules(aux_rules),
|
||||
options(options) {}
|
||||
|
||||
const rule_ptr & PreparedGrammar::rule(const Symbol &symbol) const {
|
||||
return symbol.is_auxiliary() ?
|
||||
|
|
|
|||
|
|
@ -8,16 +8,24 @@
|
|||
#include "compiler/rules/symbol.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
struct PreparedGrammarOptions {
|
||||
std::vector<rules::Symbol> ubiquitous_tokens;
|
||||
};
|
||||
|
||||
class PreparedGrammar : public Grammar {
|
||||
public:
|
||||
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
|
||||
PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
|
||||
PreparedGrammarOptions options);
|
||||
|
||||
bool operator==(const PreparedGrammar &other) const;
|
||||
const std::string & rule_name(const rules::Symbol &symbol) const;
|
||||
const rules::rule_ptr & rule(const rules::Symbol &symbol) const;
|
||||
|
||||
const std::vector<std::pair<std::string, rules::rule_ptr>> aux_rules;
|
||||
const PreparedGrammarOptions options;
|
||||
};
|
||||
|
||||
std::ostream& operator<<(std::ostream &stream, const PreparedGrammar &grammar);
|
||||
|
|
|
|||
|
|
@ -57,51 +57,70 @@ size_t ts_stack_right_position(const ts_stack *stack) {
|
|||
return result;
|
||||
}
|
||||
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack, ts_symbol symbol, int immediate_child_count, const int *hidden_symbol_flags) {
|
||||
size_t new_stack_size = stack->size - immediate_child_count;
|
||||
int flags[immediate_child_count];
|
||||
ts_tree * ts_stack_reduce(ts_stack *stack,
|
||||
ts_symbol symbol,
|
||||
int immediate_child_count,
|
||||
const int *hidden_symbol_flags,
|
||||
const int *ubiquitous_symbol_flags) {
|
||||
|
||||
// First, walk down the stack to determine which symbols will be reduced.
|
||||
// The child node count is known ahead of time, but some of the
|
||||
// nodes at the top of the stack might be hidden nodes, in which
|
||||
// case we 'collapse' them. Some may also be ubiquitous tokens,
|
||||
// which don't count towards the child node count.
|
||||
static int collapse_flags[100];
|
||||
int child_count = 0;
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = stack->entries[new_stack_size + i].node;
|
||||
ts_tree *child = stack->entries[stack->size - 1 - i].node;
|
||||
size_t grandchild_count;
|
||||
ts_tree **grandchildren = ts_tree_children(child, &grandchild_count);
|
||||
flags[i] = (
|
||||
hidden_symbol_flags[ts_tree_symbol(child)] ||
|
||||
ts_symbol symbol = ts_tree_symbol(child);
|
||||
|
||||
if (ubiquitous_symbol_flags[symbol])
|
||||
immediate_child_count++;
|
||||
|
||||
collapse_flags[i] = (
|
||||
hidden_symbol_flags[symbol] ||
|
||||
(grandchild_count == 1 && ts_tree_size(child) == ts_tree_size(grandchildren[0]))
|
||||
);
|
||||
child_count += (flags[i]) ? grandchild_count : 1;
|
||||
|
||||
child_count += (collapse_flags[i]) ? grandchild_count : 1;
|
||||
}
|
||||
|
||||
size_t child_index = 0;
|
||||
// Walk down the stack again, building up the array of children.
|
||||
// Though we collapse the hidden child nodes, we also need to
|
||||
// keep track of the actual immediate children so that we can
|
||||
// later collapse the stack again when the document is edited.
|
||||
// We store the children and immediate children in the same array,
|
||||
// to reduce allocations.
|
||||
size_t size = 0, offset = 0;
|
||||
size_t child_index = child_count;
|
||||
ts_tree **children = malloc((child_count + immediate_child_count) * sizeof(ts_tree *));
|
||||
ts_tree **immediate_children = children + child_count;
|
||||
|
||||
for (int i = 0; i < immediate_child_count; i++) {
|
||||
ts_tree *child = stack->entries[new_stack_size + i].node;
|
||||
immediate_children[i] = child;
|
||||
ts_tree *child = stack->entries[stack->size - 1 - i].node;
|
||||
immediate_children[immediate_child_count - 1 - i] = child;
|
||||
|
||||
if (i == 0) {
|
||||
offset = ts_tree_offset(child);
|
||||
size = ts_tree_size(child);
|
||||
if (collapse_flags[i]) {
|
||||
size_t grandchild_count;
|
||||
ts_tree **grandchildren = ts_tree_children(child, &grandchild_count);
|
||||
child_index -= grandchild_count;
|
||||
memcpy(children + child_index, grandchildren, (grandchild_count * sizeof(ts_tree *)));
|
||||
} else {
|
||||
size += ts_tree_offset(child) + ts_tree_size(child);
|
||||
child_index--;
|
||||
children[child_index] = child;
|
||||
}
|
||||
|
||||
if (flags[i]) {
|
||||
size_t grandchild_count;
|
||||
ts_tree ** grandchildren = ts_tree_children(child, &grandchild_count);
|
||||
memcpy(children + child_index, grandchildren, (grandchild_count * sizeof(ts_tree *)));
|
||||
child_index += grandchild_count;
|
||||
if (child_index == 0) {
|
||||
offset += ts_tree_offset(child);
|
||||
size += ts_tree_size(child);
|
||||
} else {
|
||||
children[child_index] = child;
|
||||
child_index++;
|
||||
size += ts_tree_offset(child) + ts_tree_size(child);
|
||||
}
|
||||
}
|
||||
|
||||
ts_tree *lookahead = ts_tree_make_node(symbol, child_count, immediate_child_count, children, size, offset);
|
||||
ts_stack_shrink(stack, new_stack_size);
|
||||
ts_stack_shrink(stack, stack->size - immediate_child_count);
|
||||
return lookahead;
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue