Fix handling of ubiquitous tokens used in grammar rules

This commit is contained in:
Max Brunsfeld 2014-07-01 20:47:35 -07:00
parent 59cc65c2e3
commit 83a1b9439e
30 changed files with 39086 additions and 32890 deletions

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -2,6 +2,7 @@
#define TREE_SITTER_COMPILER_H_
#include <vector>
#include <set>
#include <string>
#include <memory>
@ -29,8 +30,8 @@ namespace tree_sitter {
class Grammar {
protected:
const std::vector<std::pair<std::string, rules::rule_ptr>> rules_;
std::vector<std::string> ubiquitous_tokens_;
std::vector<char> separators_;
std::set<std::string> ubiquitous_tokens_;
std::set<char> separators_;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules);
@ -39,10 +40,10 @@ namespace tree_sitter {
const rules::rule_ptr rule(const std::string &name) const;
const std::vector<std::pair<std::string, rules::rule_ptr>> & rules() const;
const std::vector<std::string> & ubiquitous_tokens() const;
Grammar & ubiquitous_tokens(const std::vector<std::string> &ubiquitous_tokens);
const std::vector<char> & separators() const;
Grammar & separators(const std::vector<char> &separators);
const std::set<std::string> & ubiquitous_tokens() const;
Grammar & ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
const std::set<char> & separators() const;
Grammar & separators(const std::set<char> &separators);
};
struct Conflict {

View file

@ -5,7 +5,6 @@
extern "C" {
#endif
//#define TS_DEBUG_PARSE
// #define TS_DEBUG_LEX
#include "tree_sitter/runtime.h"

View file

@ -13,6 +13,7 @@ typedef enum {
TSParseActionTypeShift,
TSParseActionTypeShiftExtra,
TSParseActionTypeReduce,
TSParseActionTypeReduceExtra,
TSParseActionTypeAccept,
} TSParseActionType;
@ -33,6 +34,9 @@ typedef struct {
#define SHIFT_EXTRA() \
{ .type = TSParseActionTypeShiftExtra }
#define REDUCE_EXTRA(symbol_val) \
{ .type = TSParseActionTypeReduceExtra, .data = { .symbol = symbol_val } }
#define REDUCE(symbol_val, child_count_val) \
{ .type = TSParseActionTypeReduce, .data = { .symbol = symbol_val, .child_count = child_count_val } }

View file

@ -19,7 +19,7 @@ describe("building parse tables", []() {
LexicalGrammar lex_grammar({
{ "token0", pattern("[a-c]") },
{ "token1", pattern("[b-d]") },
}, {}, {});
}, {});
it("first looks for the start rule and its item set closure", [&]() {
auto result = build_parse_table(parse_grammar, lex_grammar);

View file

@ -14,13 +14,13 @@ describe("resolving parse conflicts", []() {
SyntaxGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
{ "rule2", sym("token1") },
}, {}, {});
}, {}, set<rules::Symbol>());
LexicalGrammar lex_grammar({
{ "token1", pattern("[a-c]") },
{ "token2", pattern("[b-d]") },
{ "token3", keyword("stuff") },
}, {}, {});
}, {}, set<char>());
describe("lexical conflicts", [&]() {
Symbol sym1(0, SymbolOptionToken);

View file

@ -46,7 +46,7 @@ describe("computing FIRST sets", []() {
i_token(2),
i_token(3),
i_token(4) }) }
}, {}, {});
}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -63,7 +63,7 @@ describe("computing FIRST sets", []() {
{ "rule0", choice({
i_token(0),
blank() }) }
}, {}, {});
}, {});
AssertThat(first_set(rule, grammar), Equals(set<Symbol>({
Symbol(0, SymbolOptionToken),
@ -79,7 +79,7 @@ describe("computing FIRST sets", []() {
seq({ i_sym(0), i_token(10) }),
i_token(11),
}) },
}, {}, {});
}, {});
auto rule = i_sym(0);

View file

@ -16,7 +16,7 @@ describe("computing closures of item sets", []() {
{ "T", seq({
i_token(12),
i_token(13) }) },
}, {}, {});
}, {});
it("adds items at the beginnings of referenced rules", [&]() {
ParseItemSet item_set = item_set_closure(ParseItem(Symbol(0), grammar.rule(Symbol(0)), 0),

View file

@ -31,7 +31,7 @@ describe("syntactic item set transitions", [&]() {
SyntaxGrammar grammar({
{ "A", blank() },
{ "B", i_token(21) },
}, {}, {});
}, {}, set<Symbol>());
it("computes the closure of the new item sets", [&]() {
ParseItemSet set1({

View file

@ -63,7 +63,7 @@ describe("checking if rules can be blank", [&]() {
{ "B", choice({
seq({ i_sym(1), i_token(12) }),
i_token(13) }) },
}, {}, {});
}, {}, set<Symbol>());
it("terminates for left-recursive rules that can be blank", [&]() {
rule = i_sym(0);

View file

@ -5,7 +5,7 @@ int main(int argc, char *argv[]) {
"",
"--no-color",
"--only="
// "compiles the javascript"
""
};
return bandit::run(4, const_cast<char **>(args));
}

View file

@ -12,7 +12,7 @@ describe("expanding repeat rules in a grammar", []() {
it("replaces repeat rules with pairs of recursive rules", [&]() {
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(0)) },
}, {}, {});
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);
@ -28,7 +28,7 @@ describe("expanding repeat rules in a grammar", []() {
it("replaces repeats inside of sequences", [&]() {
SyntaxGrammar grammar({
{ "rule0", seq({ i_token(10), repeat(i_token(11)) }) },
}, {}, {});
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);
@ -46,7 +46,7 @@ describe("expanding repeat rules in a grammar", []() {
it("replaces repeats inside of choices", [&]() {
SyntaxGrammar grammar({
{ "rule0", choice({ i_token(10), repeat(i_token(11)) }) },
}, {}, {});
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);
@ -64,7 +64,7 @@ describe("expanding repeat rules in a grammar", []() {
it("can replace multiple repeats in the same rule", [&]() {
SyntaxGrammar grammar({
{ "rule0", seq({ repeat(i_token(10)), repeat(i_token(11)) }) },
}, {}, {});
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);
@ -90,7 +90,7 @@ describe("expanding repeat rules in a grammar", []() {
SyntaxGrammar grammar({
{ "rule0", repeat(i_token(10)) },
{ "rule1", repeat(i_token(11)) },
}, {}, {});
}, {}, set<Symbol>());
auto match = expand_repeats(grammar);

View file

@ -15,7 +15,7 @@ describe("expanding token rules", []() {
i_sym(10),
pattern("x*"),
i_sym(11) }) },
}, {}, {});
}, {});
auto result = expand_tokens(grammar);
@ -34,7 +34,7 @@ describe("expanding token rules", []() {
i_sym(10),
str("xyz"),
i_sym(11) }) },
}, {}, {});
}, {});
auto result = expand_tokens(grammar);
@ -53,7 +53,7 @@ describe("expanding token rules", []() {
pattern("("),
str("xyz"),
pattern("[") }) },
}, {}, {});
}, {});
auto result = expand_tokens(grammar);

View file

@ -17,8 +17,8 @@ describe("extracting tokens from a grammar", []() {
{
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -36,8 +36,8 @@ describe("extracting tokens from a grammar", []() {
{
{ "rule_A", seq({ pattern("a+"), i_sym(0) }) }
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -57,8 +57,8 @@ describe("extracting tokens from a grammar", []() {
token(seq({ pattern("."), choice({ str("a"), str("b") }) })),
i_sym(0) }) }
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -76,8 +76,8 @@ describe("extracting tokens from a grammar", []() {
{
{ "rule_A", choice({ i_sym(0), blank() }) },
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -93,8 +93,8 @@ describe("extracting tokens from a grammar", []() {
{
{ "rule_A", seq({ str("ab"), i_sym(0), str("ab") }) },
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -112,11 +112,11 @@ describe("extracting tokens from a grammar", []() {
{
{ "rule_A", str("ab") },
},
{},
set<Symbol>(),
{ 'x', 'y', 'z' }
});
AssertThat(result.second.separators, Equals(vector<char>({ 'x', 'y', 'z' })));
AssertThat(result.second.separators, Equals(set<char>({ 'x', 'y', 'z' })));
});
describe("when an entire rule can be extracted", [&]() {
@ -127,8 +127,8 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -149,8 +149,8 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
},
{},
{}
set<Symbol>(),
set<char>()
});
AssertThat(result.first.rules, Equals(rule_list({
@ -172,10 +172,10 @@ describe("extracting tokens from a grammar", []() {
{ "rule_C", i_sym(1) },
},
{ Symbol(0) },
{}
set<char>()
});
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(set<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});

View file

@ -50,7 +50,7 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.second, Equals((GrammarError *)nullptr));
AssertThat(result.first.ubiquitous_tokens, Equals(vector<Symbol>({
AssertThat(result.first.ubiquitous_tokens, Equals(set<Symbol>({
Symbol(2)
})));
});
@ -62,7 +62,7 @@ describe("interning symbols in a grammar", []() {
auto result = intern_symbols(grammar);
AssertThat(result.first.separators, Equals(vector<char>({ 'x', 'y' })))
AssertThat(result.first.separators, Equals(set<char>({ 'x', 'y' })))
});
});

View file

@ -24,7 +24,7 @@ moreStuff();
recovers from errors in for loops
==========================================
stuff();
for (var i = 0; *nonsense*; *what*) {
for (var i = 0; *nonsense*; i++) {
*more-nonsense*;
}
moreStuff();
@ -34,7 +34,7 @@ moreStuff();
(for_statement
(var_declaration (assignment (identifier) (number)))
(expression_statement (ERROR '*'))
(ERROR '*')
(math_op (identifier))
(statement_block (expression_statement (ERROR '*'))))
(expression_statement (function_call (identifier))))

View file

@ -34,6 +34,18 @@ print(object.property);
(identifier)
(property_access (identifier) (identifier)))))
==========================================
parses property access across lines
==========================================
object
.someProperty
.otherProperty
---
(program (expression_statement
(property_access
(property_access (identifier) (identifier))
(identifier))))
==========================================
parses dynamic property access
==========================================

View file

@ -11,12 +11,16 @@
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/item_set_closure.h"
#include "compiler/build_tables/item_set_transitions.h"
#include "compiler/build_tables/first_set.h"
#include <iostream>
namespace tree_sitter {
using std::pair;
using std::string;
using std::vector;
using std::set;
using std::map;
using std::unordered_map;
using std::make_shared;
using rules::Symbol;
@ -33,8 +37,8 @@ namespace tree_sitter {
if (pair == parse_state_ids.end()) {
ParseStateId state_id = parse_table.add_state();
parse_state_ids[item_set] = state_id;
add_shift_actions(item_set, state_id);
add_reduce_actions(item_set, state_id);
add_shift_actions(item_set, state_id);
add_ubiquitous_token_actions(item_set, state_id);
return state_id;
} else {
@ -43,26 +47,34 @@ namespace tree_sitter {
}
void add_shift_actions(const ParseItemSet &item_set, ParseStateId state_id) {
map<Symbol, size_t> shifts;
for (const auto &transition : sym_transitions(item_set, grammar)) {
const Symbol &symbol = transition.first;
const ParseItemSet &next_item_set = transition.second;
auto &actions = parse_table.states[state_id].actions;
auto current_action = actions.find(symbol);
set<int> precedence_values = precedence_values_for_item_set(next_item_set);
if (current_action == actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, ParseAction::Shift(0, precedence_values))) {
ParseAction new_action = ParseAction::Shift(0, precedence_values_for_item_set(next_item_set));
if (should_add_action(state_id, symbol, new_action)) {
ParseStateId new_state_id = add_parse_state(next_item_set);
parse_table.add_action(state_id, symbol, ParseAction::Shift(new_state_id, precedence_values));
new_action.state_index = new_state_id;
parse_table.add_action(state_id, symbol, new_action);
shifts.insert({ symbol, new_state_id });
}
}
}
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
if (actions.find(symbol) == actions.end())
parse_table.add_action(state_id, symbol, ParseAction::ShiftExtra());
for (auto &pair : shifts) {
const Symbol &shift_symbol = pair.first;
size_t new_state_id = pair.second;
if (grammar.ubiquitous_tokens.find(shift_symbol) != grammar.ubiquitous_tokens.end()) {
for (const auto &pair : parse_table.states[state_id].actions) {
const Symbol &lookahead_sym = pair.first;
ParseAction action = ParseAction::ReduceExtra(shift_symbol);
if (should_add_action(new_state_id, lookahead_sym, action))
parse_table.add_action(new_state_id, lookahead_sym, action);
}
}
}
}
@ -75,19 +87,39 @@ namespace tree_sitter {
ParseAction action = (item.lhs == rules::START()) ?
ParseAction::Accept() :
ParseAction::Reduce(item.lhs, item.consumed_symbol_count, item.precedence());
for (auto &lookahead_sym : lookahead_symbols) {
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(lookahead_sym);
if (current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(lookahead_sym, current_action->second, action)) {
for (auto &lookahead_sym : lookahead_symbols)
if (should_add_action(state_id, lookahead_sym, action))
parse_table.add_action(state_id, lookahead_sym, action);
}
}
}
}
}
void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
if (actions.find(symbol) == actions.end())
parse_table.add_action(state_id, symbol, ParseAction::ShiftExtra());
}
}
set<Symbol> first_set_for_item_set(const ParseItemSet &item_set) {
set<Symbol> result;
for (const auto &pair : item_set) {
auto new_set = first_set(pair.first.rule, grammar);
result.insert(new_set.begin(), new_set.end());
}
return result;
}
bool should_add_action(size_t state_id, const Symbol &symbol, const ParseAction &action) {
auto current_actions = parse_table.states[state_id].actions;
auto current_action = current_actions.find(symbol);
return (
current_action == current_actions.end() ||
conflict_manager.resolve_parse_action(symbol, current_action->second, action)
);
}
set<int> precedence_values_for_item_set(const ParseItemSet &item_set) {
set<int> result;
for (const auto &pair : item_set) {

View file

@ -290,6 +290,9 @@ namespace tree_sitter {
symbol_id(action.symbol) + ", " +
to_string(action.consumed_symbol_count) + ")");
break;
case ParseActionTypeReduceExtra:
add("REDUCE_EXTRA(" + symbol_id(action.symbol) + ")");
break;
default:;
}
}

View file

@ -2,9 +2,10 @@
#include "compiler/rules/rule.h"
namespace tree_sitter {
using std::string;
using std::ostream;
using std::pair;
using std::set;
using std::string;
using std::vector;
using rules::rule_ptr;
@ -59,20 +60,20 @@ namespace tree_sitter {
return stream << string("#<null>");
}
const vector<string> & Grammar::ubiquitous_tokens() const {
const set<string> & Grammar::ubiquitous_tokens() const {
return ubiquitous_tokens_;
}
Grammar & Grammar::ubiquitous_tokens(const vector<string> &ubiquitous_tokens) {
Grammar & Grammar::ubiquitous_tokens(const set<string> &ubiquitous_tokens) {
ubiquitous_tokens_ = ubiquitous_tokens;
return *this;
}
const vector<char> & Grammar::separators() const {
const set<char> & Grammar::separators() const {
return separators_;
}
Grammar & Grammar::separators(const vector<char> &separators) {
Grammar & Grammar::separators(const set<char> &separators) {
separators_ = separators;
return *this;
}

View file

@ -39,7 +39,11 @@ namespace tree_sitter {
}
ParseAction ParseAction::ShiftExtra() {
return ParseAction(ParseActionTypeShiftExtra, -1, Symbol(-1), 0, set<int>({}));
return ParseAction(ParseActionTypeShiftExtra, -1, Symbol(-1), 0, { 0 });
}
ParseAction ParseAction::ReduceExtra(Symbol symbol) {
return ParseAction(ParseActionTypeReduceExtra, -1, symbol, 0, { 0 });
}
ParseAction ParseAction::Reduce(Symbol symbol, size_t consumed_symbol_count, int precedence) {

View file

@ -14,6 +14,7 @@ namespace tree_sitter {
ParseActionTypeShift,
ParseActionTypeShiftExtra,
ParseActionTypeReduce,
ParseActionTypeReduceExtra,
ParseActionTypeAccept,
} ParseActionType;
@ -28,8 +29,9 @@ namespace tree_sitter {
static ParseAction Accept();
static ParseAction Error();
static ParseAction Shift(size_t state_index, std::set<int> precedence_values);
static ParseAction ShiftExtra();
static ParseAction Reduce(rules::Symbol symbol, size_t consumed_symbol_count, int precedence);
static ParseAction ShiftExtra();
static ParseAction ReduceExtra(rules::Symbol symbol);
bool operator==(const ParseAction &action) const;
ParseActionType type;

View file

@ -51,14 +51,14 @@ namespace tree_sitter {
for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar({}, {}, {}), expander.error };
return { LexicalGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}
for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { LexicalGrammar({}, {}, {}), expander.error };
return { LexicalGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}

View file

@ -1,6 +1,7 @@
#include "compiler/prepare_grammar/extract_tokens.h"
#include <map>
#include <vector>
#include <set>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/prepared_grammar.h"
@ -18,6 +19,7 @@ namespace tree_sitter {
using std::map;
using std::to_string;
using std::vector;
using std::set;
using std::make_shared;
using rules::rule_ptr;
using rules::Symbol;
@ -96,7 +98,7 @@ namespace tree_sitter {
pair<SyntaxGrammar, LexicalGrammar> extract_tokens(const InternedGrammar &input_grammar) {
vector<pair<string, rule_ptr>> rules, tokens, aux_rules, aux_tokens;
vector<Symbol> ubiquitous_tokens;
set<Symbol> ubiquitous_tokens;
TokenExtractor extractor;
map<Symbol, Symbol> symbol_replacements;
@ -120,7 +122,7 @@ namespace tree_sitter {
for (auto &pair : rules)
pair.second = inliner.apply(pair.second);
for (auto &symbol : input_grammar.ubiquitous_tokens)
ubiquitous_tokens.push_back(inliner.replace_symbol(symbol));
ubiquitous_tokens.insert(inliner.replace_symbol(symbol));
return {
SyntaxGrammar(rules, aux_rules, ubiquitous_tokens),

View file

@ -1,6 +1,7 @@
#include "compiler/prepare_grammar/intern_symbols.h"
#include <memory>
#include <vector>
#include <set>
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/interned_grammar.h"
#include "compiler/prepared_grammar.h"
@ -12,6 +13,7 @@ namespace tree_sitter {
using std::string;
using rules::rule_ptr;
using std::vector;
using std::set;
using std::pair;
using std::make_shared;
@ -58,12 +60,12 @@ namespace tree_sitter {
rules.push_back({ pair.first, new_rule });
}
vector<rules::Symbol> ubiquitous_tokens;
set<rules::Symbol> ubiquitous_tokens;
for (auto &name : grammar.ubiquitous_tokens()) {
auto token = interner.symbol_for_rule_name(name);
if (!token.get())
return missing_rule_error(name);
ubiquitous_tokens.push_back(*token);
ubiquitous_tokens.insert(*token);
}
InternedGrammar result;

View file

@ -3,6 +3,7 @@
#include <utility>
#include <vector>
#include <set>
#include <string>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
@ -12,8 +13,8 @@ namespace tree_sitter {
class InternedGrammar {
public:
std::vector<std::pair<std::string, rules::rule_ptr>> rules;
std::vector<rules::Symbol> ubiquitous_tokens;
std::vector<char> separators;
std::set<rules::Symbol> ubiquitous_tokens;
std::set<char> separators;
};
}
}

View file

@ -8,6 +8,7 @@ namespace tree_sitter {
using std::string;
using std::pair;
using std::vector;
using std::set;
const rules::rule_ptr & PreparedGrammar::rule(const rules::Symbol &symbol) const {
return symbol.is_auxiliary() ?
@ -25,6 +26,16 @@ namespace tree_sitter {
SyntaxGrammar::SyntaxGrammar() {}
LexicalGrammar::LexicalGrammar() {}
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
PreparedGrammar(rules, aux_rules) {}
PreparedGrammar::PreparedGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules) :
@ -34,14 +45,14 @@ namespace tree_sitter {
SyntaxGrammar::SyntaxGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const vector<rules::Symbol> &ubiquitous_tokens) :
const set<rules::Symbol> &ubiquitous_tokens) :
PreparedGrammar(rules, aux_rules),
ubiquitous_tokens(ubiquitous_tokens) {}
LexicalGrammar::LexicalGrammar(
const vector<pair<string, rules::rule_ptr>> &rules,
const vector<pair<string, rules::rule_ptr>> &aux_rules,
const vector<char> &separators) :
const set<char> &separators) :
PreparedGrammar(rules, aux_rules),
separators(separators) {}
}

View file

@ -3,6 +3,7 @@
#include <vector>
#include <string>
#include <set>
#include <utility>
#include "tree_sitter/compiler.h"
#include "compiler/rules/symbol.h"
@ -25,23 +26,29 @@ namespace tree_sitter {
class SyntaxGrammar : public PreparedGrammar {
public:
SyntaxGrammar();
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
SyntaxGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::vector<rules::Symbol> &ubiquitous_tokens);
const std::set<rules::Symbol> &ubiquitous_tokens);
std::vector<rules::Symbol> ubiquitous_tokens;
std::set<rules::Symbol> ubiquitous_tokens;
};
class LexicalGrammar : public PreparedGrammar {
public:
LexicalGrammar();
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules);
LexicalGrammar(
const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,
const std::vector<std::pair<std::string, rules::rule_ptr>> &aux_rules,
const std::vector<char> &separators);
const std::set<char> &separators);
std::vector<char> separators;
std::set<char> separators;
};
}

View file

@ -9,20 +9,37 @@ static const TSParseAction * actions_for_state(TSStateMachine *machine, TSStateI
return machine->config.parse_table + (state * machine->config.symbol_count);
}
void shift(TSStateMachine *machine, TSStateId parse_state, int is_extra) {
machine->lookahead->is_extra = is_extra;
void shift(TSStateMachine *machine, TSStateId parse_state) {
if (machine->lookahead->is_extra)
parse_state = ts_stack_top_state(&machine->stack);
ts_stack_push(&machine->stack, parse_state, machine->lookahead);
machine->lookahead = machine->next_lookahead;
machine->next_lookahead = NULL;
}
void shift_extra(TSStateMachine *machine) {
machine->lookahead->is_extra = 1;
shift(machine, 0);
}
void reduce(TSStateMachine *machine, TSSymbol symbol, size_t child_count) {
machine->next_lookahead = machine->lookahead;
machine->lookahead = ts_stack_reduce(&machine->stack,
symbol,
child_count,
machine->config.hidden_symbol_flags,
1);
machine->lookahead = ts_stack_reduce(
&machine->stack,
symbol,
child_count,
machine->config.hidden_symbol_flags, 1);
}
int reduce_extra(TSStateMachine *machine, TSSymbol symbol) {
TSTree *top_node = ts_stack_top_node(&machine->stack);
if (top_node->symbol == symbol && !top_node->is_extra) {
reduce(machine, symbol, 1);
machine->lookahead->is_extra = 1;
return 1;
} else {
return 0;
}
}
static size_t breakdown_stack(TSStateMachine *machine, TSInputEdit *edit) {
@ -199,6 +216,8 @@ void ts_state_machine_initialize(TSStateMachine *machine, TSInput input, TSInput
ts_lexer_advance(&machine->lexer);
}
// #define TS_DEBUG_PARSE
#ifdef TS_DEBUG_PARSE
#include <stdio.h>
#define DEBUG_PARSE(...) fprintf(stderr, "\n" __VA_ARGS__)
@ -212,26 +231,33 @@ TSTree * ts_state_machine_parse(TSStateMachine *machine, const char **symbol_nam
switch (action.type) {
case TSParseActionTypeShift:
DEBUG_PARSE("SHIFT %d", action.data.to_state);
shift(machine, action.data.to_state, 0);
shift(machine, action.data.to_state);
return NULL;
case TSParseActionTypeShiftExtra:
DEBUG_PARSE("SHIFT EXTRA");
shift(machine, ts_stack_top_state(&machine->stack), 1);
shift_extra(machine);
return NULL;
case TSParseActionTypeReduce:
DEBUG_PARSE("REDUCE %s %d", symbol_names[action.data.symbol], action.data.child_count);
reduce(machine, action.data.symbol, action.data.child_count);
return NULL;
case TSParseActionTypeReduceExtra:
if (!reduce_extra(machine, action.data.symbol))
goto error;
DEBUG_PARSE("REDUCE EXTRA");
return NULL;
case TSParseActionTypeAccept:
DEBUG_PARSE("ACCEPT");
return get_tree_root(machine);
case TSParseActionTypeError:
DEBUG_PARSE("ERROR");
if (handle_error(machine))
return NULL;
else
return get_tree_root(machine);
goto error;
default:
return NULL;
}
error:
DEBUG_PARSE("ERROR");
if (handle_error(machine))
return NULL;
else
return get_tree_root(machine);
}