2015-09-19 13:19:49 -07:00
|
|
|
#include "compiler/build_tables/build_lex_table.h"
|
2015-11-08 13:36:09 -08:00
|
|
|
#include <climits>
|
2014-05-04 22:07:52 -07:00
|
|
|
#include <map>
|
|
|
|
|
#include <set>
|
2014-10-12 12:44:16 -07:00
|
|
|
#include <string>
|
2014-05-04 22:07:52 -07:00
|
|
|
#include <unordered_map>
|
2014-10-12 12:44:16 -07:00
|
|
|
#include <utility>
|
|
|
|
|
#include <vector>
|
2015-03-16 11:59:45 -07:00
|
|
|
#include "compiler/build_tables/lex_conflict_manager.h"
|
2014-10-12 12:44:16 -07:00
|
|
|
#include "compiler/build_tables/lex_item.h"
|
2015-10-28 17:45:17 -07:00
|
|
|
#include "compiler/build_tables/does_match_any_line.h"
|
2014-10-12 12:44:16 -07:00
|
|
|
#include "compiler/parse_table.h"
|
2015-01-12 23:01:52 -08:00
|
|
|
#include "compiler/lexical_grammar.h"
|
2014-05-04 22:07:52 -07:00
|
|
|
#include "compiler/rules/built_in_symbols.h"
|
2014-09-07 22:16:45 -07:00
|
|
|
#include "compiler/rules/choice.h"
|
2014-10-12 12:44:16 -07:00
|
|
|
#include "compiler/rules/metadata.h"
|
2014-05-04 22:07:52 -07:00
|
|
|
#include "compiler/rules/repeat.h"
|
|
|
|
|
#include "compiler/rules/seq.h"
|
2015-10-12 15:33:00 -07:00
|
|
|
#include "compiler/rules/blank.h"
|
2014-05-04 22:07:52 -07:00
|
|
|
|
|
|
|
|
namespace tree_sitter {
|
2014-07-20 21:43:27 -07:00
|
|
|
namespace build_tables {
|
|
|
|
|
|
2014-10-12 12:44:16 -07:00
|
|
|
using std::make_shared;
|
2014-07-20 21:43:27 -07:00
|
|
|
using std::map;
|
|
|
|
|
using std::set;
|
2014-10-12 12:44:16 -07:00
|
|
|
using std::string;
|
|
|
|
|
using std::unordered_map;
|
2014-09-07 22:16:45 -07:00
|
|
|
using std::vector;
|
2014-07-20 21:43:27 -07:00
|
|
|
using rules::CharacterSet;
|
2014-10-12 12:44:16 -07:00
|
|
|
using rules::Symbol;
|
2014-07-20 21:43:27 -07:00
|
|
|
|
|
|
|
|
class LexTableBuilder {
|
|
|
|
|
const LexicalGrammar lex_grammar;
|
2015-12-10 21:05:54 -08:00
|
|
|
LexConflictManager conflict_manager;
|
2014-07-20 21:43:27 -07:00
|
|
|
ParseTable *parse_table;
|
2015-10-05 15:13:43 -07:00
|
|
|
unordered_map<const LexItemSet, LexStateId, LexItemSet::Hash> lex_state_ids;
|
2014-07-20 21:43:27 -07:00
|
|
|
LexTable lex_table;
|
2015-10-12 15:33:00 -07:00
|
|
|
vector<rule_ptr> separator_rules;
|
2014-07-20 21:43:27 -07:00
|
|
|
|
2014-10-12 12:44:16 -07:00
|
|
|
public:
|
|
|
|
|
LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar)
|
2015-07-27 18:29:48 -07:00
|
|
|
: lex_grammar(lex_grammar),
|
2015-10-05 17:02:59 -07:00
|
|
|
parse_table(parse_table) {
|
2015-10-05 18:02:59 -07:00
|
|
|
for (const rule_ptr &rule : lex_grammar.separators)
|
2015-10-12 15:33:00 -07:00
|
|
|
separator_rules.push_back(rules::Repeat::build(rule));
|
|
|
|
|
separator_rules.push_back(rules::Blank::build());
|
2015-10-05 17:02:59 -07:00
|
|
|
}
|
2014-10-12 12:44:16 -07:00
|
|
|
|
|
|
|
|
LexTable build() {
|
2015-10-05 18:02:59 -07:00
|
|
|
for (ParseState &parse_state : parse_table->states) {
|
2015-10-30 16:07:29 -07:00
|
|
|
LexItemSet item_set =
|
|
|
|
|
build_lex_item_set(parse_state.expected_inputs(), false);
|
2014-10-12 12:44:16 -07:00
|
|
|
parse_state.lex_state_id = add_lex_state(item_set);
|
|
|
|
|
}
|
2015-10-05 18:02:59 -07:00
|
|
|
|
2015-12-04 20:56:33 -08:00
|
|
|
LexItemSet error_item_set =
|
|
|
|
|
build_lex_item_set(parse_table->all_symbols(), true);
|
2015-10-05 18:02:59 -07:00
|
|
|
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
|
|
|
|
|
|
2015-12-10 21:05:54 -08:00
|
|
|
mark_fragile_tokens();
|
|
|
|
|
|
2014-10-12 12:44:16 -07:00
|
|
|
return lex_table;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
private:
|
2015-10-28 17:45:17 -07:00
|
|
|
LexItemSet build_lex_item_set(const set<Symbol> &symbols, bool error) {
|
2014-07-20 21:43:27 -07:00
|
|
|
LexItemSet result;
|
2015-10-05 18:02:59 -07:00
|
|
|
for (const Symbol &symbol : symbols) {
|
2015-10-12 15:33:00 -07:00
|
|
|
vector<rule_ptr> rules;
|
|
|
|
|
if (symbol == rules::ERROR()) {
|
2014-07-20 21:43:27 -07:00
|
|
|
continue;
|
2015-10-12 15:33:00 -07:00
|
|
|
} else if (symbol == rules::END_OF_INPUT()) {
|
|
|
|
|
rules.push_back(CharacterSet().include(0).copy());
|
|
|
|
|
} else if (symbol.is_token) {
|
|
|
|
|
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
|
2015-10-28 17:45:17 -07:00
|
|
|
if (error && does_match_any_line(rule))
|
|
|
|
|
continue;
|
|
|
|
|
|
2015-10-12 15:33:00 -07:00
|
|
|
auto choice = rule->as<rules::Choice>();
|
|
|
|
|
if (choice)
|
|
|
|
|
for (const rule_ptr &element : choice->elements)
|
|
|
|
|
rules.push_back(element);
|
|
|
|
|
else
|
|
|
|
|
rules.push_back(rule);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
for (const rule_ptr &rule : rules)
|
|
|
|
|
for (const rule_ptr &separator_rule : separator_rules)
|
|
|
|
|
result.entries.insert(LexItem(
|
2015-12-04 20:56:33 -08:00
|
|
|
symbol,
|
|
|
|
|
rules::Metadata::build(
|
2015-11-08 13:36:09 -08:00
|
|
|
rules::Seq::build({
|
2015-12-04 20:56:33 -08:00
|
|
|
rules::Metadata::build(separator_rule,
|
|
|
|
|
{ { rules::START_TOKEN, 1 } }),
|
|
|
|
|
rules::Metadata::build(rule, { { rules::PRECEDENCE, 0 } }),
|
|
|
|
|
}),
|
|
|
|
|
{
|
|
|
|
|
{ rules::PRECEDENCE, INT_MIN }, { rules::IS_ACTIVE, true },
|
2015-11-08 13:36:09 -08:00
|
|
|
})));
|
2014-07-20 21:43:27 -07:00
|
|
|
}
|
2015-10-12 15:33:00 -07:00
|
|
|
|
2014-07-20 21:43:27 -07:00
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
LexStateId add_lex_state(const LexItemSet &item_set) {
|
2014-10-12 12:44:16 -07:00
|
|
|
const auto &pair = lex_state_ids.find(item_set);
|
2014-07-20 21:43:27 -07:00
|
|
|
if (pair == lex_state_ids.end()) {
|
|
|
|
|
LexStateId state_id = lex_table.add_state();
|
|
|
|
|
lex_state_ids[item_set] = state_id;
|
2015-10-05 18:02:59 -07:00
|
|
|
populate_lex_state(item_set, state_id);
|
2014-07-20 21:43:27 -07:00
|
|
|
return state_id;
|
|
|
|
|
} else {
|
|
|
|
|
return pair->second;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2015-10-05 18:02:59 -07:00
|
|
|
void populate_lex_state(const LexItemSet &item_set, LexStateId state_id) {
|
|
|
|
|
add_accept_token_actions(item_set, state_id);
|
|
|
|
|
add_advance_actions(item_set, state_id);
|
|
|
|
|
add_token_start(item_set, state_id);
|
2014-07-20 21:43:27 -07:00
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void add_advance_actions(const LexItemSet &item_set, LexStateId state_id) {
|
2015-10-05 15:13:43 -07:00
|
|
|
for (const auto &transition : item_set.transitions()) {
|
2015-10-05 18:02:59 -07:00
|
|
|
const CharacterSet &rule = transition.first;
|
2015-10-30 16:07:29 -07:00
|
|
|
const LexItemSet &new_item_set = transition.second.first;
|
|
|
|
|
const PrecedenceRange &precedence = transition.second.second;
|
|
|
|
|
auto current_action = lex_table.state(state_id).default_action;
|
|
|
|
|
auto action = LexAction::Advance(-1, precedence);
|
|
|
|
|
if (conflict_manager.resolve(action, current_action)) {
|
|
|
|
|
action.state_index = add_lex_state(new_item_set);
|
2014-07-20 21:43:27 -07:00
|
|
|
lex_table.state(state_id).actions[rule] = action;
|
2015-10-30 16:07:29 -07:00
|
|
|
}
|
2014-07-20 21:43:27 -07:00
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
2014-09-09 13:15:40 -07:00
|
|
|
void add_accept_token_actions(const LexItemSet &item_set, LexStateId state_id) {
|
2015-10-05 15:13:43 -07:00
|
|
|
for (const LexItem &item : item_set.entries) {
|
2015-10-30 16:48:37 -07:00
|
|
|
LexItem::CompletionStatus completion_status = item.completion_status();
|
2015-09-02 13:02:52 -07:00
|
|
|
if (completion_status.is_done) {
|
2014-07-20 21:43:27 -07:00
|
|
|
auto current_action = lex_table.state(state_id).default_action;
|
2015-12-04 20:56:33 -08:00
|
|
|
auto action =
|
|
|
|
|
LexAction::Accept(item.lhs, completion_status.precedence.max,
|
|
|
|
|
completion_status.is_string);
|
2015-10-30 16:07:29 -07:00
|
|
|
if (conflict_manager.resolve(action, current_action))
|
|
|
|
|
lex_table.state(state_id).default_action = action;
|
2014-07-20 21:43:27 -07:00
|
|
|
}
|
2015-09-02 13:02:52 -07:00
|
|
|
}
|
2014-07-20 21:43:27 -07:00
|
|
|
}
|
|
|
|
|
|
2014-09-02 07:41:29 -07:00
|
|
|
void add_token_start(const LexItemSet &item_set, LexStateId state_id) {
|
2015-10-05 15:13:43 -07:00
|
|
|
for (const auto &item : item_set.entries)
|
2014-09-02 07:41:29 -07:00
|
|
|
if (item.is_token_start())
|
|
|
|
|
lex_table.state(state_id).is_token_start = true;
|
|
|
|
|
}
|
2015-12-10 21:05:54 -08:00
|
|
|
|
|
|
|
|
void mark_fragile_tokens() {
|
|
|
|
|
for (LexState &state : lex_table.states)
|
|
|
|
|
if (state.default_action.type == LexActionTypeAccept)
|
2015-12-17 10:05:42 -08:00
|
|
|
if (conflict_manager.fragile_tokens.count(state.default_action.symbol))
|
2015-12-10 21:05:54 -08:00
|
|
|
state.default_action.type = LexActionTypeAcceptFragile;
|
|
|
|
|
}
|
2014-07-20 21:43:27 -07:00
|
|
|
};
|
|
|
|
|
|
2014-10-12 12:44:16 -07:00
|
|
|
// Entry point of this file: constructs a LexTableBuilder for `table` and
// `grammar` and returns the lex table it builds. The builder writes the lex
// state id of each parse state into `table` while building.
LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) {
  LexTableBuilder builder(table, grammar);
  return builder.build();
}
|
2014-07-20 21:43:27 -07:00
|
|
|
|
|
|
|
|
} // namespace build_tables
|
|
|
|
|
} // namespace tree_sitter
|