2014-03-09 21:37:21 -07:00
|
|
|
#include "compiler/rules/pattern.h"
|
2014-02-16 22:13:08 -08:00
|
|
|
#include <set>
|
2014-03-09 22:45:33 -07:00
|
|
|
#include <string>
|
2014-03-09 21:37:21 -07:00
|
|
|
#include "compiler/rules/visitor.h"
|
|
|
|
|
#include "compiler/rules/choice.h"
|
|
|
|
|
#include "compiler/rules/seq.h"
|
|
|
|
|
#include "compiler/rules/repeat.h"
|
|
|
|
|
#include "compiler/rules/character_set.h"
|
2013-11-14 21:25:58 -08:00
|
|
|
|
2013-11-10 14:24:25 -08:00
|
|
|
namespace tree_sitter {
|
|
|
|
|
namespace rules {
|
2014-02-16 22:13:08 -08:00
|
|
|
using std::string;
|
|
|
|
|
using std::hash;
|
|
|
|
|
using std::make_shared;
|
|
|
|
|
using std::set;
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
class PatternParser {
|
|
|
|
|
public:
|
|
|
|
|
PatternParser(const string &input) :
|
|
|
|
|
input(input),
|
2014-02-18 09:07:00 -08:00
|
|
|
length(input.length()),
|
|
|
|
|
position(0) {}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
rule_ptr rule() {
|
|
|
|
|
auto result = term();
|
|
|
|
|
while (has_more_input() && peek() == '|') {
|
|
|
|
|
next();
|
2014-02-16 22:13:08 -08:00
|
|
|
result = make_shared<Choice>(result, term());
|
2013-11-14 21:25:58 -08:00
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
private:
|
|
|
|
|
rule_ptr term() {
|
|
|
|
|
rule_ptr result = factor();
|
|
|
|
|
while (has_more_input() && (peek() != '|') && (peek() != ')'))
|
2014-02-16 22:13:08 -08:00
|
|
|
result = Seq::Build({ result, factor() });
|
2013-11-14 21:25:58 -08:00
|
|
|
return result;
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
rule_ptr factor() {
|
2013-11-15 13:35:35 -08:00
|
|
|
rule_ptr result = atom();
|
|
|
|
|
if (has_more_input() && (peek() == '+')) {
|
|
|
|
|
next();
|
2014-02-16 22:13:08 -08:00
|
|
|
result = make_shared<Repeat>(result);
|
2013-11-15 13:35:35 -08:00
|
|
|
}
|
|
|
|
|
return result;
|
2013-11-14 21:25:58 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
rule_ptr atom() {
|
|
|
|
|
rule_ptr result;
|
|
|
|
|
switch (peek()) {
|
|
|
|
|
case '(':
|
|
|
|
|
next();
|
|
|
|
|
result = rule();
|
2014-03-04 12:36:38 -08:00
|
|
|
if (has_error()) return result;
|
|
|
|
|
if (peek() != ')') {
|
|
|
|
|
error = "mismatched parens";
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
next();
|
2013-11-15 08:46:45 -08:00
|
|
|
break;
|
2014-01-30 13:04:31 -08:00
|
|
|
case '[':
|
|
|
|
|
next();
|
2014-02-05 18:56:04 -08:00
|
|
|
result = char_set().copy();
|
2014-03-04 12:36:38 -08:00
|
|
|
if (has_error()) return result;
|
|
|
|
|
if (peek() != ']') {
|
|
|
|
|
error = "mismatched square brackets";
|
|
|
|
|
return result;
|
|
|
|
|
}
|
|
|
|
|
next();
|
2014-01-30 13:04:31 -08:00
|
|
|
break;
|
2013-11-15 08:46:45 -08:00
|
|
|
case ')':
|
2014-03-04 12:36:38 -08:00
|
|
|
error = "mismatched parens";
|
2013-11-14 21:25:58 -08:00
|
|
|
break;
|
2014-01-30 13:04:31 -08:00
|
|
|
default:
|
2014-02-05 18:56:04 -08:00
|
|
|
result = single_char().copy();
|
2014-01-30 13:04:31 -08:00
|
|
|
}
|
|
|
|
|
return result;
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
CharacterSet char_set() {
|
|
|
|
|
bool is_affirmative = true;
|
|
|
|
|
if (peek() == '^') {
|
|
|
|
|
next();
|
|
|
|
|
is_affirmative = false;
|
|
|
|
|
}
|
|
|
|
|
CharacterSet result;
|
|
|
|
|
while (has_more_input() && (peek() != ']'))
|
2014-02-07 12:57:35 -08:00
|
|
|
result.add_set(single_char());
|
2014-02-05 18:56:04 -08:00
|
|
|
return is_affirmative ? result : result.complement();
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
CharacterSet single_char() {
|
2014-03-04 12:36:38 -08:00
|
|
|
CharacterSet value;
|
2014-01-30 13:04:31 -08:00
|
|
|
switch (peek()) {
|
2013-11-14 21:25:58 -08:00
|
|
|
case '\\':
|
|
|
|
|
next();
|
2014-01-30 13:04:31 -08:00
|
|
|
value = escaped_char(peek());
|
2014-03-04 12:36:38 -08:00
|
|
|
if (has_error()) return value;
|
2013-11-15 08:46:45 -08:00
|
|
|
next();
|
|
|
|
|
break;
|
2013-11-14 21:25:58 -08:00
|
|
|
default:
|
2014-01-30 18:54:39 -08:00
|
|
|
char first_char = peek();
|
2013-11-14 21:25:58 -08:00
|
|
|
next();
|
2014-01-30 18:54:39 -08:00
|
|
|
if (peek() == '-') {
|
|
|
|
|
next();
|
2014-02-16 22:13:08 -08:00
|
|
|
value = CharacterSet({ CharacterRange(first_char, peek()) });
|
2014-01-30 18:54:39 -08:00
|
|
|
next();
|
|
|
|
|
} else {
|
2014-02-05 18:56:04 -08:00
|
|
|
value = CharacterSet({ first_char });
|
2014-01-30 18:54:39 -08:00
|
|
|
}
|
2013-11-14 21:25:58 -08:00
|
|
|
}
|
2014-01-30 13:04:31 -08:00
|
|
|
return value;
|
2013-11-14 21:25:58 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-02-05 18:56:04 -08:00
|
|
|
CharacterSet escaped_char(char value) {
|
2013-11-20 19:00:20 -08:00
|
|
|
switch (value) {
|
2014-01-30 13:04:31 -08:00
|
|
|
case '\\':
|
2013-11-20 19:00:20 -08:00
|
|
|
case '(':
|
|
|
|
|
case ')':
|
2014-02-05 18:56:04 -08:00
|
|
|
return CharacterSet({ value });
|
2013-11-20 19:00:20 -08:00
|
|
|
case 'w':
|
2014-02-16 22:13:08 -08:00
|
|
|
return CharacterSet({{'a', 'z'}, {'A', 'Z'}});
|
2013-11-20 19:00:20 -08:00
|
|
|
case 'd':
|
2014-02-16 22:13:08 -08:00
|
|
|
return CharacterSet({CharacterRange('0', '9')});
|
2013-11-20 19:00:20 -08:00
|
|
|
default:
|
2014-03-04 12:36:38 -08:00
|
|
|
error = "unrecognized escape sequence";
|
2014-02-05 18:56:04 -08:00
|
|
|
return CharacterSet();
|
2013-11-20 19:00:20 -08:00
|
|
|
}
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
void next() {
|
|
|
|
|
position++;
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
char peek() {
|
|
|
|
|
return input[position];
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-11-14 21:25:58 -08:00
|
|
|
bool has_more_input() {
|
|
|
|
|
return position < length;
|
|
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-04 12:36:38 -08:00
|
|
|
bool has_error() {
|
|
|
|
|
return error != "";
|
2013-11-15 08:46:45 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-04 12:36:38 -08:00
|
|
|
string error;
|
2013-12-19 23:05:54 -08:00
|
|
|
const string input;
|
2013-11-14 21:25:58 -08:00
|
|
|
const size_t length;
|
2014-03-04 18:28:28 -08:00
|
|
|
size_t position;
|
2013-11-14 21:25:58 -08:00
|
|
|
};
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-09 22:24:52 -07:00
|
|
|
// Builds a pattern rule that stores the given pattern text in `value`.
Pattern::Pattern(const string &string)
    : value(string) {
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-18 20:58:05 -08:00
|
|
|
// Two rules compare equal only when `other` is also a Pattern and both hold
// the same pattern text.
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
    const Pattern *candidate = dynamic_cast<const Pattern *>(&other);
    if (candidate == nullptr)
        return false;
    return candidate->value == value;
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-30 23:52:38 -08:00
|
|
|
size_t Pattern::hash_code() const {
|
2014-02-15 16:12:16 -08:00
|
|
|
return hash<string>()(value);
|
2013-12-30 23:52:38 -08:00
|
|
|
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-01-02 13:04:41 -08:00
|
|
|
// Returns a newly allocated Pattern copy-constructed from this one.
rule_ptr Pattern::copy() const {
    rule_ptr duplicate = std::make_shared<Pattern>(*this);
    return duplicate;
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-19 23:05:54 -08:00
|
|
|
// Renders the rule as "#<pattern '...'>" with the pattern text between the
// single quotes.
string Pattern::to_string() const {
    string description("#<pattern '");
    description += value;
    description += "'>";
    return description;
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2014-03-09 22:21:58 -07:00
|
|
|
// Visitor entry point: passes this pattern to the visitor's visit() overload.
void Pattern::accept(Visitor *visitor) const {
    Visitor &target = *visitor;
    target.visit(this);
}
|
2014-03-09 19:49:35 -07:00
|
|
|
|
2013-12-18 20:58:05 -08:00
|
|
|
// Parses the stored pattern text and returns the resulting rule tree.
// NOTE(review): the parser's error state is private and is not inspected
// here, so a malformed pattern still yields whatever tree was built.
rule_ptr Pattern::to_rule_tree() const {
    PatternParser parser(value);
    return parser.rule();
}
|
2013-11-10 14:24:25 -08:00
|
|
|
}
|
|
|
|
|
}
|