tree-sitter/src/compiler/rules/pattern.cc

182 lines
5.6 KiB
C++
Raw Normal View History

2014-03-09 21:37:21 -07:00
#include "compiler/rules/pattern.h"
#include <set>
2014-03-09 21:37:21 -07:00
#include "compiler/rules/visitor.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/character_set.h"
namespace tree_sitter {
namespace rules {
using std::string;
using std::hash;
using std::make_shared;
using std::set;
2014-03-09 19:49:35 -07:00
class PatternParser {
public:
PatternParser(const string &input) :
input(input),
2014-02-18 09:07:00 -08:00
length(input.length()),
position(0) {}
2014-03-09 19:49:35 -07:00
rule_ptr rule() {
auto result = term();
while (has_more_input() && peek() == '|') {
next();
result = make_shared<Choice>(result, term());
}
return result;
}
2014-03-09 19:49:35 -07:00
private:
rule_ptr term() {
rule_ptr result = factor();
while (has_more_input() && (peek() != '|') && (peek() != ')'))
result = Seq::Build({ result, factor() });
return result;
}
2014-03-09 19:49:35 -07:00
rule_ptr factor() {
2013-11-15 13:35:35 -08:00
rule_ptr result = atom();
if (has_more_input() && (peek() == '+')) {
next();
result = make_shared<Repeat>(result);
2013-11-15 13:35:35 -08:00
}
return result;
}
2014-03-09 19:49:35 -07:00
rule_ptr atom() {
rule_ptr result;
switch (peek()) {
case '(':
next();
result = rule();
2014-03-04 12:36:38 -08:00
if (has_error()) return result;
if (peek() != ')') {
error = "mismatched parens";
return result;
}
next();
2013-11-15 08:46:45 -08:00
break;
case '[':
next();
result = char_set().copy();
2014-03-04 12:36:38 -08:00
if (has_error()) return result;
if (peek() != ']') {
error = "mismatched square brackets";
return result;
}
next();
break;
2013-11-15 08:46:45 -08:00
case ')':
2014-03-04 12:36:38 -08:00
error = "mismatched parens";
break;
default:
result = single_char().copy();
}
return result;
}
2014-03-09 19:49:35 -07:00
CharacterSet char_set() {
bool is_affirmative = true;
if (peek() == '^') {
next();
is_affirmative = false;
}
CharacterSet result;
while (has_more_input() && (peek() != ']'))
2014-02-07 12:57:35 -08:00
result.add_set(single_char());
return is_affirmative ? result : result.complement();
}
2014-03-09 19:49:35 -07:00
CharacterSet single_char() {
2014-03-04 12:36:38 -08:00
CharacterSet value;
switch (peek()) {
case '\\':
next();
value = escaped_char(peek());
2014-03-04 12:36:38 -08:00
if (has_error()) return value;
2013-11-15 08:46:45 -08:00
next();
break;
default:
char first_char = peek();
next();
if (peek() == '-') {
next();
value = CharacterSet({ CharacterRange(first_char, peek()) });
next();
} else {
value = CharacterSet({ first_char });
}
}
return value;
}
2014-03-09 19:49:35 -07:00
CharacterSet escaped_char(char value) {
2013-11-20 19:00:20 -08:00
switch (value) {
case '\\':
2013-11-20 19:00:20 -08:00
case '(':
case ')':
return CharacterSet({ value });
2013-11-20 19:00:20 -08:00
case 'w':
return CharacterSet({{'a', 'z'}, {'A', 'Z'}});
2013-11-20 19:00:20 -08:00
case 'd':
return CharacterSet({CharacterRange('0', '9')});
2013-11-20 19:00:20 -08:00
default:
2014-03-04 12:36:38 -08:00
error = "unrecognized escape sequence";
return CharacterSet();
2013-11-20 19:00:20 -08:00
}
}
2014-03-09 19:49:35 -07:00
void next() {
position++;
}
2014-03-09 19:49:35 -07:00
char peek() {
return input[position];
}
2014-03-09 19:49:35 -07:00
bool has_more_input() {
return position < length;
}
2014-03-09 19:49:35 -07:00
2014-03-04 12:36:38 -08:00
bool has_error() {
return error != "";
2013-11-15 08:46:45 -08:00
}
2014-03-09 19:49:35 -07:00
2014-03-04 12:36:38 -08:00
string error;
const string input;
const size_t length;
size_t position;
};
2014-03-09 19:49:35 -07:00
2014-03-09 22:24:52 -07:00
// Constructs a pattern rule that stores a copy of the given pattern text.
Pattern::Pattern(const string &string)
    : value(string) {
}
2014-03-09 19:49:35 -07:00
2013-12-18 20:58:05 -08:00
// Equality: `other` must itself be a Pattern and hold the same pattern text.
bool Pattern::operator==(tree_sitter::rules::Rule const &other) const {
    const Pattern *that = dynamic_cast<const Pattern *>(&other);
    if (!that)
        return false;  // a different kind of rule never compares equal
    return that->value == value;
}
2014-03-09 19:49:35 -07:00
2013-12-30 23:52:38 -08:00
size_t Pattern::hash_code() const {
2014-02-15 16:12:16 -08:00
return hash<string>()(value);
2013-12-30 23:52:38 -08:00
}
2014-03-09 19:49:35 -07:00
2014-01-02 13:04:41 -08:00
// Returns a freshly allocated Pattern copy-constructed from this one.
rule_ptr Pattern::copy() const {
    rule_ptr duplicate = std::make_shared<Pattern>(*this);
    return duplicate;
}
2014-03-09 19:49:35 -07:00
// Renders the rule as "#<pattern '...'>" with the pattern text in the quotes.
string Pattern::to_string() const {
    string text("#<pattern '");
    text += value;
    text += "'>";
    return text;
}
2014-03-09 19:49:35 -07:00
// Visitor entry point: passes this pattern to the visitor's visit() method.
void Pattern::accept(Visitor *visitor) const {
    const Pattern *self = this;
    visitor->visit(self);
}
2014-03-09 19:49:35 -07:00
2013-12-18 20:58:05 -08:00
// Parses the stored pattern text with PatternParser and returns the
// resulting rule tree.
rule_ptr Pattern::to_rule_tree() const {
    PatternParser parser(value);
    return parser.rule();
}
}
}