Compute transitions for regex pattern rules

This commit is contained in:
Max Brunsfeld 2013-11-14 21:25:58 -08:00
parent 040ec86000
commit ecd317ccd9
13 changed files with 206 additions and 23 deletions

View file

@ -23,6 +23,7 @@
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; };
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; };
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; };
27A343CA69E17E0F9EBEDF1C /* Pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* Pattern.cpp */; };
@ -131,7 +132,7 @@
121492C5181E200B008E9BDA /* igloo_alt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = igloo_alt.h; sourceTree = "<group>"; };
121492C6181E200B008E9BDA /* igloo_framework.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = igloo_framework.h; sourceTree = "<group>"; };
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; };
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = "<group>"; };
@ -141,6 +142,7 @@
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = "<group>"; };
12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; };
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; };
12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; };
12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = "<group>"; };
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
@ -418,6 +420,16 @@
path = spec/test_grammars;
sourceTree = "<group>";
};
12D1369F18357066005F3369 /* rules */ = {
isa = PBXGroup;
children = (
121492EA181E200B008E9BDA /* rules_spec.cpp */,
12D136A0183570F5005F3369 /* pattern_spec.cpp */,
);
name = rules;
path = spec/rules;
sourceTree = "<group>";
};
12E716F9181D010E0051A649 = {
isa = PBXGroup;
children = (
@ -452,11 +464,11 @@
12E71796181D02A80051A649 /* spec */ = {
isa = PBXGroup;
children = (
12D1369F18357066005F3369 /* rules */,
125120A118307FCA00C9B56A /* test_grammars */,
1214925C181E200B008E9BDA /* externals */,
1213061C182C854F00FCF928 /* lr */,
121492E9181E200B008E9BDA /* main.cpp */,
121492EA181E200B008E9BDA /* rules_spec.cpp */,
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */,
12F9A64D182DD5FD00FAF50C /* spec_helper.h */,
);
@ -515,6 +527,7 @@
buildActionMask = 2147483647;
files = (
12130614182C3A1700FCF928 /* seq.cpp in Sources */,
12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */,
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */,
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */,
1213061B182C84DF00FCF928 /* item.cpp in Sources */,

View file

@ -1,7 +1,10 @@
#include "spec_helper.h"
#include "../test_grammars/arithmetic.h"
using namespace tree_sitter::lr;
Describe(parse_table_construction) {
Describe(the_starting_state) {
Describe(build_parse_tables) {
Describe(lexing_tables) {
};
};

View file

@ -1,8 +1,6 @@
#include <igloo/igloo_alt.h>
using namespace igloo;
// Entry point of the spec executable: hands the command-line arguments to
// igloo's test runner and returns whatever status it reports.
int main(int argc, char *argv[])
{
    // Namespace-qualified so the call does not rely on a using-directive.
    return igloo::TestRunner::RunAllTests(argc, argv);
}

View file

@ -0,0 +1,63 @@
#include "spec_helper.h"
#include "rules.h"
#include "transition_map.h"
using namespace tree_sitter::rules;
// Specs for Pattern::to_rule_tree(): each case parses a pattern string and
// compares the printed form of the resulting tree with the printed form of a
// tree assembled by hand from seq / choice / character.
Describe(pattern_rules) {
    It(parses_simple_strings) {
        const auto expected = seq({
            character('a'),
            character('b'),
            character('c')
        });
        pattern_ptr rule = pattern("abc");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected->to_string()));
    };

    It(parses_choices) {
        // '|' separates alternatives, each of which is itself a sequence.
        const auto expected = choice({
            seq({ character('a'), character('b') }),
            seq({ character('c'), character('d') }),
            seq({ character('e'), character('f') })
        });
        pattern_ptr rule = pattern("ab|cd|ef");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected->to_string()));
    };

    It(parses_choices_in_sequences) {
        // A parenthesised group acts as a single element of the sequence.
        const auto expected = seq({
            choice({ character('a'), character('b') }),
            character('c'),
            character('d')
        });
        pattern_ptr rule = pattern("(a|b)cd");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected->to_string()));
    };

    It(parses_special_characters_when_they_are_escaped) {
        // A backslash makes the following '(' an ordinary character.
        const auto expected = seq({
            character('a'),
            character('('),
            character('b')
        });
        pattern_ptr rule = pattern("a\\(b");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected->to_string()));
    }
};

View file

@ -11,11 +11,11 @@ Describe(Rules) {
It(constructs_binary_trees) {
AssertThat(
rules::seq({ symbol1, symbol2, symbol3 })->to_string(),
Equals(std::string("(seq (sym '1') (seq (sym '2') (sym '3')))")));
Equals(std::string("(seq (seq (sym '1') (sym '2')) (sym '3'))")));
AssertThat(
rules::choice({ symbol1, symbol2, symbol3 })->to_string(),
Equals(std::string("(choice (sym '1') (choice (sym '2') (sym '3')))")));
Equals(std::string("(choice (choice (sym '1') (sym '2')) (sym '3'))")));
}
};
@ -65,8 +65,11 @@ Describe(Rules) {
It(handles_long_sequences) {
AssertThat(
rules::seq({
rules::seq({ symbol1, symbol2 }),
rules::seq({ symbol3, symbol4 }) })->transitions(),
symbol1,
symbol2,
symbol3,
symbol4
})->transitions(),
EqualsTransitionMap(TransitionMap<rules::Rule>(
{ symbol1 },
{ rules::seq({ symbol2, symbol3, symbol4 }) }
@ -92,5 +95,14 @@ Describe(Rules) {
{ rules::seq({ rules::character('a'), rules::character('d') }) }
)));
}
};
It(handles_patterns) {
AssertThat(
rules::pattern("a|b")->transitions(),
EqualsTransitionMap(TransitionMap<rules::Rule>(
{ rules::character('a'), rules::character('b') },
{ rules::blank(), rules::blank() }
)));
}
};
};

View file

@ -1,8 +1,80 @@
#include "choice.h"
#include "seq.h"
#include "Pattern.h"
#include "transition_map.h"
using namespace std;
namespace tree_sitter {
namespace rules {
/**
 * Recursive-descent parser that turns a regex pattern string into a tree of
 * `choice`, `seq` and `character` rules.
 *
 * Grammar handled:
 *   rule   := term ('|' term)*
 *   term   := factor+          (stops at '|', ')' or the end of the input)
 *   factor := atom
 *   atom   := '(' rule ')'  |  '\' <any char>  |  <any char>
 *
 * A malformed pattern is reported by throwing a std::string of the form
 * "Invalid regex pattern: <input>". That covers an unterminated group, a
 * dangling '|', '(' or '\' at the end of the input, and a ')' that closes no
 * group. A ')' that appears where a term must start is taken as a literal.
 *
 * A parser instance is meant to be used for a single call to rule().
 */
class PatternParser {
public:
    PatternParser(const std::string &input) :
        input(input),
        length(input.length()),
        position(0),
        depth(0) {}

    /**
     * Parses alternatives separated by '|'.
     * Three or more alternatives nest to the left: a|b|c -> choice(choice(a, b), c).
     */
    rule_ptr rule() {
        auto result = term();
        while (has_more_input() && peek() == '|') {
            next();
            result = choice({ result, term() });
        }
        // Outside of any group, the only thing that can be left over here is
        // a ')' with no matching '('. Reject it instead of silently dropping
        // the remainder of the pattern.
        if (depth == 0 && has_more_input())
            fail();
        return result;
    }

private:
    /** Parses one or more factors and chains them into a left-nested seq. */
    rule_ptr term() {
        rule_ptr result = factor();
        while (has_more_input() && (peek() != '|') && (peek() != ')'))
            result = seq({ result, factor() });
        return result;
    }

    /** Currently just an atom; kept as a separate level of the grammar. */
    rule_ptr factor() {
        return atom();
    }

    /** Parses a parenthesised group, an escaped character or a plain character. */
    rule_ptr atom() {
        // Something must follow: reaching the end here means a dangling
        // '|' or '(' (or an empty pattern).
        if (!has_more_input())
            fail();

        if (peek() == '(') {
            next();
            ++depth;
            rule_ptr group = rule();
            --depth;
            if (!has_more_input() || peek() != ')')
                fail();
            next();
            return group;
        }

        if (peek() == '\\') {
            // Skip the backslash; the next character is taken literally.
            next();
            if (!has_more_input())
                fail();
        }

        rule_ptr result = character(peek());
        next();
        return result;
    }

    /** Reports a malformed pattern. Never returns. */
    [[noreturn]] void fail() const {
        throw std::string("Invalid regex pattern: ") + input;
    }

    void next() {
        position++;
    }

    /** Current character, or '\0' once the input is exhausted (never reads out of bounds). */
    char peek() const {
        return has_more_input() ? input[position] : '\0';
    }

    bool has_more_input() const {
        return position < length;
    }

    const std::string input;
    const size_t length;
    size_t position;   // index of the next unread character
    size_t depth;      // number of '(' groups currently open
};
// Stores the pattern text; it is only parsed when to_rule_tree() is called.
Pattern::Pattern(const std::string &string) : value(string) {}
pattern_ptr pattern(const std::string &value) {
@ -10,9 +82,13 @@ namespace tree_sitter {
}
// A pattern's transitions are those of the rule tree parsed from its text.
TransitionMap<Rule> Pattern::transitions() const {
    return to_rule_tree()->transitions();
}
// Parses the stored pattern text and returns the resulting rule tree.
// The text is re-parsed on every call.
rule_ptr Pattern::to_rule_tree() const {
    PatternParser parser(value);
    return parser.rule();
}
// Two rules are equal when the other one is also a Pattern and both hold the
// same pattern text (compared textually, not by the language they match).
bool Pattern::operator ==(tree_sitter::rules::Rule const &other) const {
    const Pattern *that = dynamic_cast<const Pattern *>(&other);
    return that && (that->value == value);
}

View file

@ -11,6 +11,7 @@ namespace tree_sitter {
TransitionMap<Rule> transitions() const;
bool operator==(const Rule& other) const;
std::string to_string() const;
rule_ptr to_rule_tree() const;
private:
const std::string value;
};

View file

@ -18,11 +18,11 @@ namespace tree_sitter {
// Equal when the other rule is also a Char holding the same value.
bool Char::operator==(const Rule &rule) const {
    // dynamic_cast yields a null pointer when `rule` is not a Char.
    const Char *other = dynamic_cast<const Char *>(&rule);
    return other && (other->value == value);
}
// Returns the held value wrapped in single quotes.
string Char::to_string() const {
    // Append the value itself. Appending `&value` would treat the address of
    // a single character as a NUL-terminated C string and read past it.
    return std::string("'") + value + "'";
}
}
}

View file

@ -19,7 +19,7 @@ namespace tree_sitter {
// Equal when the other rule is also a Choice and both operands compare equal
// position by position (left with left, right with right).
bool Choice::operator==(const Rule &rule) const {
    // dynamic_cast yields a null pointer when `rule` is not a Choice.
    const Choice *other = dynamic_cast<const Choice *>(&rule);
    return other && (*other->left == *left) && (*other->right == *right);
}
std::string Choice::to_string() const {

View file

@ -2,10 +2,23 @@
namespace tree_sitter {
namespace rules {
// Compares this rule with the rule `other` points to. A null pointer is never
// equal to a rule; otherwise the comparison is delegated to the virtual
// operator==(const Rule &) of the concrete rule class.
bool Rule::operator==(const rule_ptr other) const {
    return other && (*this == *other);
}
// Writes the rule's to_string() form to the stream and returns the stream.
std::ostream& operator<<(std::ostream& stream, const Rule &rule)
{
    return stream << rule.to_string();
}
// Writes the pointed-to rule's to_string() form, or a placeholder when the
// pointer is null, and returns the stream.
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule)
{
    if (!rule)
        return stream << std::string("<NULL rule>");
    return stream << rule->to_string();
}
}
}

View file

@ -7,22 +7,26 @@ namespace tree_sitter {
template<class value> class TransitionMap;
namespace rules {
class Rule;
typedef std::shared_ptr<const Rule> rule_ptr;
// Abstract base class of all grammar rules.
class Rule {
public:
    // Rules are used through base-class pointers and references, so the
    // destructor is virtual to keep destruction through a Rule well defined.
    virtual ~Rule() = default;

    // Transitions available from this rule; defined by each concrete rule.
    virtual TransitionMap<Rule> transitions() const = 0;

    // Equality against another rule; defined by each concrete rule.
    virtual bool operator==(const Rule& other) const = 0;

    // Printable form of the rule; used by the stream operators.
    virtual std::string to_string() const = 0;

    // Convenience overload for comparing against a shared pointer to a rule.
    bool operator==(const rule_ptr other) const;
};
typedef std::shared_ptr<const Rule> rule_ptr;
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
// Folds a list of rules into a left-nested binary tree of RuleClass nodes:
//   { a, b, c }  ->  RuleClass(RuleClass(a, b), c)
// A single-element list yields that element unchanged; an empty list yields a
// null rule_ptr. RuleClass must be constructible from two rule_ptr operands.
template <typename RuleClass>
rule_ptr build_binary_rule_tree(const std::initializer_list<rule_ptr> &rules) {
    rule_ptr result;
    // Walk forwards, taking each element by reference to avoid copying the
    // shared_ptr on every iteration.
    for (const auto &rule : rules) {
        if (result)
            result = std::make_shared<RuleClass>(result, rule);
        else
            result = rule;
    }
    return result;
}
}

View file

@ -21,7 +21,7 @@ namespace tree_sitter {
// Equal when the other rule is also a Seq and both operands compare equal
// position by position (left with left, right with right).
bool Seq::operator==(const Rule &rule) const {
    // dynamic_cast yields a null pointer when `rule` is not a Seq.
    const Seq *other = dynamic_cast<const Seq *>(&rule);
    return other && (*other->left == *left) && (*other->right == *right);
}
std::string Seq::to_string() const {

View file

@ -16,7 +16,7 @@ namespace tree_sitter {
// Equal when the other rule is also a Symbol with the same name.
bool Symbol::operator==(const Rule &rule) const {
    // dynamic_cast yields a null pointer when `rule` is not a Symbol.
    const Symbol *other = dynamic_cast<const Symbol *>(&rule);
    return other && (other->name == name);
}
std::string Symbol::to_string() const {