Compute transitions for regex pattern rules
This commit is contained in:
parent
040ec86000
commit
ecd317ccd9
13 changed files with 206 additions and 23 deletions
|
|
@ -23,6 +23,7 @@
|
|||
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; };
|
||||
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
|
||||
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
|
||||
12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
|
||||
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; };
|
||||
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; };
|
||||
27A343CA69E17E0F9EBEDF1C /* Pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* Pattern.cpp */; };
|
||||
|
|
@ -131,7 +132,7 @@
|
|||
121492C5181E200B008E9BDA /* igloo_alt.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = igloo_alt.h; sourceTree = "<group>"; };
|
||||
121492C6181E200B008E9BDA /* igloo_framework.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = igloo_framework.h; sourceTree = "<group>"; };
|
||||
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
|
||||
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
|
||||
1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = "<group>"; };
|
||||
|
|
@ -141,6 +142,7 @@
|
|||
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = "<group>"; };
|
||||
12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
|
||||
12D136A0183570F5005F3369 /* pattern_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = pattern_spec.cpp; path = spec/rules/pattern_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = "<group>"; };
|
||||
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
|
||||
|
|
@ -418,6 +420,16 @@
|
|||
path = spec/test_grammars;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
12D1369F18357066005F3369 /* rules */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
121492EA181E200B008E9BDA /* rules_spec.cpp */,
|
||||
12D136A0183570F5005F3369 /* pattern_spec.cpp */,
|
||||
);
|
||||
name = rules;
|
||||
path = spec/rules;
|
||||
sourceTree = "<group>";
|
||||
};
|
||||
12E716F9181D010E0051A649 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
|
|
@ -452,11 +464,11 @@
|
|||
12E71796181D02A80051A649 /* spec */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
12D1369F18357066005F3369 /* rules */,
|
||||
125120A118307FCA00C9B56A /* test_grammars */,
|
||||
1214925C181E200B008E9BDA /* externals */,
|
||||
1213061C182C854F00FCF928 /* lr */,
|
||||
121492E9181E200B008E9BDA /* main.cpp */,
|
||||
121492EA181E200B008E9BDA /* rules_spec.cpp */,
|
||||
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */,
|
||||
12F9A64D182DD5FD00FAF50C /* spec_helper.h */,
|
||||
);
|
||||
|
|
@ -515,6 +527,7 @@
|
|||
buildActionMask = 2147483647;
|
||||
files = (
|
||||
12130614182C3A1700FCF928 /* seq.cpp in Sources */,
|
||||
12D136A1183570F5005F3369 /* pattern_spec.cpp in Sources */,
|
||||
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */,
|
||||
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */,
|
||||
1213061B182C84DF00FCF928 /* item.cpp in Sources */,
|
||||
|
|
|
|||
|
|
@ -1,7 +1,10 @@
|
|||
#include "spec_helper.h"
|
||||
#include "../test_grammars/arithmetic.h"
|
||||
|
||||
using namespace tree_sitter::lr;
|
||||
|
||||
// Spec skeleton for parse table construction. Only the renamed
// Describe pair is kept so that the braces balance.
Describe(build_parse_tables) {
    Describe(lexing_tables) {

    };
};
|
||||
|
|
|
|||
|
|
@ -1,8 +1,6 @@
|
|||
#include <igloo/igloo_alt.h>
|
||||
|
||||
using namespace igloo;
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
return TestRunner::RunAllTests(argc, argv);
|
||||
return igloo::TestRunner::RunAllTests(argc, argv);
|
||||
}
|
||||
63
spec/rules/pattern_spec.cpp
Normal file
63
spec/rules/pattern_spec.cpp
Normal file
|
|
@ -0,0 +1,63 @@
|
|||
#include "spec_helper.h"
|
||||
#include "rules.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using namespace tree_sitter::rules;
|
||||
|
||||
// Specs for turning regex pattern strings into rule trees. Each case builds
// the expected tree by hand and compares the to_string() forms.
Describe(pattern_rules) {
    It(parses_simple_strings) {
        const auto expected = seq({
            character('a'),
            character('b'),
            character('c')
        })->to_string();

        pattern_ptr rule = pattern("abc");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected));
    };

    It(parses_choices) {
        const auto expected = choice({
            seq({ character('a'), character('b') }),
            seq({ character('c'), character('d') }),
            seq({ character('e'), character('f') })
        })->to_string();

        pattern_ptr rule = pattern("ab|cd|ef");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected));
    };

    It(parses_choices_in_sequences) {
        const auto expected = seq({
            choice({ character('a'), character('b') }),
            character('c'),
            character('d')
        })->to_string();

        pattern_ptr rule = pattern("(a|b)cd");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected));
    };

    It(parses_special_characters_when_they_are_escaped) {
        // The backslash makes the following '(' a literal character.
        const auto expected = seq({
            character('a'),
            character('('),
            character('b')
        })->to_string();

        pattern_ptr rule = pattern("a\\(b");
        AssertThat(rule->to_rule_tree()->to_string(), Equals(expected));
    }
};
|
||||
|
|
@ -11,11 +11,11 @@ Describe(Rules) {
|
|||
// seq() and choice() fold their arguments from the left, so the first two
// elements form the innermost node of the resulting binary tree.
It(constructs_binary_trees) {
    AssertThat(
        rules::seq({ symbol1, symbol2, symbol3 })->to_string(),
        Equals(std::string("(seq (seq (sym '1') (sym '2')) (sym '3'))")));

    AssertThat(
        rules::choice({ symbol1, symbol2, symbol3 })->to_string(),
        Equals(std::string("(choice (choice (sym '1') (sym '2')) (sym '3'))")));
}
|
||||
};
|
||||
|
||||
|
|
@ -65,8 +65,11 @@ Describe(Rules) {
|
|||
It(handles_long_sequences) {
|
||||
AssertThat(
|
||||
rules::seq({
|
||||
rules::seq({ symbol1, symbol2 }),
|
||||
rules::seq({ symbol3, symbol4 }) })->transitions(),
|
||||
symbol1,
|
||||
symbol2,
|
||||
symbol3,
|
||||
symbol4
|
||||
})->transitions(),
|
||||
EqualsTransitionMap(TransitionMap<rules::Rule>(
|
||||
{ symbol1 },
|
||||
{ rules::seq({ symbol2, symbol3, symbol4 }) }
|
||||
|
|
@ -92,5 +95,14 @@ Describe(Rules) {
|
|||
{ rules::seq({ rules::character('a'), rules::character('d') }) }
|
||||
)));
|
||||
}
|
||||
};
|
||||
|
||||
// A pattern's transitions are those of the rule tree it parses to:
// for "a|b", each alternative character leads to a blank rule.
It(handles_patterns) {
    const auto actual = rules::pattern("a|b")->transitions();
    AssertThat(
        actual,
        EqualsTransitionMap(TransitionMap<rules::Rule>(
            { rules::character('a'), rules::character('b') },
            { rules::blank(), rules::blank() }
        )));
}
|
||||
};
|
||||
};
|
||||
|
|
@ -1,8 +1,80 @@
|
|||
#include "choice.h"
|
||||
#include "seq.h"
|
||||
#include "Pattern.h"
|
||||
#include "transition_map.h"
|
||||
|
||||
using namespace std;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
class PatternParser {
|
||||
public:
|
||||
PatternParser(const string &input) :
|
||||
input(input),
|
||||
position(0),
|
||||
length(input.length()) {}
|
||||
|
||||
rule_ptr rule() {
|
||||
auto result = term();
|
||||
while (has_more_input() && peek() == '|') {
|
||||
next();
|
||||
result = choice({ result, term() });
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
private:
|
||||
rule_ptr term() {
|
||||
rule_ptr result = factor();
|
||||
while (has_more_input() && (peek() != '|') && (peek() != ')'))
|
||||
result = seq({ result, factor() });
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr factor() {
|
||||
return atom();
|
||||
}
|
||||
|
||||
rule_ptr atom() {
|
||||
rule_ptr result;
|
||||
switch (peek()) {
|
||||
case '(':
|
||||
next();
|
||||
result = rule();
|
||||
if (peek() == ')') {
|
||||
next();
|
||||
return result;
|
||||
} else {
|
||||
throw std::string("Invalid regex pattern: ") + input;
|
||||
}
|
||||
break;
|
||||
case '\\':
|
||||
next();
|
||||
default:
|
||||
result = character(peek());
|
||||
next();
|
||||
return result;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
void next() {
|
||||
position++;
|
||||
}
|
||||
|
||||
char peek() {
|
||||
return input[position];
|
||||
}
|
||||
|
||||
bool has_more_input() {
|
||||
return position < length;
|
||||
}
|
||||
|
||||
const std::string input;
|
||||
const size_t length;
|
||||
int position;
|
||||
};
|
||||
|
||||
// Stores the pattern text; it is parsed on demand by to_rule_tree().
Pattern::Pattern(const std::string &pattern_text) : value(pattern_text) {}
|
||||
|
||||
pattern_ptr pattern(const std::string &value) {
|
||||
|
|
@ -10,9 +82,13 @@ namespace tree_sitter {
|
|||
}
|
||||
|
||||
// A pattern's transitions are the transitions of the rule tree that its
// text parses to.
TransitionMap<Rule> Pattern::transitions() const {
    return to_rule_tree()->transitions();
}
|
||||
|
||||
|
||||
// Parses the stored pattern text into its equivalent rule tree.
rule_ptr Pattern::to_rule_tree() const {
    PatternParser parser(value);
    return parser.rule();
}
|
||||
|
||||
// Two patterns are equal when `other` is also a Pattern with the same text.
bool Pattern::operator ==(tree_sitter::rules::Rule const &other) const {
    const Pattern *that = dynamic_cast<const Pattern *>(&other);
    return that && (that->value == value);
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ namespace tree_sitter {
|
|||
TransitionMap<Rule> transitions() const;
|
||||
bool operator==(const Rule& other) const;
|
||||
std::string to_string() const;
|
||||
rule_ptr to_rule_tree() const;
|
||||
private:
|
||||
const std::string value;
|
||||
};
|
||||
|
|
|
|||
|
|
@ -18,11 +18,11 @@ namespace tree_sitter {
|
|||
|
||||
// Equal when `rule` is also a Char holding the same value.
bool Char::operator==(const Rule &rule) const {
    const Char *other = dynamic_cast<const Char *>(&rule);
    return other && (other->value == value);
}
|
||||
|
||||
string Char::to_string() const {
|
||||
return std::string("'") + &value + "'";
|
||||
return std::string("'") + value + "'";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ namespace tree_sitter {
|
|||
|
||||
// Equal when `rule` is also a Choice whose left and right operands compare
// equal to this one's.
bool Choice::operator==(const Rule &rule) const {
    const Choice *other = dynamic_cast<const Choice *>(&rule);
    return other && (*other->left == *left) && (*other->right == *right);
}
|
||||
|
||||
std::string Choice::to_string() const {
|
||||
|
|
|
|||
|
|
@ -2,10 +2,23 @@
|
|||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
// Compares this rule with the rule `other` points to, using the virtual
// operator==(const Rule&). A null pointer is never equal to a rule.
bool Rule::operator==(const rule_ptr other) const {
    return other && (*this == *other);
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const Rule &rule)
|
||||
{
|
||||
stream << rule.to_string();
|
||||
return stream;
|
||||
}
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule)
|
||||
{
|
||||
if (rule.get() == nullptr)
|
||||
stream << std::string("<NULL rule>");
|
||||
else
|
||||
stream << rule->to_string();
|
||||
return stream;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -7,22 +7,26 @@ namespace tree_sitter {
|
|||
template<class value> class TransitionMap;
|
||||
|
||||
namespace rules {
|
||||
class Rule;
|
||||
typedef std::shared_ptr<const Rule> rule_ptr;
|
||||
|
||||
class Rule {
|
||||
public:
|
||||
virtual TransitionMap<Rule> transitions() const = 0;
|
||||
virtual bool operator==(const Rule& other) const = 0;
|
||||
virtual std::string to_string() const = 0;
|
||||
bool operator==(const rule_ptr other) const;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Rule> rule_ptr;
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
|
||||
std::ostream& operator<<(std::ostream& stream, const rule_ptr &rule);
|
||||
|
||||
template <typename RuleClass>
|
||||
rule_ptr build_binary_rule_tree(const std::initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result(nullptr);
|
||||
for (auto it = rules.end() - 1; it >= rules.begin(); --it)
|
||||
result = result.get() ? std::make_shared<RuleClass>(*it, result) : *it;
|
||||
rule_ptr result;
|
||||
for (auto rule : rules)
|
||||
result = result.get() ? std::make_shared<RuleClass>(result, rule) : rule;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ namespace tree_sitter {
|
|||
|
||||
// Equal when `rule` is also a Seq whose left and right operands compare
// equal to this one's.
bool Seq::operator==(const Rule &rule) const {
    const Seq *other = dynamic_cast<const Seq *>(&rule);
    return other && (*other->left == *left) && (*other->right == *right);
}
|
||||
|
||||
std::string Seq::to_string() const {
|
||||
|
|
|
|||
|
|
@ -16,7 +16,7 @@ namespace tree_sitter {
|
|||
|
||||
// Equal when `rule` is also a Symbol with the same name.
bool Symbol::operator==(const Rule &rule) const {
    const Symbol *other = dynamic_cast<const Symbol *>(&rule);
    return other && (other->name == name);
}
|
||||
|
||||
std::string Symbol::to_string() const {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue