Move shared rule pointer factories into individual rule files
This commit is contained in:
parent
8a0a442a24
commit
040ec86000
19 changed files with 102 additions and 76 deletions
|
|
@ -20,7 +20,6 @@
|
|||
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; };
|
||||
12512093182F307C00C9B56A /* parse_table_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_spec.cpp */; };
|
||||
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
|
||||
1251209D18303CFB00C9B56A /* rules.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209C18303CFB00C9B56A /* rules.cpp */; };
|
||||
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; };
|
||||
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
|
||||
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
|
||||
|
|
@ -135,13 +134,13 @@
|
|||
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12512092182F307C00C9B56A /* parse_table_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_spec.cpp; path = spec/lr/parse_table_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
|
||||
1251209C18303CFB00C9B56A /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = "<group>"; };
|
||||
1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = "<group>"; };
|
||||
1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = "<group>"; };
|
||||
125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/test_grammars/arithmetic.h; sourceTree = SOURCE_ROOT; };
|
||||
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/test_grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = "<group>"; };
|
||||
12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; };
|
||||
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
|
||||
12E71794181D02A80051A649 /* specs */ = {isa = PBXFileReference; explicitFileType = "compiled.mach-o.executable"; includeInIndex = 0; path = specs; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
12E71852181D081C0051A649 /* rules.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = rules.h; sourceTree = "<group>"; };
|
||||
12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = spec_helper.cpp; path = spec/spec_helper.cpp; sourceTree = SOURCE_ROOT; };
|
||||
|
|
@ -422,6 +421,7 @@
|
|||
12E716F9181D010E0051A649 = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
12D1369E18342088005F3369 /* todo.md */,
|
||||
12E71701181D01890051A649 /* src */,
|
||||
12E71796181D02A80051A649 /* spec */,
|
||||
12E71795181D02A80051A649 /* Products */,
|
||||
|
|
@ -435,7 +435,6 @@
|
|||
12F9A650182DD6BC00FAF50C /* grammar.h */,
|
||||
12130618182C84B700FCF928 /* lr */,
|
||||
12130602182C344400FCF928 /* rules */,
|
||||
1251209C18303CFB00C9B56A /* rules.cpp */,
|
||||
12E71852181D081C0051A649 /* rules.h */,
|
||||
12C344421822F27700B07BE3 /* transition_map.h */,
|
||||
);
|
||||
|
|
@ -519,7 +518,6 @@
|
|||
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */,
|
||||
1214930F181E200B008E9BDA /* rules_spec.cpp in Sources */,
|
||||
1213061B182C84DF00FCF928 /* item.cpp in Sources */,
|
||||
1251209D18303CFB00C9B56A /* rules.cpp in Sources */,
|
||||
12130617182C3D2900FCF928 /* string.cpp in Sources */,
|
||||
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
|
||||
12D1369D18328C5A005F3369 /* item_spec.cpp in Sources */,
|
||||
|
|
|
|||
|
|
@ -1,41 +0,0 @@
|
|||
#include "rules.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
rule_ptr blank() {
|
||||
return rule_ptr(new Blank());
|
||||
}
|
||||
|
||||
rule_ptr sym(const std::string &name) {
|
||||
return rule_ptr(new Symbol(name));
|
||||
}
|
||||
|
||||
rule_ptr character(char value) {
|
||||
return rule_ptr(new Char(value));
|
||||
}
|
||||
|
||||
rule_ptr str(const std::string &value) {
|
||||
return rule_ptr(new String(value));
|
||||
}
|
||||
|
||||
rule_ptr pattern(const std::string &value) {
|
||||
return rule_ptr(new Pattern(value));
|
||||
}
|
||||
|
||||
template <typename RuleClass>
|
||||
rule_ptr build_binary_tree(const std::initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result(nullptr);
|
||||
for (auto it = rules.end() - 1; it >= rules.begin(); --it)
|
||||
result = result.get() ? rule_ptr(new RuleClass(*it, result)) : *it;
|
||||
return result;
|
||||
}
|
||||
|
||||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_tree<Seq>(rules);
|
||||
}
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_tree<Choice>(rules);
|
||||
}
|
||||
}
|
||||
}
|
||||
14
src/rules.h
14
src/rules.h
|
|
@ -10,18 +10,4 @@
|
|||
#include "pattern.h"
|
||||
#include "char.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
rule_ptr blank();
|
||||
rule_ptr sym(const std::string &name);
|
||||
rule_ptr character(char value);
|
||||
rule_ptr str(const std::string &value);
|
||||
rule_ptr pattern(const std::string &value);
|
||||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules);
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
|
||||
|
||||
typedef std::shared_ptr<const Symbol> sym_ptr;
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -4,7 +4,10 @@
|
|||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Pattern::Pattern(const std::string &string) : value(string) {};
|
||||
Pattern::Pattern(const char *string) : value(string) {};
|
||||
|
||||
pattern_ptr pattern(const std::string &value) {
|
||||
return std::make_shared<Pattern>(value);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Pattern::transitions() const {
|
||||
return tree_sitter::TransitionMap<Rule>();
|
||||
|
|
|
|||
|
|
@ -7,7 +7,6 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
class Pattern : public Rule {
|
||||
public:
|
||||
Pattern(const char *string);
|
||||
Pattern(const std::string &string);
|
||||
TransitionMap<Rule> transitions() const;
|
||||
bool operator==(const Rule& other) const;
|
||||
|
|
@ -15,6 +14,9 @@ namespace tree_sitter {
|
|||
private:
|
||||
const std::string value;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Pattern> pattern_ptr;
|
||||
pattern_ptr pattern(const std::string &value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,6 +5,10 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
Blank::Blank() {}
|
||||
|
||||
blank_ptr blank() {
|
||||
return std::make_shared<Blank>();
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Blank::transitions() const {
|
||||
return TransitionMap<Rule>();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,9 @@ namespace tree_sitter {
|
|||
bool operator==(const Rule& other) const;
|
||||
std::string to_string() const;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Blank> blank_ptr;
|
||||
blank_ptr blank();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,9 +7,13 @@ using namespace std;
|
|||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Char::Char(char value) : value(value) {};
|
||||
|
||||
char_ptr character(char value) {
|
||||
return std::make_shared<Char>(value);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Char::transitions() const {
|
||||
return TransitionMap<Rule>({ rule_ptr(new Char(value)) }, { rule_ptr(new Blank()) });
|
||||
return TransitionMap<Rule>({ character(value) }, { blank() });
|
||||
}
|
||||
|
||||
bool Char::operator==(const Rule &rule) const {
|
||||
|
|
|
|||
|
|
@ -14,6 +14,9 @@ namespace tree_sitter {
|
|||
private:
|
||||
const char value;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Char> char_ptr;
|
||||
char_ptr character(char value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,14 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
Choice::Choice(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_rule_tree<Choice>(rules);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Choice::transitions() const {
|
||||
auto result = left->transitions();
|
||||
result.merge(right->transitions(), [&](rule_ptr left, rule_ptr right) -> rule_ptr {
|
||||
return rule_ptr(new Choice(left, right));
|
||||
return choice({ left, right });
|
||||
});
|
||||
return result;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ namespace tree_sitter {
|
|||
const rule_ptr left;
|
||||
const rule_ptr right;
|
||||
};
|
||||
|
||||
rule_ptr choice(const std::initializer_list<rule_ptr> &rules);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,16 @@ namespace tree_sitter {
|
|||
};
|
||||
|
||||
typedef std::shared_ptr<const Rule> rule_ptr;
|
||||
|
||||
std::ostream& operator<<(std::ostream& stream, const Rule &rule);
|
||||
|
||||
template <typename RuleClass>
|
||||
rule_ptr build_binary_rule_tree(const std::initializer_list<rule_ptr> &rules) {
|
||||
rule_ptr result(nullptr);
|
||||
for (auto it = rules.end() - 1; it >= rules.begin(); --it)
|
||||
result = result.get() ? std::make_shared<RuleClass>(*it, result) : *it;
|
||||
return result;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -6,12 +6,16 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
Seq::Seq(rule_ptr left, rule_ptr right) : left(left), right(right) {};
|
||||
|
||||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules) {
|
||||
return build_binary_rule_tree<Seq>(rules);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Seq::transitions() const {
|
||||
return left->transitions().map<Rule>([&](rule_ptr left_rule) -> rule_ptr {
|
||||
if (typeid(*left_rule) == typeid(Blank))
|
||||
return right;
|
||||
else
|
||||
return rule_ptr(new Seq(left_rule, right));
|
||||
return seq({ left_rule, right });
|
||||
});
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -15,6 +15,8 @@ namespace tree_sitter {
|
|||
const rule_ptr left;
|
||||
const rule_ptr right;
|
||||
};
|
||||
|
||||
rule_ptr seq(const std::initializer_list<rule_ptr> &rules);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -7,10 +7,14 @@ namespace tree_sitter {
|
|||
namespace rules {
|
||||
String::String(std::string value) : value(value) {};
|
||||
|
||||
string_ptr str(const std::string &value) {
|
||||
return std::make_shared<String>(value);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> String::transitions() const {
|
||||
auto result = rule_ptr(new Char(value[0]));
|
||||
rule_ptr result = character(value[0]);
|
||||
for (int i = 1; i < value.length(); i++)
|
||||
result = rule_ptr(new Seq(result, rule_ptr(new Char(value[i]))));
|
||||
result = seq({ result, character(value[i]) });
|
||||
return result->transitions();
|
||||
}
|
||||
|
||||
|
|
@ -18,13 +22,10 @@ namespace tree_sitter {
|
|||
const String *other = dynamic_cast<const String *>(&rule);
|
||||
return (other != NULL) && (other->value == value);
|
||||
}
|
||||
|
||||
String * String::copy() const {
|
||||
return new String(value);
|
||||
}
|
||||
|
||||
|
||||
std::string String::to_string() const {
|
||||
return std::string("(string '") + value + "')";
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
|
@ -9,12 +9,14 @@ namespace tree_sitter {
|
|||
public:
|
||||
String(std::string value);
|
||||
TransitionMap<Rule> transitions() const;
|
||||
String * copy() const;
|
||||
bool operator==(const Rule& other) const;
|
||||
std::string to_string() const;
|
||||
private:
|
||||
const std::string value;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const String> string_ptr;
|
||||
string_ptr str(const std::string &value);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -5,10 +5,13 @@
|
|||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
Symbol::Symbol(const std::string &name) : name(name) {};
|
||||
Symbol::Symbol(const char *name) : name(name) {};
|
||||
|
||||
sym_ptr sym(const std::string &name) {
|
||||
return std::make_shared<Symbol>(name);
|
||||
}
|
||||
|
||||
TransitionMap<Rule> Symbol::transitions() const {
|
||||
return TransitionMap<Rule>({ rule_ptr(new Symbol(name)) }, { rule_ptr(new Blank()) });
|
||||
return TransitionMap<Rule>({ sym(name) }, { blank() });
|
||||
}
|
||||
|
||||
bool Symbol::operator==(const Rule &rule) const {
|
||||
|
|
|
|||
|
|
@ -8,13 +8,14 @@ namespace tree_sitter {
|
|||
class Symbol : public Rule {
|
||||
public:
|
||||
Symbol(const std::string &name);
|
||||
Symbol(const char *name);
|
||||
TransitionMap<Rule> transitions() const;
|
||||
Symbol * copy() const;
|
||||
bool operator==(const Rule& other) const;
|
||||
std::string to_string() const;
|
||||
const std::string name;
|
||||
};
|
||||
|
||||
typedef std::shared_ptr<const Symbol> sym_ptr;
|
||||
sym_ptr sym(const std::string &name);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
36
todo.md
Normal file
36
todo.md
Normal file
|
|
@ -0,0 +1,36 @@
|
|||
TODO
|
||||
====
|
||||
|
||||
# complete the list of rule types
|
||||
|
||||
- add repeat rules
|
||||
- parse regex rules into trees of choices, sequences, repeats
|
||||
|
||||
# generate lexers for sets of terminal rules (can be mix of throwaway and meaningful)
|
||||
|
||||
Introduce ParseTable type which contains a vector of ParseStates. A ParseState contains a
|
||||
TransitionMap of ParseActions. For a lexer, a ParseAction can be one of:
|
||||
- Accept(symbol)
|
||||
- Advance(state index)
|
||||
|
||||
Then generate a C function for a ParseTable
|
||||
|
||||
# generate parsers from sets of non-terminal rules
|
||||
|
||||
For a Parser, the ParseActions can be any of:
|
||||
- Accept(symbol)
|
||||
- Shift(symbol)
|
||||
- Reduce(symbol, number of child symbols)
|
||||
|
||||
# normalize grammars
|
||||
|
||||
- add concept of throwaway-terminals (tokens that won't appear in constructed AST)
|
||||
- classify rules as non-terminals or terminals
|
||||
- extract strings and regexes from non-terminal rules into their own throwaway-terminals,
|
||||
in order to separate lexing from parsing
|
||||
|
||||
After this, a grammar will have these fields:
|
||||
- non-terminal rules
|
||||
- terminal rules
|
||||
- throwaway terminal rules
|
||||
|
||||
Loading…
Add table
Add a link
Reference in a new issue