Start work on lexing

This commit is contained in:
Max Brunsfeld 2013-12-27 17:31:08 -08:00
parent 323184f981
commit a5e39d2512
20 changed files with 719 additions and 334 deletions

View file

@ -19,14 +19,14 @@
1251209B1830145300C9B56A /* rule.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209A1830145300C9B56A /* rule.cpp */; };
125120A018307DEC00C9B56A /* parse_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1251209E18307DEC00C9B56A /* parse_table.cpp */; };
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
129D242C183EB1EB00FE9F71 /* parse_table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */; };
129D242C183EB1EB00FE9F71 /* table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* table_builder.cpp */; };
12D136A4183678A2005F3369 /* repeat.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A2183678A2005F3369 /* repeat.cpp */; };
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64C182DD5FD00FAF50C /* spec_helper.cpp */; };
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12F9A64F182DD6BC00FAF50C /* grammar.cpp */; };
12FD4061185E68470041A84E /* c_code.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD405F185E68470041A84E /* c_code.cpp */; };
12FD4064185E75290041A84E /* generate_parsers.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD4063185E75290041A84E /* generate_parsers.cpp */; };
12FD40B3185EEB5E0041A84E /* seq.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130612182C3A1700FCF928 /* seq.cpp */; };
12FD40B4185EEB5E0041A84E /* parse_table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */; };
12FD40B4185EEB5E0041A84E /* table_builder.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 129D242A183EB1EB00FE9F71 /* table_builder.cpp */; };
12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 125120A3183083BD00C9B56A /* arithmetic.cpp */; };
12FD40B8185EEB5E0041A84E /* item.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130619182C84DF00FCF928 /* item.cpp */; };
12FD40B9185EEB5E0041A84E /* string.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12130615182C3D2900FCF928 /* string.cpp */; };
@ -44,7 +44,7 @@
12FD40CB185EEB5E0041A84E /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; };
12FD40D2185EEB970041A84E /* arithmetic.c in Sources */ = {isa = PBXBuildFile; fileRef = 12FD4065185E7C2F0041A84E /* arithmetic.c */; };
12FD40D5185FEEDB0041A84E /* item_set_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 1213061D182C857100FCF928 /* item_set_spec.cpp */; };
12FD40D6185FEEDB0041A84E /* parse_table_builder_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */; };
12FD40D6185FEEDB0041A84E /* table_builder_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12512092182F307C00C9B56A /* table_builder_spec.cpp */; };
12FD40D7185FEEDB0041A84E /* item_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D1369C18328C5A005F3369 /* item_spec.cpp */; };
12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 121492EA181E200B008E9BDA /* rules_spec.cpp */; };
12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12D136A0183570F5005F3369 /* pattern_spec.cpp */; };
@ -59,6 +59,8 @@
12FD40F01866415D0041A84E /* visitor.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40E618639B910041A84E /* visitor.cpp */; };
12FD40F3186641C00041A84E /* char_match.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F1186641C00041A84E /* char_match.cpp */; };
12FD40F4186641C00041A84E /* char_match.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F1186641C00041A84E /* char_match.cpp */; };
12FD40F7186A16020041A84E /* lex_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F5186A16020041A84E /* lex_table.cpp */; };
12FD40F8186A16030041A84E /* lex_table.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 12FD40F5186A16020041A84E /* lex_table.cpp */; };
27A343CA69E17E0F9EBEDF1C /* pattern.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 27A340F3EEB184C040521323 /* pattern.cpp */; };
/* End PBXBuildFile section */
@ -104,14 +106,14 @@
12130621182C85D300FCF928 /* item_set.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = item_set.h; sourceTree = "<group>"; };
121492E9181E200B008E9BDA /* main.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = main.cpp; path = spec/main.cpp; sourceTree = SOURCE_ROOT; };
121492EA181E200B008E9BDA /* rules_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = rules_spec.cpp; path = spec/compiler/rules/rules_spec.cpp; sourceTree = SOURCE_ROOT; };
12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = parse_table_builder_spec.cpp; path = spec/compiler/lr/parse_table_builder_spec.cpp; sourceTree = SOURCE_ROOT; };
12512092182F307C00C9B56A /* table_builder_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = table_builder_spec.cpp; path = spec/compiler/lr/table_builder_spec.cpp; sourceTree = SOURCE_ROOT; };
1251209A1830145300C9B56A /* rule.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rule.cpp; sourceTree = "<group>"; };
1251209E18307DEC00C9B56A /* parse_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table.cpp; sourceTree = "<group>"; };
1251209F18307DEC00C9B56A /* parse_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table.h; sourceTree = "<group>"; };
125120A218307FFD00C9B56A /* arithmetic.h */ = {isa = PBXFileReference; lastKnownFileType = sourcecode.c.h; name = arithmetic.h; path = spec/fixtures/grammars/arithmetic.h; sourceTree = SOURCE_ROOT; };
125120A3183083BD00C9B56A /* arithmetic.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = arithmetic.cpp; path = spec/fixtures/grammars/arithmetic.cpp; sourceTree = SOURCE_ROOT; };
129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = parse_table_builder.cpp; sourceTree = "<group>"; };
129D242B183EB1EB00FE9F71 /* parse_table_builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = parse_table_builder.h; sourceTree = "<group>"; };
129D242A183EB1EB00FE9F71 /* table_builder.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = table_builder.cpp; sourceTree = "<group>"; };
129D242B183EB1EB00FE9F71 /* table_builder.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = table_builder.h; sourceTree = "<group>"; };
12C344421822F27700B07BE3 /* transition_map.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = transition_map.h; sourceTree = "<group>"; };
12D1369C18328C5A005F3369 /* item_spec.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = item_spec.cpp; path = spec/compiler/lr/item_spec.cpp; sourceTree = SOURCE_ROOT; };
12D1369E18342088005F3369 /* todo.md */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = text; path = todo.md; sourceTree = "<group>"; };
@ -140,6 +142,8 @@
12FD40E818641FB70041A84E /* rules.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = rules.cpp; sourceTree = "<group>"; };
12FD40F1186641C00041A84E /* char_match.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = char_match.cpp; sourceTree = "<group>"; };
12FD40F2186641C00041A84E /* char_match.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = char_match.h; sourceTree = "<group>"; };
12FD40F5186A16020041A84E /* lex_table.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = lex_table.cpp; sourceTree = "<group>"; };
12FD40F6186A16020041A84E /* lex_table.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = lex_table.h; sourceTree = "<group>"; };
27A340F3EEB184C040521323 /* pattern.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = pattern.cpp; sourceTree = "<group>"; };
27A3438C4FA59A3882E8493B /* pattern.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = pattern.h; sourceTree = "<group>"; };
/* End PBXFileReference section */
@ -200,10 +204,12 @@
1213061A182C84DF00FCF928 /* item.h */,
12130620182C85D300FCF928 /* item_set.cpp */,
12130621182C85D300FCF928 /* item_set.h */,
12FD40F5186A16020041A84E /* lex_table.cpp */,
12FD40F6186A16020041A84E /* lex_table.h */,
1251209E18307DEC00C9B56A /* parse_table.cpp */,
1251209F18307DEC00C9B56A /* parse_table.h */,
129D242A183EB1EB00FE9F71 /* parse_table_builder.cpp */,
129D242B183EB1EB00FE9F71 /* parse_table_builder.h */,
129D242A183EB1EB00FE9F71 /* table_builder.cpp */,
129D242B183EB1EB00FE9F71 /* table_builder.h */,
);
path = lr;
sourceTree = "<group>";
@ -212,7 +218,7 @@
isa = PBXGroup;
children = (
1213061D182C857100FCF928 /* item_set_spec.cpp */,
12512092182F307C00C9B56A /* parse_table_builder_spec.cpp */,
12512092182F307C00C9B56A /* table_builder_spec.cpp */,
12D1369C18328C5A005F3369 /* item_spec.cpp */,
);
name = lr;
@ -427,7 +433,7 @@
12FD40D7185FEEDB0041A84E /* item_spec.cpp in Sources */,
12FD40D5185FEEDB0041A84E /* item_set_spec.cpp in Sources */,
12130614182C3A1700FCF928 /* seq.cpp in Sources */,
129D242C183EB1EB00FE9F71 /* parse_table_builder.cpp in Sources */,
129D242C183EB1EB00FE9F71 /* table_builder.cpp in Sources */,
125120A4183083BD00C9B56A /* arithmetic.cpp in Sources */,
1213061B182C84DF00FCF928 /* item.cpp in Sources */,
12FD40D9185FEEDF0041A84E /* pattern_spec.cpp in Sources */,
@ -436,6 +442,7 @@
12130611182C3A1100FCF928 /* blank.cpp in Sources */,
1213060E182C398300FCF928 /* choice.cpp in Sources */,
12F9A64E182DD5FD00FAF50C /* spec_helper.cpp in Sources */,
12FD40F7186A16020041A84E /* lex_table.cpp in Sources */,
12FD40E918641FB70041A84E /* rules.cpp in Sources */,
12FD4061185E68470041A84E /* c_code.cpp in Sources */,
12FD40D8185FEEDF0041A84E /* rules_spec.cpp in Sources */,
@ -443,7 +450,7 @@
12FD4064185E75290041A84E /* generate_parsers.cpp in Sources */,
1214930E181E200B008E9BDA /* main.cpp in Sources */,
12F9A651182DD6BC00FAF50C /* grammar.cpp in Sources */,
12FD40D6185FEEDB0041A84E /* parse_table_builder_spec.cpp in Sources */,
12FD40D6185FEEDB0041A84E /* table_builder_spec.cpp in Sources */,
12D136A4183678A2005F3369 /* repeat.cpp in Sources */,
12FD40F3186641C00041A84E /* char_match.cpp in Sources */,
12FD40E718639B910041A84E /* visitor.cpp in Sources */,
@ -460,7 +467,7 @@
buildActionMask = 2147483647;
files = (
12FD40B3185EEB5E0041A84E /* seq.cpp in Sources */,
12FD40B4185EEB5E0041A84E /* parse_table_builder.cpp in Sources */,
12FD40B4185EEB5E0041A84E /* table_builder.cpp in Sources */,
12FD40B6185EEB5E0041A84E /* arithmetic.cpp in Sources */,
12FD40DD185FF12C0041A84E /* parser.c in Sources */,
12FD40B8185EEB5E0041A84E /* item.cpp in Sources */,
@ -471,6 +478,7 @@
12FD40BD185EEB5E0041A84E /* choice.cpp in Sources */,
12FD40DF1860064C0041A84E /* tree.c in Sources */,
12FD40BF185EEB5E0041A84E /* c_code.cpp in Sources */,
12FD40F8186A16030041A84E /* lex_table.cpp in Sources */,
12FD40D2185EEB970041A84E /* arithmetic.c in Sources */,
12FD40DB185FEF0D0041A84E /* arithmetic_spec.cpp in Sources */,
12FD40C0185EEB5E0041A84E /* parse_table.cpp in Sources */,

View file

@ -33,37 +33,60 @@ TSParser TSParserMake(const char *input);
void TSParserShift(TSParser *parser, TSState state);
void TSParserReduce(TSParser *parser, TSSymbol symbol, int child_count);
void TSParserError(TSParser *parser);
TSState TSParserState(const TSParser *parser);
TSState TSParserParseState(const TSParser *parser);
TSState TSParserLexState(const TSParser *parser);
TSSymbol TSParserLookahead(const TSParser *parser);
#pragma mark - DSL
#define START_PARSER() \
TSParser parser = TSParserMake(input); \
start: \
ts_lex(&parser);
#define START_LEXER() \
start:
#define LOOKAHEAD() \
#define LOOKAHEAD_SYM() \
TSParserLookahead(&parser)
#define LOOKAHEAD_CHAR() \
'a'
#define PARSE_STATE() \
TSParserState(&parser)
TSParserParseState(&parser)
#define LEX_STATE() \
TSParserLexState(parser)
#define SHIFT(number) \
{ TSParserShift(&parser, number); break; }
#define ACCEPT() \
#define ADVANCE(state_index) \
{ break; }
#define ACCEPT_INPUT() \
{ goto done; }
#define ACCEPT_TOKEN(symbol) \
{ goto done; }
#define REDUCE(symbol, child_count) \
{ TSParserReduce(&parser, symbol, child_count); break; }
#define ERROR() \
#define PARSE_ERROR() \
TSParserError(&parser)
#define LEX_ERROR() \
TSParserError(parser)
#define FINISH_PARSER() \
done: \
return parser.tree;
#define FINISH_LEXER() \
done:
#endif
#ifdef __cplusplus

View file

@ -1,5 +1,5 @@
#include "spec_helper.h"
#include "parse_table_builder.h"
#include "table_builder.h"
#include "parse_table.h"
#include "c_code.h"
#include <fstream>
@ -13,8 +13,8 @@ Describe(code_generation) {
It(works_for_the_arithmetic_grammar) {
Grammar grammar = test_grammars::arithmetic();
ParseTable table = build_tables(grammar);
pair<ParseTable, LexTable> tables = build_tables(grammar);
std::ofstream parser_file(test_parser_dir + "/arithmetic.c");
parser_file << code_gen::c_code(grammar, table);
parser_file << code_gen::c_code(grammar, tables.first, tables.second);
}
};

View file

@ -1,43 +0,0 @@
#include "spec_helper.h"
#include "parse_table_builder.h"
#include "parse_table.h"
using namespace tree_sitter::lr;
typedef std::unordered_set<ParseAction> actions;
Describe(ParseTableBuilder_test) {
Grammar grammar = test_grammars::arithmetic();
ParseTable table = build_tables(grammar);
It(has_the_right_starting_state) {
AssertThat(table.states[0].actions, Equals(unordered_map<string, actions>({
{ "expression", actions({ ParseAction::Shift(1) }) },
{ "term", actions({ ParseAction::Shift(2) }) },
{ "factor", actions({ ParseAction::Shift(5) }) },
{ "variable", actions({ ParseAction::Shift(8) }) },
{ "number", actions({ ParseAction::Shift(8) }) },
{ "left_paren", actions({ ParseAction::Shift(9) }) }
})));
}
It(accepts_when_the_start_symbol_is_reduced) {
AssertThat(table.states[1].actions, Equals(unordered_map<string, actions>({
{ ParseTable::END_OF_INPUT, actions({ ParseAction::Accept() }) }
})));
}
It(has_the_right_next_states) {
AssertThat(table.states[2].actions, Equals(unordered_map<string, actions>({
{ "plus", actions({ ParseAction::Shift(3) }) },
})));
AssertThat(table.states[3].actions, Equals(unordered_map<string, actions>({
{ "variable", actions({ ParseAction::Shift(8) }) },
{ "factor", actions({ ParseAction::Shift(5) }) },
{ "left_paren", actions({ ParseAction::Shift(9) }) },
{ "number", actions({ ParseAction::Shift(8) }) },
{ "term", actions({ ParseAction::Shift(4) }) },
})));
}
};

View file

@ -0,0 +1,61 @@
#include "spec_helper.h"
#include "parse_table.h"
#include "lex_table.h"
#include "table_builder.h"
using namespace tree_sitter::lr;
typedef std::unordered_set<ParseAction> parse_actions;
typedef std::unordered_set<LexAction> lex_actions;
// Specs for the table builder: checks the parse states (and the lex state
// attached to the starting parse state) produced for the arithmetic grammar.
Describe(TableBuilderSpec) {
    Grammar grammar = test_grammars::arithmetic();

    // Build the tables once so that `table` and `lex_table` are guaranteed to
    // come from the same build; lex_state() below indexes one with a value
    // read from the other.
    std::pair<ParseTable, LexTable> tables = build_tables(grammar);
    ParseTable table = tables.first;
    LexTable lex_table = tables.second;

    // Returns the parse state stored at `index` in the parse table.
    ParseState parse_state(size_t index) {
        return table.states[index];
    }

    // Returns the lex state that the parse state at `parse_state_index`
    // refers to through its lex_state_index.
    LexState lex_state(size_t parse_state_index) {
        size_t index = table.states[parse_state_index].lex_state_index;
        return lex_table.states[index];
    }

    It(has_the_right_starting_state) {
        AssertThat(parse_state(0).actions, Equals(unordered_map<string, parse_actions>({
            { "expression", parse_actions({ ParseAction::Shift(1) }) },
            { "term", parse_actions({ ParseAction::Shift(2) }) },
            { "factor", parse_actions({ ParseAction::Shift(5) }) },
            { "variable", parse_actions({ ParseAction::Shift(8) }) },
            { "number", parse_actions({ ParseAction::Shift(8) }) },
            { "left_paren", parse_actions({ ParseAction::Shift(9) }) }
        })));

        AssertThat(lex_state(0).actions, Equals(unordered_map<CharMatch, lex_actions>({
            { CharMatchClass(CharClassWord), lex_actions({ LexAction::Advance(1) }) },
            { CharMatchClass(CharClassDigit), lex_actions({ LexAction::Advance(4) }) },
            { CharMatchSpecific('('), lex_actions({ LexAction::Advance(11) }) }
        })));
    }

    It(accepts_when_the_start_symbol_is_reduced) {
        AssertThat(parse_state(1).actions, Equals(unordered_map<string, parse_actions>({
            { ParseTable::END_OF_INPUT, parse_actions({ ParseAction::Accept() }) }
        })));
    }

    It(has_the_right_next_states) {
        AssertThat(parse_state(2).actions, Equals(unordered_map<string, parse_actions>({
            { "plus", parse_actions({ ParseAction::Shift(3) }) },
        })));

        AssertThat(parse_state(3).actions, Equals(unordered_map<string, parse_actions>({
            { "variable", parse_actions({ ParseAction::Shift(8) }) },
            { "factor", parse_actions({ ParseAction::Shift(5) }) },
            { "left_paren", parse_actions({ ParseAction::Shift(9) }) },
            { "number", parse_actions({ ParseAction::Shift(8) }) },
            { "term", parse_actions({ ParseAction::Shift(4) }) },
        })));
    }
};

View file

@ -3,14 +3,16 @@
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const unordered_map<string, unordered_set<ParseAction>> &map) {
template<typename TKey, typename TValue>
std::ostream & stream_map_of_sets(std::ostream &stream, const unordered_map<TKey, unordered_set<TValue>> &map) {
stream << string("{");
bool started = false;
for (auto pair : map) {
if (started) stream << string(", ");
stream << string("{") << pair.first << string(", [");
stream << pair.first;
stream << string(" => [");
bool started_set = false;
for (ParseAction action : pair.second) {
for (TValue action : pair.second) {
if (started_set) stream << ", ";
stream << action;
started_set = true;
@ -18,9 +20,16 @@ namespace tree_sitter {
stream << string("]}");
started = true;
}
stream << string("}");
return stream;
}
// Stream a map of symbol name -> set of parse actions by delegating to the
// generic stream_map_of_sets helper.
std::ostream& operator<<(std::ostream &stream, const unordered_map<string, unordered_set<ParseAction>> &map) {
return stream_map_of_sets(stream, map);
}
// Stream a map of character match -> set of lex actions by delegating to the
// generic stream_map_of_sets helper.
std::ostream& operator<<(std::ostream &stream, const unordered_map<CharMatch, unordered_set<LexAction>> &map) {
return stream_map_of_sets(stream, map);
}
}
}

View file

@ -9,7 +9,7 @@
#include "item_set.h"
#include "grammar.h"
#include "parse_table.h"
#include "parse_table_builder.h"
#include "table_builder.h"
#include "../fixtures/grammars/arithmetic.h"
@ -21,6 +21,7 @@ using namespace igloo;
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const unordered_map<string, unordered_set<ParseAction>> &map);
std::ostream& operator<<(std::ostream &stream, const unordered_map<CharMatch, unordered_set<LexAction>> &map);
}
}

View file

@ -1,208 +1,251 @@
#include "runtime.h"
#include <stdlib.h>
#include <ctype.h>
typedef enum {
ts_symbol_type_expression,
ts_symbol_type_term,
ts_symbol_type_right_paren,
ts_symbol_type_number,
ts_symbol_type_factor,
ts_symbol_type_variable,
ts_symbol_type_plus,
ts_symbol_type_times,
ts_symbol_type_left_paren,
ts_symbol_type___END__
} ts_symbol_type;
ts_symbol_expression,
ts_symbol_term,
ts_symbol_right_paren,
ts_symbol_number,
ts_symbol_factor,
ts_symbol_variable,
ts_symbol_plus,
ts_symbol_times,
ts_symbol_left_paren,
ts_symbol___END__
} ts_symbol;
/*
 * Lexer for the arithmetic grammar. Dispatches on LEX_STATE(); each state
 * tests LOOKAHEAD_CHAR() and then ADVANCEs to another state, ACCEPT_TOKENs a
 * symbol, or reports LEX_ERROR().
 *
 * NOTE(review): with the DSL macros as they appear in this commit,
 * LOOKAHEAD_CHAR() is the constant 'a' and ADVANCE() only breaks out of the
 * switch without using its state argument - presumably placeholders; confirm.
 * NOTE(review): LEX_ERROR() appears to expand to a bare TSParserError(parser)
 * call with no break/goto, so an error in one case would fall through into
 * the following case - confirm this is intended.
 */
static void ts_lex(TSParser *parser) {
START_LEXER();
switch (LEX_STATE()) {
case 0:
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(11);
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(4);
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(1);
LEX_ERROR();
case 1:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(2);
LEX_ERROR();
case 2:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(3);
LEX_ERROR();
case 3:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(1);
LEX_ERROR();
case 4:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(5);
LEX_ERROR();
case 5:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(6);
LEX_ERROR();
case 6:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(7);
LEX_ERROR();
case 7:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(8);
LEX_ERROR();
case 8:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(9);
LEX_ERROR();
case 9:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(10);
LEX_ERROR();
case 10:
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(6);
LEX_ERROR();
case 11:
ACCEPT_TOKEN(ts_symbol_left_paren);
case 12:
/* NOTE(review): ts_symbol___START__ is not among the ts_symbol enumerators
   declared in this file; it presumably only compiles because ACCEPT_TOKEN
   currently ignores its argument - verify. */
ACCEPT_TOKEN(ts_symbol___START__);
case 13:
if (LOOKAHEAD_CHAR() == '+')
ADVANCE(14);
LEX_ERROR();
case 14:
ACCEPT_TOKEN(ts_symbol_plus);
case 15:
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(11);
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(4);
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(16);
LEX_ERROR();
case 16:
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(2);
LEX_ERROR();
case 17:
ACCEPT_TOKEN(ts_symbol_expression);
case 18:
if (LOOKAHEAD_CHAR() == '*')
ADVANCE(19);
LEX_ERROR();
case 19:
ACCEPT_TOKEN(ts_symbol_times);
case 20:
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(11);
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(4);
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(16);
LEX_ERROR();
case 21:
ACCEPT_TOKEN(ts_symbol_term);
case 22:
ACCEPT_TOKEN(ts_symbol_factor);
case 23:
if (LOOKAHEAD_CHAR() == '(')
ADVANCE(11);
if (isdigit(LOOKAHEAD_CHAR()))
ADVANCE(4);
if (isalnum(LOOKAHEAD_CHAR()))
ADVANCE(16);
LEX_ERROR();
case 24:
if (LOOKAHEAD_CHAR() == ')')
ADVANCE(25);
LEX_ERROR();
case 25:
ACCEPT_TOKEN(ts_symbol_right_paren);
case 26:
ACCEPT_TOKEN(ts_symbol_factor);
default:
LEX_ERROR();
}
FINISH_LEXER();
}
TSTree ts_parse_arithmetic(const char *input) {
START_PARSER();
switch (PARSE_STATE()) {
case 0:
switch (LOOKAHEAD()) {
case ts_symbol_type_left_paren:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_left_paren:
SHIFT(9);
case ts_symbol_type_variable:
case ts_symbol_variable:
SHIFT(8);
case ts_symbol_type_factor:
case ts_symbol_factor:
SHIFT(5);
case ts_symbol_type_number:
case ts_symbol_number:
SHIFT(8);
case ts_symbol_type_term:
case ts_symbol_term:
SHIFT(2);
case ts_symbol_type_expression:
case ts_symbol_expression:
SHIFT(1);
default:
ERROR();
PARSE_ERROR();
}
case 1:
switch (LOOKAHEAD()) {
case ts_symbol_type___END__:
ACCEPT();
switch (LOOKAHEAD_SYM()) {
case ts_symbol___END__:
ACCEPT_INPUT();
default:
ERROR();
PARSE_ERROR();
}
case 2:
switch (LOOKAHEAD()) {
case ts_symbol_type_plus:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_plus:
SHIFT(3);
default:
ERROR();
PARSE_ERROR();
}
case 3:
switch (LOOKAHEAD()) {
case ts_symbol_type_variable:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_variable:
SHIFT(8);
case ts_symbol_type_factor:
case ts_symbol_factor:
SHIFT(5);
case ts_symbol_type_left_paren:
case ts_symbol_left_paren:
SHIFT(9);
case ts_symbol_type_number:
case ts_symbol_number:
SHIFT(8);
case ts_symbol_type_term:
case ts_symbol_term:
SHIFT(4);
default:
ERROR();
PARSE_ERROR();
}
case 4:
switch (LOOKAHEAD()) {
case ts_symbol_type_expression:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_term:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_right_paren:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_number:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_factor:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_variable:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_times:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_plus:
REDUCE(ts_symbol_type_expression, 3);
case ts_symbol_type_left_paren:
REDUCE(ts_symbol_type_expression, 3);
switch (LOOKAHEAD_SYM()) {
default:
ERROR();
REDUCE(ts_symbol_expression, 3);
}
case 5:
switch (LOOKAHEAD()) {
case ts_symbol_type_times:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_times:
SHIFT(6);
default:
ERROR();
PARSE_ERROR();
}
case 6:
switch (LOOKAHEAD()) {
case ts_symbol_type_left_paren:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_left_paren:
SHIFT(9);
case ts_symbol_type_number:
case ts_symbol_number:
SHIFT(8);
case ts_symbol_type_variable:
case ts_symbol_variable:
SHIFT(8);
case ts_symbol_type_factor:
case ts_symbol_factor:
SHIFT(7);
default:
ERROR();
PARSE_ERROR();
}
case 7:
switch (LOOKAHEAD()) {
case ts_symbol_type_expression:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_term:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_right_paren:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_number:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_factor:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_variable:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_times:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_plus:
REDUCE(ts_symbol_type_term, 3);
case ts_symbol_type_left_paren:
REDUCE(ts_symbol_type_term, 3);
switch (LOOKAHEAD_SYM()) {
default:
ERROR();
REDUCE(ts_symbol_term, 3);
}
case 8:
switch (LOOKAHEAD()) {
case ts_symbol_type_expression:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_term:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_right_paren:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_number:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_factor:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_variable:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_times:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_plus:
REDUCE(ts_symbol_type_factor, 1);
case ts_symbol_type_left_paren:
REDUCE(ts_symbol_type_factor, 1);
switch (LOOKAHEAD_SYM()) {
default:
ERROR();
REDUCE(ts_symbol_factor, 1);
}
case 9:
switch (LOOKAHEAD()) {
case ts_symbol_type_left_paren:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_left_paren:
SHIFT(9);
case ts_symbol_type_variable:
case ts_symbol_variable:
SHIFT(8);
case ts_symbol_type_factor:
case ts_symbol_factor:
SHIFT(5);
case ts_symbol_type_number:
case ts_symbol_number:
SHIFT(8);
case ts_symbol_type_term:
case ts_symbol_term:
SHIFT(2);
case ts_symbol_type_expression:
case ts_symbol_expression:
SHIFT(10);
default:
ERROR();
PARSE_ERROR();
}
case 10:
switch (LOOKAHEAD()) {
case ts_symbol_type_right_paren:
switch (LOOKAHEAD_SYM()) {
case ts_symbol_right_paren:
SHIFT(11);
default:
ERROR();
PARSE_ERROR();
}
case 11:
switch (LOOKAHEAD()) {
case ts_symbol_type_expression:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_term:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_right_paren:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_number:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_factor:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_variable:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_times:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_plus:
REDUCE(ts_symbol_type_factor, 3);
case ts_symbol_type_left_paren:
REDUCE(ts_symbol_type_factor, 3);
switch (LOOKAHEAD_SYM()) {
default:
ERROR();
REDUCE(ts_symbol_factor, 3);
}
default:
ERROR();
PARSE_ERROR();
}
FINISH_PARSER();
}

View file

@ -10,13 +10,13 @@ namespace tree_sitter {
}
CharMatch CharMatchClass(CharClass value) {
CharMatch result = { .type = CharMatchTypeSpecific };
CharMatch result = { .type = CharMatchTypeClass };
result.value.character = value;
return result;
}
CharMatch CharMatchRange(char min, char max) {
CharMatch result = { .type = CharMatchTypeSpecific };
CharMatch result = { .type = CharMatchTypeRange };
result.value.range.min_character = min;
result.value.range.max_character = max;
return result;

View file

@ -1,6 +1,7 @@
#ifndef __TreeSitter__char_match__
#define __TreeSitter__char_match__
#include <unordered_map>
#include <string>
namespace tree_sitter {
@ -36,4 +37,16 @@ namespace tree_sitter {
std::ostream& operator<<(std::ostream& stream, const CharMatch &rule);
}
namespace std {
    // Hash support so tree_sitter::CharMatch can be used as a key in
    // unordered containers.
    template<>
    struct hash<tree_sitter::CharMatch> {
        // XORs the hash of the match type with the hashes of both range
        // endpoints.
        // NOTE(review): value.range is read whatever match.type is; if
        // `value` is a union this presumably relies on its members
        // overlapping - confirm.
        size_t operator()(const tree_sitter::CharMatch &match) const {
            const size_t type_hash = hash<int>()(match.type);
            const size_t min_hash = hash<char>()(match.value.range.min_character);
            const size_t max_hash = hash<char>()(match.value.range.max_character);
            return type_hash ^ min_hash ^ max_hash;
        }
    };
}
#endif

View file

@ -4,6 +4,7 @@
#include <unordered_set>
using std::string;
using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
@ -22,7 +23,7 @@ namespace tree_sitter {
}
}
string indent(std::string input) {
string indent(string input) {
string tab = " ";
str_replace(input, "\n", "\n" + tab);
return tab + input;
@ -44,48 +45,108 @@ namespace tree_sitter {
indent(body);
}
// Renders a C `if` statement: the condition on its own line followed by the
// indented body and a trailing newline.
string _if(string condition, string body) {
    string statement = "if (";
    statement += condition;
    statement += ")\n";
    statement += indent(body);
    statement += "\n";
    return statement;
}
class CCodeGenerator {
const Grammar grammar;
const ParseTable parse_table;
const LexTable lex_table;
public:
CCodeGenerator(const Grammar &grammar, const ParseTable &parse_table) :
CCodeGenerator(const Grammar &grammar, const ParseTable &parse_table, const LexTable &lex_table) :
grammar(grammar),
parse_table(parse_table)
parse_table(parse_table),
lex_table(lex_table)
{}
string symbol_id(string symbol_name) {
return "ts_symbol_type_" + symbol_name;
return "ts_symbol_" + symbol_name;
}
string code_for_actions(const unordered_set<ParseAction> &actions) {
auto action = *actions.begin();
switch (action.type) {
case ParseActionTypeAccept:
return "ACCEPT();";
case ParseActionTypeShift:
return "SHIFT(" + std::to_string(action.state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action.symbol_name) + ", " + std::to_string(action.child_symbol_count) + ");";
// Builds the C boolean expression that tests LOOKAHEAD_CHAR() against
// `char_match`. Character classes map to <ctype.h> predicates; a specific
// character maps to an equality test against a character literal.
// Returns an empty string for match kinds that have no translation here.
string condition_for_char_match(const CharMatch &char_match) {
    const string value = "LOOKAHEAD_CHAR()";
    switch (char_match.type) {
        case CharMatchTypeClass:
            switch (char_match.value.character_class) {
                case CharClassDigit:
                    return "isdigit(" + value + ")";
                case CharClassWord:
                    return "isalnum(" + value + ")";
                default:
                    // Unknown class: return here rather than falling through
                    // into the specific-character case below.
                    return "";
            }
        case CharMatchTypeSpecific: {
            // Escape characters that cannot appear verbatim inside a C
            // character literal, so the generated source stays valid.
            string literal;
            switch (char_match.value.character) {
                case '\'': literal = "\\'"; break;
                case '\\': literal = "\\\\"; break;
                case '\n': literal = "\\n"; break;
                case '\t': literal = "\\t"; break;
                case '\r': literal = "\\r"; break;
                case '\0': literal = "\\0"; break;
                default: literal = string(1, char_match.value.character); break;
            }
            return value + " == '" + literal + "'";
        }
        default:
            return "";
    }
}
string switch_on_lookahead(const ParseState &parse_state) {
string code_for_parse_actions(const unordered_set<ParseAction> &actions) {
auto action = actions.begin();
if (action == actions.end()) {
return "PARSE_ERROR();";
} else {
switch (action->type) {
case ParseActionTypeAccept:
return "ACCEPT_INPUT();";
case ParseActionTypeShift:
return "SHIFT(" + to_string(action->state_index) + ");";
case ParseActionTypeReduce:
return "REDUCE(" + symbol_id(action->symbol_name) + ", " + std::to_string(action->child_symbol_count) + ");";
default:
return "";
}
}
}
// Emits the C statement for a set of lex actions. An empty set yields
// LEX_ERROR(); otherwise only the first action in the set's iteration order
// is translated. Error actions and unrecognised action types yield an empty
// string.
string code_for_lex_actions(const unordered_set<LexAction> &actions) {
    auto action = actions.begin();
    if (action == actions.end())
        return "LEX_ERROR();";

    switch (action->type) {
        case LexActionTypeAdvance:
            return "ADVANCE(" + to_string(action->state_index) + ");";
        case LexActionTypeAccept:
            return "ACCEPT_TOKEN(" + symbol_id(action->symbol_name) + ");";
        case LexActionTypeError:
        default:
            // The default guarantees every path returns a value, matching
            // code_for_parse_actions.
            return "";
    }
}
string switch_on_lookahead_sym(const ParseState &parse_state) {
string body = "";
for (auto pair : parse_state.actions)
body += _case(symbol_id(pair.first), code_for_actions(pair.second));
body += _default("ERROR();");
return _switch("LOOKAHEAD()", body);
body += _case(symbol_id(pair.first), code_for_parse_actions(pair.second));
body += _default(code_for_parse_actions(parse_state.default_actions));
return _switch("LOOKAHEAD_SYM()", body);
}
string switch_on_current_state(const ParseTable &parse_table) {
// Emits the body for one lex state: an `if` per character match in the
// state's action map, followed by the code for the state's default actions.
string switch_on_lookahead_char(const LexState &parse_state) {
    string code;
    for (const auto &entry : parse_state.actions) {
        const string condition = condition_for_char_match(entry.first);
        const string consequence = code_for_lex_actions(entry.second);
        code += _if(condition, consequence);
    }
    code += code_for_lex_actions(parse_state.default_actions);
    return code;
}
string switch_on_parse_state() {
string body = "";
for (int i = 0; i < parse_table.states.size(); i++)
body += _case(std::to_string(i), switch_on_lookahead(parse_table.states[i]));
body += _default("ERROR();");
body += _case(std::to_string(i), switch_on_lookahead_sym(parse_table.states[i]));
body += _default("PARSE_ERROR();");
return _switch("PARSE_STATE()", body);
}
// Emits the `switch (LEX_STATE())` statement: one case per state in the lex
// table, keyed by the state's index, plus a default that reports LEX_ERROR().
string switch_on_lex_state() {
    string body = "";
    // size_t index avoids comparing a signed int against the unsigned size().
    for (size_t i = 0; i < lex_table.states.size(); i++)
        body += _case(std::to_string(i), switch_on_lookahead_char(lex_table.states[i]));
    body += _default("LEX_ERROR();");
    return _switch("LEX_STATE()", body);
}
string symbol_enum() {
string result = "typedef enum {\n";
@ -93,32 +154,45 @@ namespace tree_sitter {
result += indent(symbol_id(rule_name)) + ",\n";
result += indent(symbol_id(ParseTable::END_OF_INPUT));
return result + "\n"
"} ts_symbol_type;\n";
"} ts_symbol;\n";
}
/**
 * Returns the `#include` preamble of the generated C file, one directive per
 * line, without a trailing newline.
 */
string includes() {
    string preamble;
    preamble += "#include \"runtime.h\"\n";
    preamble += "#include <stdlib.h>\n";
    preamble += "#include <ctype.h>";
    return preamble;
}
/**
 * Emits the generated `ts_parse_arithmetic` C function: START_PARSER(), the
 * switch over parse states, then FINISH_PARSER(), each indented one level.
 */
string parse_function() {
    return
        "TSTree ts_parse_arithmetic(const char *input) {\n" +
        indent("START_PARSER();") + "\n" +
        indent(switch_on_parse_state()) + "\n" +
        indent("FINISH_PARSER();") + "\n"
        "}";
}
/**
 * Emits the generated `ts_lex` C function: START_LEXER(), the switch over
 * lex states, then FINISH_LEXER(), each indented one level.
 */
string lex_function() {
    string body = indent("START_LEXER();") + "\n";
    body += indent(switch_on_lex_state()) + "\n";
    body += indent("FINISH_LEXER();") + "\n";
    return "static void ts_lex(TSParser *parser) {\n" + body + "}";
}
/**
 * Assembles the whole generated C file: include preamble, symbol enum, lexer
 * function and parser function, separated by blank lines.
 */
string code() {
    return
        includes() + "\n\n" +
        symbol_enum() + "\n\n" +
        lex_function() + "\n\n" +
        parse_function() + "\n";
}
};
/**
 * Generates C source text for the given grammar, parse table and lex table
 * by delegating to CCodeGenerator.
 */
string c_code(const Grammar &grammar, const ParseTable &parse_table, const LexTable &lex_table) {
    return CCodeGenerator(grammar, parse_table, lex_table).code();
}
}
}

View file

@ -3,10 +3,11 @@
#include "grammar.h"
#include "parse_table.h"
#include "lex_table.h"
namespace tree_sitter {
namespace code_gen {
// Generates C source text from a grammar and its parse table.
// NOTE(review): this two-argument overload looks like the signature from
// before the lex table was introduced — confirm a definition still exists.
std::string c_code(const Grammar &grammar, const lr::ParseTable &parse_table);
// Generates C source text from a grammar, its parse table and its lex table.
std::string c_code(const Grammar &grammar, const lr::ParseTable &parse_table, const lr::LexTable &lex_table);
}
}

View file

@ -0,0 +1,67 @@
#include "lex_table.h"
using std::string;
using std::to_string;
using std::unordered_map;
using std::unordered_set;
using std::vector;
namespace tree_sitter {
namespace lr {
// Action
// Private constructor; instances are created through the Error(), Advance()
// and Accept() factories. Initializers are listed in member declaration order
// (type, symbol_name, state_index), which is the order they actually run in.
LexAction::LexAction(LexActionType type, size_t state_index, std::string symbol_name) :
    type(type),
    symbol_name(symbol_name),
    state_index(state_index) {}
// Creates an error action: no target state (index -1 converted to size_t) and
// an empty symbol name.
LexAction LexAction::Error() {
    const size_t no_state = static_cast<size_t>(-1);
    return LexAction(LexActionTypeError, no_state, std::string(""));
}
// Creates an advance action targeting the lex state `state_index`; the symbol
// name is left empty.
LexAction LexAction::Advance(size_t state_index) {
    const std::string no_symbol("");
    return LexAction(LexActionTypeAdvance, state_index, no_symbol);
}
// Creates an accept action for the token `symbol_name`; the state index is
// set to -1 converted to size_t.
LexAction LexAction::Accept(std::string symbol_name) {
    const size_t no_state = static_cast<size_t>(-1);
    return LexAction(LexActionTypeAccept, no_state, symbol_name);
}
// Two actions are equal when their type, state index and symbol name all match.
bool LexAction::operator==(const LexAction &other) const {
    if (type != other.type)
        return false;
    if (state_index != other.state_index)
        return false;
    return symbol_name == other.symbol_name;
}
// Writes a readable form of `action` to `stream`: "(error)",
// "(accept <symbol name>)" or "(advance <state index>)".
std::ostream& operator<<(std::ostream &stream, const LexAction &action) {
    switch (action.type) {
        case LexActionTypeError:
            return stream << string("(error)");
        case LexActionTypeAccept:
            return stream << string("(accept ") + action.symbol_name + ")";
        case LexActionTypeAdvance:
            return stream << string("(advance ") + to_string(action.state_index) + ")";
    }

    // Every enumerator is handled above; returning the stream here guarantees
    // no path flows off the end of a value-returning function.
    return stream;
}
// State
// Creates a lex state with an empty action map.
LexState::LexState() :
    actions()
{}
// Table
// Creates a lex table with no states, remembering `rule_names` as its symbol names.
LexTable::LexTable(vector<string> rule_names) :
    symbol_names(rule_names)
{}
// Appends a fresh, empty lex state and returns its index.
size_t LexTable::add_state() {
    const size_t new_index = states.size();
    states.push_back(LexState());
    return new_index;
}
// Adds `action` to the set of actions taken in state `state_index` when the
// input matches `match`. `state_index` is not range-checked.
void LexTable::add_action(size_t state_index, CharMatch match, LexAction action) {
    LexState &state = states[state_index];
    state.actions[match].insert(action);
}
// Adds `action` to the default actions of state `state_index`.
// `state_index` is not range-checked.
void LexTable::add_default_action(size_t state_index, LexAction action) {
    LexState &state = states[state_index];
    state.default_actions.insert(action);
}
}
}

View file

@ -0,0 +1,72 @@
#ifndef __TreeSitter__lex_table__
#define __TreeSitter__lex_table__
#include <unordered_map>
#include <vector>
#include <string>
#include <unordered_set>
#include "char_match.h"
namespace tree_sitter {
namespace lr {
// Kinds of action a lex state can take.
enum LexActionType {
    LexActionTypeAccept = 0,
    LexActionTypeError = 1,
    LexActionTypeAdvance = 2
};
// A single lexer action: accept a token, report an error, or advance to
// another lex state. Instances are built through the static factories.
class LexAction {
private:
    LexAction(LexActionType type, size_t state_index, std::string symbol_name);

public:
    // Factories, one per action type.
    static LexAction Accept(std::string symbol_name);
    static LexAction Error();
    static LexAction Advance(size_t state_index);

    bool operator==(const LexAction &action) const;

    LexActionType type;       // which kind of action this is
    std::string symbol_name;  // token name; set by Accept()
    size_t state_index;       // target lex state; set by Advance()
};
}
}
namespace std {
    // Hash for LexAction so it can be stored in unordered containers: the XOR
    // of the hashes of its type, symbol name and state index.
    template<>
    struct hash<tree_sitter::lr::LexAction> {
        size_t operator()(const tree_sitter::lr::LexAction &action) const {
            const size_t type_hash = hash<int>()(action.type);
            const size_t name_hash = hash<string>()(action.symbol_name);
            const size_t state_hash = hash<size_t>()(action.state_index);
            return type_hash ^ name_hash ^ state_hash;
        }
    };
}
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const LexAction &item);
// One state of the lexer: per-character-match actions plus default actions.
class LexState {
public:
    LexState();

    // Actions keyed by the character match that triggers them.
    std::unordered_map<CharMatch, std::unordered_set<LexAction>> actions;

    // Actions stored separately from the per-match map.
    std::unordered_set<LexAction> default_actions;
};
// The lexer's state table: an indexed list of LexState plus the symbol names
// it was created with.
class LexTable {
public:
    LexTable(std::vector<std::string> rule_names);

    // Appends an empty state and returns its index.
    size_t add_state();

    // Register actions on an existing state (by index).
    void add_action(size_t state_index, CharMatch match, LexAction action);
    void add_default_action(size_t state_index, LexAction action);

    // NOTE(review): no out-of-class definitions for these two constants are
    // visible in lex_table.cpp — confirm they are defined before being used.
    static const std::string START;
    static const std::string END_OF_INPUT;

    std::vector<LexState> states;
    const std::vector<std::string> symbol_names;
};
}
}
#endif

View file

@ -48,7 +48,11 @@ namespace tree_sitter {
}
// State
// Creates a parse state with no actions, no default actions and no lex state
// assigned yet (lex_state_index is -1 converted to size_t).
ParseState::ParseState() :
    actions(unordered_map<string, unordered_set<ParseAction>>()),
    default_actions(unordered_set<ParseAction>()),
    lex_state_index(-1)
{}
// Table
ParseTable::ParseTable(vector<string> symbol_names) :
@ -63,8 +67,12 @@ namespace tree_sitter {
void ParseTable::add_action(size_t state_index, string sym_name, ParseAction action) {
states[state_index].actions[sym_name].insert(action);
}
void ParseTable::add_default_action(size_t state_index, ParseAction action) {
states[state_index].default_actions.insert(action);
}
const string ParseTable::START = "__START__";
const string ParseTable::END_OF_INPUT = "__END__";
}
}
}

View file

@ -29,13 +29,32 @@ namespace tree_sitter {
std::string symbol_name;
size_t state_index;
};
}
}
namespace std {
    // Hash for ParseAction so it can be stored in unordered containers: the
    // XOR of the hashes of its type, symbol name, state index and child
    // symbol count.
    template<>
    struct hash<tree_sitter::lr::ParseAction> {
        size_t operator()(const tree_sitter::lr::ParseAction &action) const {
            const size_t type_hash = hash<int>()(action.type);
            const size_t name_hash = hash<string>()(action.symbol_name);
            const size_t state_hash = hash<size_t>()(action.state_index);
            const size_t count_hash = hash<size_t>()(action.child_symbol_count);
            return type_hash ^ name_hash ^ state_hash ^ count_hash;
        }
    };
}
namespace tree_sitter {
namespace lr {
std::ostream& operator<<(std::ostream &stream, const ParseAction &item);
// One state of the parser: per-symbol actions, default actions, and the index
// of the lex state associated with it.
class ParseState {
public:
    ParseState();

    // Actions keyed by lookahead symbol name.
    std::unordered_map<std::string, std::unordered_set<ParseAction>> actions;

    // Actions stored separately from the per-symbol map.
    std::unordered_set<ParseAction> default_actions;

    // Index into the lex table's states; the constructor initializes it to -1.
    size_t lex_state_index;
};
class ParseTable {
@ -44,6 +63,7 @@ namespace tree_sitter {
size_t add_state();
void add_action(size_t state_index, std::string symbol_name, ParseAction action);
void add_default_action(size_t state_index, ParseAction action);
static const std::string START;
static const std::string END_OF_INPUT;
@ -53,17 +73,4 @@ namespace tree_sitter {
}
}
// std::hash<tree_sitter::lr::ParseAction> is specialized once, earlier in this
// header, ahead of the first std::unordered_set<ParseAction> member. The second
// copy that sat here redefined the same specialization and has been dropped.
#endif

View file

@ -1,78 +0,0 @@
#include "parse_table_builder.h"
#include <unordered_map>
#include "item_set.h"
#include "rules.h"
#include "item_set.h"
#include "grammar.h"
using namespace std;
namespace tree_sitter {
namespace lr {
static int NOT_FOUND = -1;
class ParseTableBuilder {
const Grammar grammar;
std::unordered_map<const ItemSet, size_t> state_indices;
ParseTable table;
long state_index_for_item_set(const ItemSet &item_set) const {
auto entry = state_indices.find(item_set);
return (entry == state_indices.end()) ? NOT_FOUND : entry->second;
}
void add_shift_actions(const ItemSet &item_set, size_t state_index) {
for (auto transition : item_set.sym_transitions(grammar)) {
rules::Symbol symbol = *transition.first;
ItemSet item_set = *transition.second;
size_t new_state_index = add_item_set(item_set);
table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index));
}
}
void add_reduce_actions(const ItemSet &item_set, size_t state_index) {
for (Item item : item_set) {
if (item.is_done()) {
if (item.rule_name == ParseTable::START) {
table.add_action(state_index, ParseTable::END_OF_INPUT, ParseAction::Accept());
} else {
for (string rule_name : table.symbol_names)
table.add_action(state_index, rule_name, ParseAction::Reduce(item.rule_name, item.consumed_sym_count));
}
}
}
}
size_t add_item_set(const ItemSet &item_set) {
auto state_index = state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = table.add_state();
state_indices[item_set] = state_index;
add_shift_actions(item_set, state_index);
add_reduce_actions(item_set, state_index);
}
return state_index;
}
public:
ParseTableBuilder(const Grammar &grammar) :
grammar(grammar),
table(ParseTable(grammar.rule_names())),
state_indices(unordered_map<const ItemSet, size_t>())
{};
ParseTable build() {
auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0);
auto item_set = ItemSet(item, grammar);
add_item_set(item_set);
return table;
}
};
// Builds the parse table for `grammar` using a temporary ParseTableBuilder.
ParseTable build_tables(const tree_sitter::Grammar &grammar) {
    ParseTableBuilder builder(grammar);
    return builder.build();
}
}
}

View file

@ -0,0 +1,115 @@
#include "table_builder.h"
#include <unordered_map>
#include "item_set.h"
#include "rules.h"
#include "item_set.h"
#include "grammar.h"
using namespace std;
namespace tree_sitter {
namespace lr {
static int NOT_FOUND = -1;
class ParseTableBuilder {
const Grammar grammar;
std::unordered_map<const ItemSet, size_t> parse_state_indices;
std::unordered_map<const ItemSet, size_t> lex_state_indices;
ParseTable parse_table;
LexTable lex_table;
long parse_state_index_for_item_set(const ItemSet &item_set) const {
auto entry = parse_state_indices.find(item_set);
return (entry == parse_state_indices.end()) ? NOT_FOUND : entry->second;
}
long lex_state_index_for_item_set(const ItemSet &item_set) const {
auto entry = lex_state_indices.find(item_set);
return (entry == lex_state_indices.end()) ? NOT_FOUND : entry->second;
}
void add_shift_actions(const ItemSet &item_set, size_t state_index) {
for (auto transition : item_set.sym_transitions(grammar)) {
rules::Symbol symbol = *transition.first;
ItemSet item_set = *transition.second;
size_t new_state_index = add_parse_state(item_set);
parse_table.add_action(state_index, symbol.name, ParseAction::Shift(new_state_index));
}
}
void add_advance_actions(const ItemSet &item_set, size_t state_index) {
for (auto transition : item_set.char_transitions(grammar)) {
rules::Character rule = *transition.first;
ItemSet item_set = *transition.second;
size_t new_state_index = add_lex_state(item_set);
lex_table.add_action(state_index, rule.value, LexAction::Advance(new_state_index));
}
}
void add_accept_token_actions(const ItemSet &item_set, size_t state_index) {
for (Item item : item_set) {
if (item.is_done()) {
lex_table.add_default_action(state_index, LexAction::Accept(item.rule_name));
}
}
}
void add_reduce_actions(const ItemSet &item_set, size_t state_index) {
for (Item item : item_set) {
if (item.is_done()) {
if (item.rule_name == ParseTable::START) {
parse_table.add_action(state_index, ParseTable::END_OF_INPUT, ParseAction::Accept());
} else {
parse_table.add_default_action(state_index, ParseAction::Reduce(item.rule_name, item.consumed_sym_count));
}
}
}
}
size_t add_lex_state(const ItemSet &item_set) {
auto state_index = lex_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = lex_table.add_state();
lex_state_indices[item_set] = state_index;
add_advance_actions(item_set, state_index);
add_accept_token_actions(item_set, state_index);
}
return state_index;
}
size_t add_parse_state(const ItemSet &item_set) {
auto state_index = parse_state_index_for_item_set(item_set);
if (state_index == NOT_FOUND) {
state_index = parse_table.add_state();
parse_state_indices[item_set] = state_index;
parse_table.states[state_index].lex_state_index = add_lex_state(item_set);
add_shift_actions(item_set, state_index);
add_reduce_actions(item_set, state_index);
}
return state_index;
}
public:
ParseTableBuilder(const Grammar &grammar) :
grammar(grammar),
parse_table(ParseTable(grammar.rule_names())),
lex_table(LexTable(grammar.rule_names())),
parse_state_indices(unordered_map<const ItemSet, size_t>()),
lex_state_indices(unordered_map<const ItemSet, size_t>())
{};
std::pair<ParseTable, LexTable> build() {
auto item = Item(ParseTable::START, rules::sym(grammar.start_rule_name), 0);
auto item_set = ItemSet(item, grammar);
add_parse_state(item_set);
return std::pair<ParseTable, LexTable>(parse_table, lex_table);
}
};
// Builds the parse table and lex table for `grammar` using a temporary
// ParseTableBuilder.
std::pair<ParseTable, LexTable> build_tables(const tree_sitter::Grammar &grammar) {
    ParseTableBuilder builder(grammar);
    return builder.build();
}
}
}

View file

@ -2,12 +2,13 @@
#define __TreeSitter__parse_table_builder__
#include "parse_table.h"
#include "lex_table.h"
namespace tree_sitter {
    class Grammar;

    namespace lr {
        // Builds the parse table and the lex table for `grammar`; the pair's
        // first element is the parse table, the second the lex table.
        std::pair<ParseTable, LexTable> build_tables(const Grammar &grammar);
    }
}

View file

@ -36,7 +36,10 @@ TSSymbol TSParserLookahead(const TSParser *parser) {
return 1;
}
/* Stub: reports a fixed parse state (5) regardless of the parser passed in. */
TSState TSParserParseState(const TSParser *parser) {
    return 5;
}
/* Stub: reports a fixed lex state (5) regardless of the parser passed in. */
TSState TSParserLexState(const TSParser *parser) {
    const TSState stub_state = 5;
    return stub_state;
}