Revert "Remove the separator characters construct"

This reverts commit 5cd07648fd.

The separators construct is useful as an optimization. It turns out that
constructing a node for every chunk of whitespace in a document causes a
significant performance regression.

Conflicts:
	src/compiler/build_tables/build_lex_table.cc
	src/compiler/grammar.cc
	src/runtime/parser.c
This commit is contained in:
Max Brunsfeld 2014-09-02 07:41:29 -07:00
parent e941f8c175
commit 545e575508
43 changed files with 9065 additions and 11203 deletions

View file

@ -34,6 +34,7 @@ std::ostream &operator<<(std::ostream &stream, const rules::rule_ptr &rule);
class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr> > rules_;
std::set<std::string> ubiquitous_tokens_;
std::set<char> separators_;
public:
Grammar(const std::vector<std::pair<std::string, rules::rule_ptr> > &rules);
@ -43,6 +44,8 @@ class Grammar {
const std::vector<std::pair<std::string, rules::rule_ptr> > &rules() const;
const std::set<std::string> &ubiquitous_tokens() const;
Grammar &ubiquitous_tokens(const std::set<std::string> &ubiquitous_tokens);
const std::set<char> &separators() const;
Grammar &separators(const std::set<char> &separators);
};
struct Conflict {

View file

@ -20,6 +20,7 @@ typedef struct TSLexer {
size_t chunk_size;
size_t position_in_chunk;
size_t token_end_position;
size_t token_start_position;
TSTree *(*accept_fn)(struct TSLexer *, TSSymbol, int);
int (*advance_fn)(struct TSLexer *);
@ -33,6 +34,10 @@ static inline char ts_lexer_lookahead_char(const TSLexer *lexer) {
return lexer->chunk[lexer->position_in_chunk];
}
/*
 * Stores the value currently returned by ts_lexer_position() into the
 * lexer's token_start_position field.
 */
static inline void ts_lexer_start_token(TSLexer *lexer) {
  const size_t current_position = ts_lexer_position(lexer);
  lexer->token_start_position = current_position;
}
/*
 * Invokes the lexer's advance_fn callback on the lexer itself and hands
 * the callback's return value back to the caller unchanged.
 */
static inline int ts_lexer_advance(TSLexer *lexer) {
  const int result = lexer->advance_fn(lexer);
  return result;
}
@ -101,6 +106,8 @@ struct TSLanguage {
lookahead = ts_lexer_lookahead_char(lexer); \
DEBUG_LEX("CHAR '%c'", lookahead);
/*
 * Calls ts_lexer_start_token() on the variable named `lexer`, which must be
 * in scope wherever the macro is expanded. Note that the expansion already
 * ends with a semicolon.
 */
#define START_TOKEN() ts_lexer_start_token(lexer);
#define ADVANCE(state_index) \
{ \
DEBUG_LEX("ADVANCE %d", state_index); \