diff --git a/spec/compiler/build_tables/rule_transitions_spec.cc b/spec/compiler/build_tables/rule_transitions_spec.cc index c0bb7e2e..fe132b1d 100644 --- a/spec/compiler/build_tables/rule_transitions_spec.cc +++ b/spec/compiler/build_tables/rule_transitions_spec.cc @@ -101,15 +101,15 @@ describe("char_transitions", []() { AssertThat( char_transitions(choice({ seq({ - character({ {'a', 's'} }), + character({ 'a', 'b', 'c', 'd' }), sym("x") }), seq({ - character({ { 'm', 'z' } }), + character({ 'c', 'd', 'e', 'f' }), sym("y") }) })), Equals(rule_map({ - { CharacterSet().include('a','l'), sym("x") }, - { CharacterSet().include('m','s'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('t','z'), sym("y") }, + { CharacterSet().include('a', 'b'), sym("x") }, + { CharacterSet().include('c', 'd'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('e', 'f'), sym("y") }, }))); }); @@ -143,27 +143,27 @@ describe("char_transitions", []() { AssertThat( char_transitions(choice({ seq({ - character({ {'a', 'c'} }), + character({ 'b', 'c', 'd' }), sym("x") }), seq({ - character({ { 'a', 'z' } }), + character({ 'a', 'b', 'c', 'd', 'e', 'f' }), sym("y") }) })), Equals(rule_map({ - { CharacterSet().include('a', 'c'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('d', 'z'), sym("y") }, + { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('a').include('e', 'f'), sym("y") }, }))); AssertThat( char_transitions(choice({ seq({ - character({ { 'a', 'z' } }), + character({ 'a', 'b', 'c', 'd', 'e', 'f' }), sym("x") }), seq({ - character({ {'a', 'c'} }), + character({ 'b', 'c', 'd' }), sym("y") }) })), Equals(rule_map({ - { CharacterSet().include('a', 'c'), choice({ sym("x"), sym("y") }) }, - { CharacterSet().include('d', 'z'), sym("x") }, + { CharacterSet().include('b', 'd'), choice({ sym("x"), sym("y") }) }, + { CharacterSet().include('a').include('e', 'f'), sym("x") }, }))); }); diff --git a/spec/compiler/helpers/rule_helpers.cc b/spec/compiler/helpers/rule_helpers.cc index 60a7c616..042793eb 100644 --- a/spec/compiler/helpers/rule_helpers.cc +++ b/spec/compiler/helpers/rule_helpers.cc @@ -8,19 +8,19 @@ namespace tree_sitter { using std::map; namespace rules { - rule_ptr character(const set &ranges) { + rule_ptr character(const set &ranges) { return character(ranges, true); } - rule_ptr character(const set &ranges, bool sign) { + rule_ptr character(const set &chars, bool sign) { CharacterSet result; if (sign) { - for (auto &range : ranges) - result.include(range.min, range.max); + for (uint32_t c : chars) + result.include(c); } else { result.include_all(); - for (auto &range : ranges) - result.exclude(range.min, range.max); + for (uint32_t c : chars) + result.exclude(c); } return result.copy(); } diff --git a/spec/compiler/helpers/rule_helpers.h b/spec/compiler/helpers/rule_helpers.h index 57cb02c4..a18318cb 100644 --- a/spec/compiler/helpers/rule_helpers.h +++ b/spec/compiler/helpers/rule_helpers.h @@ -8,8 +8,8 @@ namespace tree_sitter { namespace rules { rule_ptr metadata(rule_ptr, std::map); - rule_ptr character(const std::set &ranges); - rule_ptr character(const std::set &ranges, bool sign); + rule_ptr character(const std::set &); + rule_ptr character(const std::set &, bool sign); rule_ptr i_sym(size_t index); rule_ptr i_aux_sym(size_t index); rule_ptr i_token(size_t index); diff --git a/spec/compiler/prepare_grammar/parse_regex_spec.cc b/spec/compiler/prepare_grammar/parse_regex_spec.cc index 743872b9..2b1f530b 100644 --- a/spec/compiler/prepare_grammar/parse_regex_spec.cc +++ b/spec/compiler/prepare_grammar/parse_regex_spec.cc @@ -30,9 +30,15 @@ describe("parse_regex", []() { "character classes", "\\w-\\d", seq({ - character({ {'a', 'z'}, {'A', 'Z'}, {'0', '9'} }), + character({ + 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', + 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', + 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + }), character({ '-' }), - character({ {'0', '9'} }) }) + character({ '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }) }) }, { @@ -66,13 +72,16 @@ describe("parse_regex", []() { { "character ranges", "[12a-dA-D3]", - character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, }) + character({ + '1', '2', '3', + 'a', 'b', 'c', 'd', + 'A', 'B', 'C', 'D' }) }, { "negated characters", "[^a\\d]", - character({ {'a'}, {'0', '9'} }, false) + character({ 'a', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9' }, false) }, { diff --git a/src/compiler/prepare_grammar/extract_tokens.cc b/src/compiler/prepare_grammar/extract_tokens.cc index 6b73c00a..30b2a20c 100644 --- a/src/compiler/prepare_grammar/extract_tokens.cc +++ b/src/compiler/prepare_grammar/extract_tokens.cc @@ -60,7 +60,7 @@ class SymbolInliner : public rules::IdentityRuleFn { return Symbol(new_index_for_symbol(rule), rule.options); } - SymbolInliner(const map &replacements) + explicit SymbolInliner(const map &replacements) : replacements(replacements) {} }; diff --git a/src/compiler/prepare_grammar/parse_regex.cc b/src/compiler/prepare_grammar/parse_regex.cc index 1c69a8e1..1d400ed7 100644 --- a/src/compiler/prepare_grammar/parse_regex.cc +++ b/src/compiler/prepare_grammar/parse_regex.cc @@ -22,7 +22,6 @@ using rules::Seq; using rules::Blank; using rules::Choice; using rules::Repeat; -using rules::CharacterRange; using rules::blank; class PatternParser { diff --git a/src/compiler/rules/character_range.cc b/src/compiler/rules/character_range.cc index bff44033..a2638559 100644 --- a/src/compiler/rules/character_range.cc +++ b/src/compiler/rules/character_range.cc @@ -8,8 +8,6 @@ namespace rules { using std::ostream; using std::string; -static const unsigned char MAX_CHAR = -1; - CharacterRange::CharacterRange(unsigned char value) : min(value), max(value) {} CharacterRange::CharacterRange(unsigned char min, unsigned char max) : min(min), max(max) {} @@ -31,23 +29,19 @@ bool CharacterRange::operator<(const CharacterRange &other) const { string escape_character(unsigned char input) { switch (input) { case '\0': - return ""; + return "\\0"; case '\n': return "\\n"; case '\r': return "\\r"; case '\t': return "\\t"; - case MAX_CHAR: - return ""; default: return string() + static_cast(input); } } string CharacterRange::to_string() const { - if (min == 0 && max == MAX_CHAR) - return ""; if (min == max) return escape_character(min); else diff --git a/src/compiler/rules/character_range.h b/src/compiler/rules/character_range.h index c11e6c75..ecb73eb0 100644 --- a/src/compiler/rules/character_range.h +++ b/src/compiler/rules/character_range.h @@ -8,33 +8,18 @@ namespace tree_sitter { namespace rules { struct CharacterRange { - unsigned char min; - unsigned char max; + uint32_t min; + uint32_t max; - // IMPLICIT_CONSTRUCTORS - CharacterRange(unsigned char value); - CharacterRange(unsigned char min, unsigned char max); + explicit CharacterRange(uint32_t value); + explicit CharacterRange(uint32_t min, uint32_t max); bool operator==(const CharacterRange &other) const; bool operator<(const CharacterRange &others) const; std::string to_string() const; }; -std::ostream &operator<<(std::ostream &stream, const CharacterRange &rule); - } // namespace rules } // namespace tree_sitter -namespace std { - -template <> -struct hash { - size_t operator()(const tree_sitter::rules::CharacterRange &range) const { - return (hash()(range.min) ^ - hash()(range.max)); - } -}; - -} // namespace std - #endif // COMPILER_RULES_CHARACTER_RANGE_H_ diff --git a/src/compiler/rules/character_set.cc b/src/compiler/rules/character_set.cc index 5b0fec35..175ca4fc 100644 --- a/src/compiler/rules/character_set.cc +++ b/src/compiler/rules/character_set.cc @@ -12,13 +12,14 @@ using std::hash; using std::set; using std::vector; -static void add_range(set *characters, CharacterRange range) { - for (uint32_t c = range.min; c <= range.max; c++) +static void add_range(set *characters, uint32_t min, uint32_t max) { + for (uint32_t c = min; c <= max; c++) characters->insert(c); } -static void remove_range(set *characters, CharacterRange range) { - for (uint32_t c = range.min; c <= range.max; c++) +static void remove_range(set *characters, uint32_t min, + uint32_t max) { + for (uint32_t c = min; c <= max; c++) characters->erase(c); } @@ -53,9 +54,9 @@ static vector consolidate_ranges(const set &chars) { if (last.min < last.max && last.max == (c - 1)) last.max = c; else - result.push_back(c); + result.push_back(CharacterRange(c)); } else { - result.push_back(c); + result.push_back(CharacterRange(c)); } } return result; @@ -124,17 +125,17 @@ CharacterSet &CharacterSet::include_all() { CharacterSet &CharacterSet::include(uint32_t min, uint32_t max) { if (includes_all) - remove_range(&excluded_chars, CharacterRange(min, max)); + remove_range(&excluded_chars, min, max); else - add_range(&included_chars, CharacterRange(min, max)); + add_range(&included_chars, min, max); return *this; } CharacterSet &CharacterSet::exclude(uint32_t min, uint32_t max) { if (includes_all) - add_range(&excluded_chars, CharacterRange(min, max)); + add_range(&excluded_chars, min, max); else - remove_range(&included_chars, CharacterRange(min, max)); + remove_range(&included_chars, min, max); return *this; } diff --git a/src/compiler/rules/character_set.h b/src/compiler/rules/character_set.h index 9fed4aea..0e19173b 100644 --- a/src/compiler/rules/character_set.h +++ b/src/compiler/rules/character_set.h @@ -1,8 +1,8 @@ #ifndef COMPILER_RULES_CHARACTER_SET_H_ #define COMPILER_RULES_CHARACTER_SET_H_ -#include #include +#include #include #include #include diff --git a/src/compiler/util/string_helpers.cc b/src/compiler/util/string_helpers.cc index 26603666..0c6a487a 100644 --- a/src/compiler/util/string_helpers.cc +++ b/src/compiler/util/string_helpers.cc @@ -1,6 +1,5 @@ #include "compiler/util/string_helpers.h" #include -#include namespace tree_sitter { namespace util { @@ -46,7 +45,7 @@ string escape_char(char character) { return string("'") + character + "'"; } else { char buffer[5]; - sprintf(buffer, "%d", int(character)); + snprintf(buffer, sizeof(buffer), "%d", static_cast(character)); return string(buffer); } }