Implement character set difference
This commit is contained in:
parent
b94fa3ed35
commit
df3397f02c
7 changed files with 200 additions and 139 deletions
|
|
@ -30,33 +30,63 @@ describe("character sets", []() {
|
|||
describe("computing unions", []() {
|
||||
it("works for disjoint sets", []() {
|
||||
CharacterSet set({ {'a', 'z'} }, true);
|
||||
set.union_with(CharacterSet({ {'A', 'Z'} }, true));
|
||||
set.add_set(CharacterSet({ {'A', 'Z'} }, true));
|
||||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'}, {'A', 'Z'}, })));
|
||||
});
|
||||
|
||||
it("works for sets with adjacent ranges", []() {
|
||||
CharacterSet set({ {'a', 'r'} }, true);
|
||||
set.union_with(CharacterSet({ {'s', 'z'} }, true));
|
||||
set.add_set(CharacterSet({ {'s', 'z'} }, true));
|
||||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
|
||||
|
||||
set = CharacterSet({ 'c' });
|
||||
auto c = set.complement();
|
||||
set.union_with(c);
|
||||
set.add_set(c);
|
||||
AssertThat(set, Equals(CharacterSet({ {0, -1} }, true)));
|
||||
});
|
||||
|
||||
it("works when the result becomes a continuous range", []() {
|
||||
CharacterSet set({ {'a', 'd'}, {'f', 'z'} }, true);
|
||||
set.union_with(CharacterSet({ {'c', 'g'} }, true));
|
||||
set.add_set(CharacterSet({ {'c', 'g'} }, true));
|
||||
AssertThat(set, Equals(CharacterSet({ {'a', 'z'} }, true)));
|
||||
});
|
||||
|
||||
it("does nothing for the set of all characters", []() {
|
||||
CharacterSet set({ 'a' });
|
||||
set.union_with(set.complement());
|
||||
set.add_set(set.complement());
|
||||
AssertThat(set, Equals(CharacterSet({ {'\0', '\xff'} }, true)));
|
||||
});
|
||||
});
|
||||
|
||||
describe("computing differences", []() {
|
||||
it("works for disjoint sets", []() {
|
||||
CharacterSet set1({ {'a','z'} }, true);
|
||||
set1.remove_set(CharacterSet({ {'A','Z'} }, true));
|
||||
AssertThat(set1, Equals(CharacterSet({ {'a', 'z'} }, true)));
|
||||
});
|
||||
|
||||
it("works when one set spans the other", []() {
|
||||
CharacterSet set1({ {'a','z'} }, true);
|
||||
set1.remove_set(CharacterSet({ {'d','s'} }, true));
|
||||
AssertThat(set1, Equals(CharacterSet({ {'a', 'c'}, {'t', 'z'} })));
|
||||
});
|
||||
|
||||
it("works for sets that overlap", []() {
|
||||
CharacterSet set1({ {'a','s'} }, true);
|
||||
set1.remove_set(CharacterSet({ {'m','z'} }, true));
|
||||
AssertThat(set1, Equals(CharacterSet({ {'a', 'l'} }, true)));
|
||||
|
||||
CharacterSet set2({ {'m','z'} }, true);
|
||||
set2.remove_set(CharacterSet({ {'a','s'} }, true));
|
||||
AssertThat(set2, Equals(CharacterSet({ {'t', 'z'} }, true)));
|
||||
});
|
||||
|
||||
it("works for sets with multiple ranges", []() {
|
||||
CharacterSet set1({ {'a','d'}, {'m', 'z'} });
|
||||
set1.remove_set(CharacterSet({ {'c','o'}, {'s','x'} }));
|
||||
AssertThat(set1, Equals(CharacterSet({ {'a', 'b'}, {'p','r'}, {'y','z'} })));
|
||||
});
|
||||
});
|
||||
});
|
||||
|
||||
END_TEST
|
||||
56
spec/fixtures/parsers/arithmetic.c
vendored
56
spec/fixtures/parsers/arithmetic.c
vendored
|
|
@ -5,12 +5,12 @@ enum ts_symbol {
|
|||
ts_symbol_factor,
|
||||
ts_aux_token1,
|
||||
ts_symbol_plus,
|
||||
ts_aux_token2,
|
||||
ts_symbol_number,
|
||||
ts_symbol_times,
|
||||
ts_symbol_expression,
|
||||
ts_symbol_variable,
|
||||
ts_aux_token2,
|
||||
ts_symbol_term,
|
||||
ts_symbol_variable,
|
||||
ts_symbol_expression,
|
||||
ts_symbol___END__,
|
||||
};
|
||||
|
||||
|
|
@ -18,12 +18,12 @@ static const char *ts_symbol_names[] = {
|
|||
"factor",
|
||||
"token1",
|
||||
"plus",
|
||||
"token2",
|
||||
"number",
|
||||
"times",
|
||||
"expression",
|
||||
"variable",
|
||||
"token2",
|
||||
"term",
|
||||
"variable",
|
||||
"expression",
|
||||
"__END__",
|
||||
};
|
||||
|
||||
|
|
@ -73,11 +73,11 @@ static void ts_lex(TSParser *parser) {
|
|||
ADVANCE(8);
|
||||
LEX_ERROR(2, EXPECT({")", "+"}));
|
||||
case 10:
|
||||
if ((LOOKAHEAD_CHAR() == '('))
|
||||
ADVANCE(12);
|
||||
if (('A' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'Z') ||
|
||||
('a' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= 'z'))
|
||||
ADVANCE(13);
|
||||
if ((LOOKAHEAD_CHAR() == '('))
|
||||
ADVANCE(12);
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(4, EXPECT({"(", "0-9", "A-Z", "a-z"}));
|
||||
|
|
@ -124,10 +124,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
SHIFT(42);
|
||||
case ts_symbol_number:
|
||||
SHIFT(41);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(41);
|
||||
case ts_symbol_term:
|
||||
SHIFT(2);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(41);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(1);
|
||||
default:
|
||||
|
|
@ -190,12 +190,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(32);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(32);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -252,12 +252,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(23);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(23);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -282,12 +282,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(14);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(14);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -356,12 +356,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(20);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(20);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -452,12 +452,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(29);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(29);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -544,12 +544,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(38);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(38);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -598,12 +598,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(43);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(43);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
@ -634,10 +634,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 45:
|
||||
SET_LEX_STATE(15);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_plus:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol___END__:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol_plus:
|
||||
REDUCE(ts_symbol_term, 1, COLLAPSE({0}));
|
||||
case ts_symbol_times:
|
||||
SHIFT(46);
|
||||
default:
|
||||
|
|
@ -672,12 +672,12 @@ static TSParseResult ts_parse(const char *input) {
|
|||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_factor:
|
||||
SHIFT(16);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(49);
|
||||
case ts_aux_token1:
|
||||
SHIFT(13);
|
||||
case ts_symbol_number:
|
||||
SHIFT(12);
|
||||
case ts_symbol_expression:
|
||||
SHIFT(49);
|
||||
case ts_symbol_variable:
|
||||
SHIFT(12);
|
||||
case ts_symbol_term:
|
||||
|
|
|
|||
144
spec/fixtures/parsers/json.c
vendored
144
spec/fixtures/parsers/json.c
vendored
|
|
@ -4,36 +4,36 @@
|
|||
enum ts_symbol {
|
||||
ts_aux_token6,
|
||||
ts_symbol_number,
|
||||
ts_symbol_string,
|
||||
ts_symbol_array,
|
||||
ts_symbol_value,
|
||||
ts_aux_token5,
|
||||
ts_aux_repeat_helper1,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token4,
|
||||
ts_aux_token3,
|
||||
ts_aux_token1,
|
||||
ts_symbol_object,
|
||||
ts_aux_token2,
|
||||
ts_aux_token5,
|
||||
ts_aux_token7,
|
||||
ts_aux_token4,
|
||||
ts_aux_repeat_helper2,
|
||||
ts_aux_token1,
|
||||
ts_aux_token3,
|
||||
ts_symbol_value,
|
||||
ts_symbol_string,
|
||||
ts_aux_token2,
|
||||
ts_symbol_array,
|
||||
ts_aux_repeat_helper1,
|
||||
ts_symbol___END__,
|
||||
};
|
||||
|
||||
static const char *ts_symbol_names[] = {
|
||||
"token6",
|
||||
"number",
|
||||
"string",
|
||||
"array",
|
||||
"value",
|
||||
"token5",
|
||||
"repeat_helper1",
|
||||
"repeat_helper2",
|
||||
"token4",
|
||||
"token3",
|
||||
"token1",
|
||||
"object",
|
||||
"token2",
|
||||
"token5",
|
||||
"token7",
|
||||
"token4",
|
||||
"repeat_helper2",
|
||||
"token1",
|
||||
"token3",
|
||||
"value",
|
||||
"string",
|
||||
"token2",
|
||||
"array",
|
||||
"repeat_helper1",
|
||||
"__END__",
|
||||
};
|
||||
|
||||
|
|
@ -79,10 +79,10 @@ static void ts_lex(TSParser *parser) {
|
|||
case 10:
|
||||
if ((LOOKAHEAD_CHAR() == '{'))
|
||||
ADVANCE(16);
|
||||
if ((LOOKAHEAD_CHAR() == '\"'))
|
||||
ADVANCE(12);
|
||||
if ((LOOKAHEAD_CHAR() == '['))
|
||||
ADVANCE(15);
|
||||
if ((LOOKAHEAD_CHAR() == '\"'))
|
||||
ADVANCE(12);
|
||||
if (('0' <= LOOKAHEAD_CHAR() && LOOKAHEAD_CHAR() <= '9'))
|
||||
ADVANCE(11);
|
||||
LEX_ERROR(4, EXPECT({"\"", "0-9", "[", "{"}));
|
||||
|
|
@ -128,14 +128,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 0:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(53);
|
||||
case ts_symbol_string:
|
||||
SHIFT(53);
|
||||
case ts_symbol_array:
|
||||
SHIFT(53);
|
||||
case ts_symbol_object:
|
||||
SHIFT(53);
|
||||
case ts_symbol_number:
|
||||
SHIFT(53);
|
||||
case ts_aux_token5:
|
||||
SHIFT(47);
|
||||
case ts_aux_token1:
|
||||
|
|
@ -156,14 +156,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 2:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
|
|
@ -176,14 +176,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 3:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
|
|
@ -226,18 +226,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 7:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(43);
|
||||
case ts_symbol_string:
|
||||
SHIFT(43);
|
||||
case ts_aux_token5:
|
||||
SHIFT(35);
|
||||
case ts_symbol_object:
|
||||
SHIFT(43);
|
||||
case ts_symbol_array:
|
||||
SHIFT(43);
|
||||
case ts_symbol_value:
|
||||
SHIFT(41);
|
||||
case ts_symbol_object:
|
||||
SHIFT(43);
|
||||
case ts_symbol_number:
|
||||
SHIFT(43);
|
||||
case ts_aux_token5:
|
||||
SHIFT(35);
|
||||
case ts_aux_token1:
|
||||
SHIFT(8);
|
||||
default:
|
||||
|
|
@ -246,14 +246,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 8:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
|
|
@ -312,18 +312,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 14:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(15);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -378,18 +378,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 20:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(34);
|
||||
case ts_symbol_string:
|
||||
SHIFT(34);
|
||||
case ts_aux_token5:
|
||||
SHIFT(26);
|
||||
case ts_symbol_object:
|
||||
SHIFT(34);
|
||||
case ts_symbol_array:
|
||||
SHIFT(34);
|
||||
case ts_symbol_value:
|
||||
SHIFT(32);
|
||||
case ts_symbol_object:
|
||||
SHIFT(34);
|
||||
case ts_symbol_number:
|
||||
SHIFT(34);
|
||||
case ts_aux_token5:
|
||||
SHIFT(26);
|
||||
case ts_aux_token1:
|
||||
SHIFT(21);
|
||||
default:
|
||||
|
|
@ -398,14 +398,14 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 21:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_value:
|
||||
|
|
@ -474,18 +474,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 28:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(29);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -524,10 +524,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 32:
|
||||
SET_LEX_STATE(9);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token7:
|
||||
REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(18);
|
||||
case ts_aux_token7:
|
||||
REDUCE(ts_aux_repeat_helper1, 4, COLLAPSE({1, 0, 1, 0}));
|
||||
case ts_aux_repeat_helper1:
|
||||
SHIFT(33);
|
||||
default:
|
||||
|
|
@ -570,18 +570,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 37:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(38);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
@ -620,10 +620,10 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 41:
|
||||
SET_LEX_STATE(6);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_aux_token2:
|
||||
SHIFT(7);
|
||||
case ts_aux_token4:
|
||||
REDUCE(ts_aux_repeat_helper2, 2, COLLAPSE({1, 0}));
|
||||
case ts_aux_token2:
|
||||
SHIFT(7);
|
||||
case ts_aux_repeat_helper2:
|
||||
SHIFT(42);
|
||||
default:
|
||||
|
|
@ -694,18 +694,18 @@ static TSParseResult ts_parse(const char *input) {
|
|||
case 49:
|
||||
SET_LEX_STATE(10);
|
||||
switch (LOOKAHEAD_SYM()) {
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_symbol_string:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_array:
|
||||
SHIFT(25);
|
||||
case ts_symbol_value:
|
||||
SHIFT(50);
|
||||
case ts_symbol_object:
|
||||
SHIFT(25);
|
||||
case ts_symbol_number:
|
||||
SHIFT(25);
|
||||
case ts_aux_token5:
|
||||
SHIFT(12);
|
||||
case ts_aux_token1:
|
||||
SHIFT(3);
|
||||
default:
|
||||
|
|
|
|||
|
|
@ -162,14 +162,14 @@ namespace tree_sitter {
|
|||
string lex_error_call(const unordered_set<rules::CharacterSet> &expected_inputs) {
|
||||
rules::CharacterSet expected_set;
|
||||
for (auto &rule : expected_inputs)
|
||||
expected_set.union_with(rule);
|
||||
expected_set.add_set(rule);
|
||||
|
||||
string result = "LEX_ERROR(" + to_string(expected_set.ranges.size()) + ", EXPECT({";
|
||||
bool started = false;
|
||||
for (auto &ranges : expected_set.ranges) {
|
||||
for (auto &range : expected_set.ranges) {
|
||||
if (started) result += ", ";
|
||||
started = true;
|
||||
result += "\"" + escape_string(ranges.to_string()) + "\"";
|
||||
result += "\"" + escape_string(range.to_string()) + "\"";
|
||||
}
|
||||
result += "}));";
|
||||
return result;
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
using std::string;
|
||||
using std::hash;
|
||||
using std::set;
|
||||
using std::pair;
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace rules {
|
||||
|
|
@ -36,23 +37,12 @@ namespace tree_sitter {
|
|||
}
|
||||
}
|
||||
|
||||
int CharacterRange::max_int() const {
|
||||
return max == MAX_CHAR ? 255 : (int)max;
|
||||
int max_int(const CharacterRange &range) {
|
||||
return range.max == MAX_CHAR ? 255 : (int)range.max;
|
||||
}
|
||||
|
||||
int CharacterRange::min_int() const {
|
||||
return (int)min;
|
||||
}
|
||||
|
||||
bool CharacterRange::is_adjacent(const CharacterRange &other) const {
|
||||
return
|
||||
(min_int() <= other.min_int() && max_int() >= (other.min_int() - 1)) ||
|
||||
(min_int() <= (other.max_int() + 1) && max_int() >= other.max_int());
|
||||
}
|
||||
|
||||
void CharacterRange::add_range(const CharacterRange &other) {
|
||||
if (other.min < min) min = other.min;
|
||||
if (other.max_int() > max_int()) max = other.max;
|
||||
int min_int(const CharacterRange &range) {
|
||||
return (int)range.min;
|
||||
}
|
||||
|
||||
string CharacterRange::to_string() const {
|
||||
|
|
@ -101,7 +91,7 @@ namespace tree_sitter {
|
|||
result.insert(CharacterRange(current_char, MAX_CHAR));
|
||||
return CharacterSet(result);
|
||||
}
|
||||
|
||||
|
||||
std::pair<CharacterSet, bool> CharacterSet::most_compact_representation() const {
|
||||
auto first_range = *ranges.begin();
|
||||
if (first_range.min == 0 && first_range.max > 0) {
|
||||
|
|
@ -113,10 +103,26 @@ namespace tree_sitter {
|
|||
|
||||
void add_range(CharacterSet *self, CharacterRange new_range) {
|
||||
set<CharacterRange> new_ranges;
|
||||
|
||||
for (auto range : self->ranges) {
|
||||
if (range.is_adjacent(new_range)) {
|
||||
new_range.add_range(range);
|
||||
} else {
|
||||
auto new_min = min_int(new_range);
|
||||
auto new_max = max_int(new_range);
|
||||
bool is_adjacent = false;
|
||||
|
||||
if (min_int(range) < new_min) {
|
||||
if (max_int(range) >= new_min - 1) {
|
||||
is_adjacent = true;
|
||||
new_range.min = range.min;
|
||||
}
|
||||
}
|
||||
if (max_int(range) > new_max) {
|
||||
if (min_int(range) <= new_max + 1) {
|
||||
is_adjacent = true;
|
||||
new_range.max = range.max;
|
||||
}
|
||||
}
|
||||
|
||||
if (!is_adjacent) {
|
||||
new_ranges.insert(range);
|
||||
}
|
||||
}
|
||||
|
|
@ -124,12 +130,44 @@ namespace tree_sitter {
|
|||
self->ranges = new_ranges;
|
||||
}
|
||||
|
||||
void CharacterSet::union_with(const CharacterSet &other) {
|
||||
// Removes every character covered by `new_range` from `self`.
//
// Each range currently in `self->ranges` is compared against the removed
// interval [new_min, new_max] (bounds obtained through min_int / max_int) and
// is kept whole, trimmed on one side, split in two, or dropped entirely.
// The surviving pieces then replace `self->ranges`.
void remove_range(CharacterSet *self, CharacterRange new_range) {
  set<CharacterRange> new_ranges;
  const int new_min = min_int(new_range);
  const int new_max = max_int(new_range);

  for (const auto &range : self->ranges) {
    const int range_min = min_int(range);
    const int range_max = max_int(range);

    if (new_min <= range_min) {
      if (new_max < range_min) {
        // The removed interval ends before this range starts: keep it as is.
        new_ranges.insert(range);
      } else if (new_max < range_max) {
        // The removed interval covers the start of this range: keep the tail.
        // The comparison is strict on purpose. When both upper bounds are
        // equal nothing is left of the range, and building
        // CharacterRange(new_max + 1, range.max) would yield a range whose
        // minimum lies above its maximum.
        new_ranges.insert(CharacterRange(static_cast<char>(new_max + 1), range.max));
      }
      // Otherwise the removed interval covers the whole range: drop it.
    } else if (new_min <= range_max) {
      // The removed interval starts inside this range: keep the head...
      new_ranges.insert(CharacterRange(range.min, static_cast<char>(new_min - 1)));
      if (new_max < range_max) {
        // ...and, when it also ends inside the range, keep the tail as well.
        new_ranges.insert(CharacterRange(static_cast<char>(new_max + 1), range.max));
      }
    } else {
      // The removed interval starts after this range ends: keep it as is.
      new_ranges.insert(range);
    }
  }

  self->ranges = new_ranges;
}
|
||||
|
||||
void CharacterSet::add_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
add_range(this, other_range);
|
||||
}
|
||||
}
|
||||
|
||||
void CharacterSet::remove_set(const CharacterSet &other) {
|
||||
for (auto &other_range : other.ranges) {
|
||||
remove_range(this, other_range);
|
||||
}
|
||||
}
|
||||
|
||||
// Hands this character set to `visitor` by calling its visit() with a pointer
// to this object.
void CharacterSet::accept(Visitor &visitor) const {
  const CharacterSet *self = this;
  visitor.visit(self);
}
|
||||
|
|
|
|||
|
|
@ -9,19 +9,10 @@ namespace tree_sitter {
|
|||
struct CharacterRange {
|
||||
char min;
|
||||
char max;
|
||||
|
||||
CharacterRange(char);
|
||||
CharacterRange(char, char);
|
||||
|
||||
int max_int() const;
|
||||
int min_int() const;
|
||||
|
||||
bool operator==(const CharacterRange &) const;
|
||||
bool operator<(const CharacterRange &) const;
|
||||
bool is_adjacent(const CharacterRange &) const;
|
||||
|
||||
void add_range(const CharacterRange &);
|
||||
|
||||
std::string to_string() const;
|
||||
};
|
||||
}
|
||||
|
|
@ -45,8 +36,10 @@ namespace tree_sitter {
|
|||
CharacterSet(const std::set<CharacterRange> &ranges, bool);
|
||||
|
||||
CharacterSet complement() const;
|
||||
void union_with(const CharacterSet &other);
|
||||
std::pair<CharacterSet, bool> most_compact_representation() const;
|
||||
|
||||
void add_set(const CharacterSet &other);
|
||||
void remove_set(const CharacterSet &other);
|
||||
|
||||
bool operator==(const Rule& other) const;
|
||||
size_t hash_code() const;
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ namespace tree_sitter {
|
|||
}
|
||||
CharacterSet result;
|
||||
while (has_more_input() && (peek() != ']'))
|
||||
result.union_with(single_char());
|
||||
result.add_set(single_char());
|
||||
return is_affirmative ? result : result.complement();
|
||||
}
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue