From 386b12486659dfc02d0c027c87a2c930cbb0b780 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 20 Dec 2015 15:26:35 -0800 Subject: [PATCH 1/7] Ensure that there are no duplicate lex states --- spec/fixtures/parsers/anonymous_tokens.c | 10 +- spec/fixtures/parsers/c.c | 643 +++++++++--------- src/compiler/build_tables/build_lex_table.cc | 17 +- .../build_tables/build_parse_table.cc | 52 +- .../build_tables/parse_conflict_manager.cc | 4 +- .../build_tables/parse_conflict_manager.h | 3 +- .../build_tables/remove_duplicate_states.h | 58 ++ src/compiler/lex_table.cc | 11 + src/compiler/lex_table.h | 5 +- src/compiler/parse_table.cc | 11 + src/compiler/parse_table.h | 5 +- 11 files changed, 418 insertions(+), 401 deletions(-) create mode 100644 src/compiler/build_tables/remove_duplicate_states.h diff --git a/spec/fixtures/parsers/anonymous_tokens.c b/spec/fixtures/parsers/anonymous_tokens.c index ba918aac..2c4c1958 100644 --- a/spec/fixtures/parsers/anonymous_tokens.c +++ b/spec/fixtures/parsers/anonymous_tokens.c @@ -102,20 +102,14 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == ' ')) ADVANCE(14); if (lookahead == '\n') - ADVANCE(15); + ADVANCE(2); if (lookahead == '\r') - ADVANCE(16); + ADVANCE(3); if (lookahead == '\"') ADVANCE(4); if ('0' <= lookahead && lookahead <= '9') ADVANCE(11); LEX_ERROR(); - case 15: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_LF); - case 16: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_CR); case ts_lex_state_error: START_TOKEN(); if (lookahead == 0) diff --git a/spec/fixtures/parsers/c.c b/spec/fixtures/parsers/c.c index 99933b0f..6311d521 100644 --- a/spec/fixtures/parsers/c.c +++ b/spec/fixtures/parsers/c.c @@ -1806,11 +1806,11 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == ' ')) ADVANCE(145); if (lookahead == '\n') - ADVANCE(157); + ADVANCE(153); if (lookahead == '/') ADVANCE(146); if (lookahead == '\\') - ADVANCE(154); + ADVANCE(151); if (!((lookahead == 0) || (lookahead == '\t') || (lookahead == '\n') || @@ -1818,7 +1818,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == ' ') || (lookahead == '/') || (lookahead == '\\'))) - ADVANCE(156); + ADVANCE(152); LEX_ERROR(); case 145: START_TOKEN(); @@ -1829,7 +1829,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '/') ADVANCE(146); if (lookahead == '\\') - ADVANCE(154); + ADVANCE(151); if (!((lookahead == 0) || (lookahead == '\t') || (lookahead == '\n') || @@ -1837,21 +1837,21 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == ' ') || (lookahead == '/') || (lookahead == '\\'))) - ADVANCE(156); + ADVANCE(152); ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); case 146: if (lookahead == '*') ADVANCE(147); if (lookahead == '/') - ADVANCE(152); + ADVANCE(150); if (lookahead == '\\') - ADVANCE(154); + ADVANCE(151); if (!((lookahead == 0) || (lookahead == '\n') || (lookahead == '*') || (lookahead == '/') || (lookahead == '\\'))) - ADVANCE(156); + ADVANCE(152); ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); case 147: if (lookahead == '\n') @@ -1859,7 +1859,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '*') ADVANCE(148); if (lookahead == '\\') - ADVANCE(150); + ADVANCE(149); if (!((lookahead == 0) || (lookahead == '\n') || (lookahead == '*') || @@ -1870,9 +1870,9 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '\n') ADVANCE(11); if (lookahead == '/') - ADVANCE(149); + ADVANCE(13); if (lookahead == '\\') - ADVANCE(150); + ADVANCE(149); if (!((lookahead == 0) || (lookahead == '\n') || (lookahead == '/') || @@ -1880,99 +1880,65 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ADVANCE(147); ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); case 149: - ACCEPT_TOKEN(sym_comment); - case 150: if (lookahead == '\n') - ADVANCE(151); + ADVANCE(147); if (lookahead == '*') ADVANCE(148); if (lookahead == '\\') - ADVANCE(150); + ADVANCE(149); if (!((lookahead == 0) || (lookahead == '\n') || (lookahead == '*') || (lookahead == '\\'))) ADVANCE(147); ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); - case 151: - if (lookahead == '\n') - ADVANCE(11); - if (lookahead == '*') - ADVANCE(148); + case 150: if (lookahead == '\\') ADVANCE(150); if (!((lookahead == 0) || (lookahead == '\n') || - (lookahead == '*') || (lookahead == '\\'))) - ADVANCE(147); + ADVANCE(150); + ACCEPT_TOKEN(sym_comment); + case 151: + if (lookahead == '\n') + ADVANCE(152); + if (lookahead == '\\') + ADVANCE(151); + if (!((lookahead == 0) || + (lookahead == '\n') || + (lookahead == '\\'))) + ADVANCE(152); ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); case 152: if (lookahead == '\\') - ADVANCE(153); + ADVANCE(151); if (!((lookahead == 0) || (lookahead == '\n') || (lookahead == '\\'))) ADVANCE(152); - ACCEPT_TOKEN(sym_comment); + ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); case 153: - if (lookahead == '\\') - ADVANCE(153); - if (!((lookahead == 0) || - (lookahead == '\n') || - (lookahead == '\\'))) - ADVANCE(152); - ACCEPT_TOKEN(sym_comment); - case 154: - if (lookahead == '\n') - ADVANCE(155); - if (lookahead == '\\') - ADVANCE(154); - if (!((lookahead == 0) || - (lookahead == '\n') || - (lookahead == '\\'))) - ADVANCE(156); - ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); - case 155: - if (lookahead == '\\') - ADVANCE(154); - if (!((lookahead == 0) || - (lookahead == '\n') || - (lookahead == '\\'))) - ADVANCE(156); - ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); - case 156: - if (lookahead == '\\') - ADVANCE(154); - if (!((lookahead == 0) || - (lookahead == '\n') || - (lookahead == '\\'))) - ADVANCE(156); - ACCEPT_FRAGILE_TOKEN(sym_preproc_arg); - case 157: START_TOKEN(); ACCEPT_TOKEN(anon_sym_LF); - case 158: + case 154: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(158); + ADVANCE(154); if (lookahead == '\n') - ADVANCE(159); + ADVANCE(153); if (lookahead == '/') ADVANCE(10); LEX_ERROR(); - case 159: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_LF); - case 160: + case 155: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(160); + ADVANCE(155); if (lookahead == ',') ADVANCE(127); if (lookahead == '/') @@ -1980,13 +1946,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == ';') ADVANCE(101); LEX_ERROR(); - case 161: + case 156: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(161); + ADVANCE(156); if (lookahead == '(') ADVANCE(97); if (lookahead == ',') @@ -2002,13 +1968,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '{') ADVANCE(105); LEX_ERROR(); - case 162: + case 157: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(162); + ADVANCE(157); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2035,7 +2001,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == 'e') ADVANCE(25); if (lookahead == 'f') - ADVANCE(163); + ADVANCE(158); if (lookahead == 'l') ADVANCE(31); if (lookahead == 'r') @@ -2053,7 +2019,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '}') ADVANCE(110); LEX_ERROR(); - case 163: + case 158: if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || @@ -2061,9 +2027,9 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ('p' <= lookahead && lookahead <= 'z')) ADVANCE(15); if (lookahead == 'o') - ADVANCE(164); + ADVANCE(159); ACCEPT_FRAGILE_TOKEN(sym_identifier); - case 164: + case 159: if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || @@ -2071,22 +2037,22 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ('s' <= lookahead && lookahead <= 'z')) ADVANCE(15); if (lookahead == 'r') - ADVANCE(165); + ADVANCE(160); ACCEPT_FRAGILE_TOKEN(sym_identifier); - case 165: + case 160: if (('0' <= lookahead && lookahead <= '9') || ('A' <= lookahead && lookahead <= 'Z') || (lookahead == '_') || ('a' <= lookahead && lookahead <= 'z')) ADVANCE(15); ACCEPT_TOKEN(anon_sym_for); - case 166: + case 161: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(166); + ADVANCE(161); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2105,13 +2071,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '{') ADVANCE(105); LEX_ERROR(); - case 167: + case 162: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(167); + ADVANCE(162); if (lookahead == '(') ADVANCE(97); if (lookahead == '*') @@ -2127,13 +2093,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '=') ADVANCE(131); LEX_ERROR(); - case 168: + case 163: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(168); + ADVANCE(163); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2143,7 +2109,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '*') ADVANCE(98); if (lookahead == '.') - ADVANCE(169); + ADVANCE(164); if (lookahead == '/') ADVANCE(10); if ('0' <= lookahead && lookahead <= '9') @@ -2156,8 +2122,103 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '{') ADVANCE(105); LEX_ERROR(); - case 169: + case 164: ACCEPT_TOKEN(anon_sym_DOT); + case 165: + START_TOKEN(); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(165); + if (lookahead == ',') + ADVANCE(127); + if (lookahead == '/') + ADVANCE(10); + if (lookahead == '}') + ADVANCE(110); + LEX_ERROR(); + case 166: + START_TOKEN(); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(166); + if (lookahead == '(') + ADVANCE(97); + if (lookahead == '*') + ADVANCE(98); + if (lookahead == '+') + ADVANCE(130); + if (lookahead == ',') + ADVANCE(127); + if (lookahead == '/') + ADVANCE(10); + if (lookahead == '=') + ADVANCE(131); + if (lookahead == '}') + ADVANCE(110); + LEX_ERROR(); + case 167: + START_TOKEN(); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(167); + if (lookahead == '/') + ADVANCE(10); + if (lookahead == '=') + ADVANCE(131); + LEX_ERROR(); + case 168: + START_TOKEN(); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(168); + if (lookahead == '.') + ADVANCE(164); + if (lookahead == '/') + ADVANCE(10); + if (lookahead == '=') + ADVANCE(131); + if (lookahead == '[') + ADVANCE(114); + LEX_ERROR(); + case 169: + START_TOKEN(); + if ((lookahead == '\t') || + (lookahead == '\n') || + (lookahead == '\r') || + (lookahead == ' ')) + ADVANCE(169); + if (lookahead == '\"') + ADVANCE(116); + if (lookahead == '&') + ADVANCE(120); + if (lookahead == '(') + ADVANCE(97); + if (lookahead == '*') + ADVANCE(98); + if (lookahead == '.') + ADVANCE(164); + if (lookahead == '/') + ADVANCE(10); + if ('0' <= lookahead && lookahead <= '9') + ADVANCE(121); + if (('A' <= lookahead && lookahead <= 'Z') || + ('a' <= lookahead && lookahead <= 'z')) + ADVANCE(15); + if (lookahead == '[') + ADVANCE(114); + if (lookahead == '{') + ADVANCE(105); + if (lookahead == '}') + ADVANCE(110); + LEX_ERROR(); case 170: START_TOKEN(); if ((lookahead == '\t') || @@ -2165,12 +2226,18 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == '\r') || (lookahead == ' ')) ADVANCE(170); + if (lookahead == '(') + ADVANCE(97); if (lookahead == ',') ADVANCE(127); if (lookahead == '/') ADVANCE(10); - if (lookahead == '}') - ADVANCE(110); + if (lookahead == ';') + ADVANCE(101); + if (lookahead == '=') + ADVANCE(131); + if (lookahead == '[') + ADVANCE(114); LEX_ERROR(); case 171: START_TOKEN(); @@ -2185,14 +2252,12 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ADVANCE(98); if (lookahead == '+') ADVANCE(130); - if (lookahead == ',') - ADVANCE(127); if (lookahead == '/') ADVANCE(10); + if (lookahead == ';') + ADVANCE(101); if (lookahead == '=') ADVANCE(131); - if (lookahead == '}') - ADVANCE(110); LEX_ERROR(); case 172: START_TOKEN(); @@ -2203,8 +2268,10 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ADVANCE(172); if (lookahead == '/') ADVANCE(10); - if (lookahead == '=') - ADVANCE(131); + if (lookahead == ';') + ADVANCE(101); + if (lookahead == '}') + ADVANCE(110); LEX_ERROR(); case 173: START_TOKEN(); @@ -2213,14 +2280,10 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == '\r') || (lookahead == ' ')) ADVANCE(173); - if (lookahead == '.') - ADVANCE(169); + if (lookahead == '(') + ADVANCE(97); if (lookahead == '/') ADVANCE(10); - if (lookahead == '=') - ADVANCE(131); - if (lookahead == '[') - ADVANCE(114); LEX_ERROR(); case 174: START_TOKEN(); @@ -2229,29 +2292,21 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == '\r') || (lookahead == ' ')) ADVANCE(174); - if (lookahead == '\"') - ADVANCE(116); - if (lookahead == '&') - ADVANCE(120); if (lookahead == '(') ADVANCE(97); if (lookahead == '*') ADVANCE(98); - if (lookahead == '.') - ADVANCE(169); + if (lookahead == '+') + ADVANCE(130); if (lookahead == '/') ADVANCE(10); - if ('0' <= lookahead && lookahead <= '9') - ADVANCE(121); + if (lookahead == ';') + ADVANCE(101); + if (lookahead == '=') + ADVANCE(131); if (('A' <= lookahead && lookahead <= 'Z') || ('a' <= lookahead && lookahead <= 'z')) ADVANCE(15); - if (lookahead == '[') - ADVANCE(114); - if (lookahead == '{') - ADVANCE(105); - if (lookahead == '}') - ADVANCE(110); LEX_ERROR(); case 175: START_TOKEN(); @@ -2260,95 +2315,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { (lookahead == '\r') || (lookahead == ' ')) ADVANCE(175); - if (lookahead == '(') - ADVANCE(97); - if (lookahead == ',') - ADVANCE(127); - if (lookahead == '/') - ADVANCE(10); - if (lookahead == ';') - ADVANCE(101); - if (lookahead == '=') - ADVANCE(131); - if (lookahead == '[') - ADVANCE(114); - LEX_ERROR(); - case 176: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(176); - if (lookahead == '(') - ADVANCE(97); - if (lookahead == '*') - ADVANCE(98); - if (lookahead == '+') - ADVANCE(130); - if (lookahead == '/') - ADVANCE(10); - if (lookahead == ';') - ADVANCE(101); - if (lookahead == '=') - ADVANCE(131); - LEX_ERROR(); - case 177: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(177); - if (lookahead == '/') - ADVANCE(10); - if (lookahead == ';') - ADVANCE(101); - if (lookahead == '}') - ADVANCE(110); - LEX_ERROR(); - case 178: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(178); - if (lookahead == '(') - ADVANCE(97); - if (lookahead == '/') - ADVANCE(10); - LEX_ERROR(); - case 179: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(179); - if (lookahead == '(') - ADVANCE(97); - if (lookahead == '*') - ADVANCE(98); - if (lookahead == '+') - ADVANCE(130); - if (lookahead == '/') - ADVANCE(10); - if (lookahead == ';') - ADVANCE(101); - if (lookahead == '=') - ADVANCE(131); - if (('A' <= lookahead && lookahead <= 'Z') || - ('a' <= lookahead && lookahead <= 'z')) - ADVANCE(15); - LEX_ERROR(); - case 180: - START_TOKEN(); - if ((lookahead == '\t') || - (lookahead == '\n') || - (lookahead == '\r') || - (lookahead == ' ')) - ADVANCE(180); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2389,13 +2355,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == 'v') ADVANCE(83); LEX_ERROR(); - case 181: + case 176: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(181); + ADVANCE(176); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2414,13 +2380,13 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ('a' <= lookahead && lookahead <= 'z')) ADVANCE(15); LEX_ERROR(); - case 182: + case 177: START_TOKEN(); if ((lookahead == '\t') || (lookahead == '\n') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(182); + ADVANCE(177); if (lookahead == '\"') ADVANCE(116); if (lookahead == '&') @@ -2438,20 +2404,20 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { ('g' <= lookahead && lookahead <= 'z')) ADVANCE(15); if (lookahead == 'f') - ADVANCE(163); + ADVANCE(158); if (lookahead == '{') ADVANCE(105); LEX_ERROR(); - case 183: + case 178: START_TOKEN(); if (lookahead == 0) ADVANCE(2); if ((lookahead == '\t') || (lookahead == '\r') || (lookahead == ' ')) - ADVANCE(183); + ADVANCE(178); if (lookahead == '\n') - ADVANCE(184); + ADVANCE(153); if (lookahead == '\"') ADVANCE(116); if (lookahead == '#') @@ -2469,7 +2435,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == ',') ADVANCE(127); if (lookahead == '.') - ADVANCE(169); + ADVANCE(164); if (lookahead == '/') ADVANCE(10); if ('0' <= lookahead && lookahead <= '9') @@ -2496,7 +2462,7 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == 'e') ADVANCE(25); if (lookahead == 'f') - ADVANCE(163); + ADVANCE(158); if (lookahead == 'l') ADVANCE(31); if (lookahead == 'r') @@ -2514,9 +2480,6 @@ static TSTree *ts_lex(TSLexer *lexer, TSStateId lex_state) { if (lookahead == '}') ADVANCE(110); LEX_ERROR(); - case 184: - START_TOKEN(); - ACCEPT_TOKEN(anon_sym_LF); case ts_lex_state_error: START_TOKEN(); if (lookahead == 0) @@ -2828,222 +2791,222 @@ static TSStateId ts_lex_states[STATE_COUNT] = { [230] = 99, [231] = 144, [232] = 1, - [233] = 158, + [233] = 154, [234] = 1, [235] = 1, [236] = 96, [237] = 92, [238] = 91, - [239] = 160, - [240] = 161, - [241] = 161, + [239] = 155, + [240] = 156, + [241] = 156, [242] = 96, [243] = 96, - [244] = 161, + [244] = 156, [245] = 115, [246] = 125, [247] = 126, - [248] = 161, + [248] = 156, [249] = 106, - [250] = 161, - [251] = 161, + [250] = 156, + [251] = 156, [252] = 129, - [253] = 161, - [254] = 161, + [253] = 156, + [254] = 156, [255] = 141, - [256] = 161, + [256] = 156, [257] = 1, [258] = 100, - [259] = 162, + [259] = 157, [260] = 96, [261] = 1, - [262] = 166, - [263] = 160, - [264] = 167, - [265] = 167, - [266] = 168, + [262] = 161, + [263] = 155, + [264] = 162, + [265] = 162, + [266] = 163, [267] = 132, [268] = 135, - [269] = 167, - [270] = 167, + [269] = 162, + [270] = 162, [271] = 136, [272] = 135, [273] = 135, - [274] = 167, - [275] = 167, + [274] = 162, + [275] = 162, [276] = 137, - [277] = 167, + [277] = 162, [278] = 106, - [279] = 167, - [280] = 167, + [279] = 162, + [280] = 162, [281] = 106, [282] = 140, - [283] = 167, + [283] = 162, [284] = 135, - [285] = 167, - [286] = 170, - [287] = 170, - [288] = 166, - [289] = 171, - [290] = 171, - [291] = 172, - [292] = 168, + [285] = 162, + [286] = 165, + [287] = 165, + [288] = 161, + [289] = 166, + [290] = 166, + [291] = 167, + [292] = 163, [293] = 135, [294] = 99, [295] = 132, [296] = 135, - [297] = 171, - [298] = 171, + [297] = 166, + [298] = 166, [299] = 136, [300] = 135, [301] = 135, - [302] = 171, - [303] = 171, + [302] = 166, + [303] = 166, [304] = 137, - [305] = 171, + [305] = 166, [306] = 106, - [307] = 171, - [308] = 171, + [307] = 166, + [308] = 166, [309] = 106, [310] = 140, - [311] = 171, + [311] = 166, [312] = 135, - [313] = 171, - [314] = 173, - [315] = 172, + [313] = 166, + [314] = 168, + [315] = 167, [316] = 129, - [317] = 173, - [318] = 172, - [319] = 170, - [320] = 170, - [321] = 174, - [322] = 170, - [323] = 166, - [324] = 170, - [325] = 170, - [326] = 166, + [317] = 168, + [318] = 167, + [319] = 165, + [320] = 165, + [321] = 169, + [322] = 165, + [323] = 161, + [324] = 165, + [325] = 165, + [326] = 161, [327] = 135, - [328] = 171, - [329] = 170, - [330] = 160, - [331] = 174, - [332] = 160, + [328] = 166, + [329] = 165, + [330] = 155, + [331] = 169, + [332] = 155, [333] = 135, - [334] = 167, - [335] = 160, - [336] = 175, - [337] = 175, + [334] = 162, + [335] = 155, + [336] = 170, + [337] = 170, [338] = 96, [339] = 96, - [340] = 175, + [340] = 170, [341] = 115, [342] = 125, [343] = 126, - [344] = 175, + [344] = 170, [345] = 106, - [346] = 175, - [347] = 175, + [346] = 170, + [347] = 170, [348] = 129, - [349] = 175, - [350] = 175, + [349] = 170, + [350] = 170, [351] = 141, - [352] = 175, + [352] = 170, [353] = 100, [354] = 92, [355] = 96, - [356] = 162, - [357] = 162, - [358] = 176, - [359] = 176, + [356] = 157, + [357] = 157, + [358] = 171, + [359] = 171, [360] = 112, - [361] = 177, - [362] = 162, + [361] = 172, + [362] = 157, [363] = 1, [364] = 132, [365] = 135, - [366] = 178, - [367] = 179, - [368] = 180, + [366] = 173, + [367] = 174, + [368] = 175, [369] = 92, [370] = 96, - [371] = 181, - [372] = 176, + [371] = 176, + [372] = 171, [373] = 100, - [374] = 176, + [374] = 171, [375] = 136, - [376] = 176, + [376] = 171, [377] = 137, - [378] = 182, - [379] = 162, + [378] = 177, + [379] = 157, [380] = 106, - [381] = 182, - [382] = 162, - [383] = 182, - [384] = 162, + [381] = 177, + [382] = 157, + [383] = 177, + [384] = 157, [385] = 136, [386] = 135, [387] = 136, [388] = 135, [389] = 135, - [390] = 176, - [391] = 176, + [390] = 171, + [391] = 171, [392] = 137, - [393] = 176, + [393] = 171, [394] = 106, - [395] = 176, - [396] = 176, - [397] = 176, + [395] = 171, + [396] = 171, + [397] = 171, [398] = 137, [399] = 106, - [400] = 182, - [401] = 162, - [402] = 181, - [403] = 181, - [404] = 176, + [400] = 177, + [401] = 157, + [402] = 176, + [403] = 176, + [404] = 171, [405] = 136, [406] = 137, [407] = 106, - [408] = 182, - [409] = 162, - [410] = 160, - [411] = 175, + [408] = 177, + [409] = 157, + [410] = 155, + [411] = 170, [412] = 100, - [413] = 181, - [414] = 181, + [413] = 176, + [414] = 176, [415] = 96, - [416] = 160, - [417] = 175, + [416] = 155, + [417] = 170, [418] = 100, - [419] = 181, - [420] = 176, + [419] = 176, + [420] = 171, [421] = 106, [422] = 140, - [423] = 176, + [423] = 171, [424] = 135, - [425] = 176, + [425] = 171, [426] = 112, - [427] = 177, - [428] = 162, - [429] = 162, - [430] = 162, + [427] = 172, + [428] = 157, + [429] = 157, + [430] = 157, [431] = 1, [432] = 112, [433] = 100, - [434] = 162, - [435] = 160, - [436] = 175, + [434] = 157, + [435] = 155, + [436] = 170, [437] = 100, - [438] = 162, - [439] = 162, + [438] = 157, + [439] = 157, [440] = 96, - [441] = 160, - [442] = 175, + [441] = 155, + [442] = 170, [443] = 100, - [444] = 162, + [444] = 157, [445] = 1, [446] = 96, - [447] = 160, - [448] = 161, + [447] = 155, + [448] = 156, [449] = 1, [450] = 100, [451] = 1, diff --git a/src/compiler/build_tables/build_lex_table.cc b/src/compiler/build_tables/build_lex_table.cc index c3fc22b3..797d2439 100644 --- a/src/compiler/build_tables/build_lex_table.cc +++ b/src/compiler/build_tables/build_lex_table.cc @@ -7,6 +7,7 @@ #include #include #include "compiler/build_tables/lex_conflict_manager.h" +#include "compiler/build_tables/remove_duplicate_states.h" #include "compiler/build_tables/lex_item.h" #include "compiler/build_tables/does_match_any_line.h" #include "compiler/parse_table.h" @@ -40,8 +41,7 @@ class LexTableBuilder { public: LexTableBuilder(ParseTable *parse_table, const LexicalGrammar &lex_grammar) - : lex_grammar(lex_grammar), - parse_table(parse_table) { + : lex_grammar(lex_grammar), parse_table(parse_table) { for (const rule_ptr &rule : lex_grammar.separators) separator_rules.push_back(rules::Repeat::build(rule)); separator_rules.push_back(rules::Blank::build()); @@ -59,6 +59,7 @@ class LexTableBuilder { populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID); mark_fragile_tokens(); + remove_duplicate_lex_states(); return lex_table; } @@ -161,6 +162,18 @@ class LexTableBuilder { if (conflict_manager.fragile_tokens.count(state.default_action.symbol)) state.default_action.type = LexActionTypeAcceptFragile; } + + void remove_duplicate_lex_states() { + auto replacements = + remove_duplicate_states( + &lex_table.states); + + for (ParseState &parse_state : parse_table->states) { + auto replacement = replacements.find(parse_state.lex_state_id); + if (replacement != replacements.end()) + parse_state.lex_state_id = replacement->second; + } + } }; LexTable build_lex_table(ParseTable *table, const LexicalGrammar &grammar) { diff --git a/src/compiler/build_tables/build_parse_table.cc b/src/compiler/build_tables/build_parse_table.cc index 85ee9c5a..2eec11f1 100644 --- a/src/compiler/build_tables/build_parse_table.cc +++ b/src/compiler/build_tables/build_parse_table.cc @@ -7,6 +7,7 @@ #include #include "compiler/parse_table.h" #include "compiler/build_tables/parse_conflict_manager.h" +#include "compiler/build_tables/remove_duplicate_states.h" #include "compiler/build_tables/parse_item.h" #include "compiler/build_tables/item_set_closure.h" #include "compiler/lexical_grammar.h" @@ -42,8 +43,7 @@ class ParseTableBuilder { public: ParseTableBuilder(const SyntaxGrammar &grammar, const LexicalGrammar &lex_grammar) - : grammar(grammar), - lexical_grammar(lex_grammar) {} + : grammar(grammar), lexical_grammar(lex_grammar) {} pair build() { Symbol start_symbol = Symbol(0, grammar.variables.empty()); @@ -79,7 +79,7 @@ class ParseTableBuilder { } mark_fragile_actions(); - remove_duplicate_states(); + remove_duplicate_parse_states(); parse_table.symbols.insert({ rules::ERROR(), {} }); @@ -192,49 +192,9 @@ class ParseTableBuilder { } } - void remove_duplicate_states() { - bool done = false; - while (!done) { - done = true; - - map replacements; - for (size_t i = 0, size = parse_table.states.size(); i < size; i++) { - for (size_t j = 0; j < i; j++) { - if (parse_table.states[i].actions == parse_table.states[j].actions) { - replacements.insert({ i, j }); - done = false; - break; - } - } - } - - for (ParseState &state : parse_table.states) { - for (auto &entry : state.actions) { - for (ParseAction &action : entry.second) { - if (action.type == ParseActionTypeShift) { - ParseStateId state_index = action.state_index; - auto replacement = replacements.find(action.state_index); - if (replacement != replacements.end()) { - state_index = replacement->second; - } - - size_t prior_removed = 0; - for (const auto &replacement : replacements) { - if (replacement.first >= state_index) - break; - prior_removed++; - } - - state_index -= prior_removed; - action.state_index = state_index; - } - } - } - } - - for (auto i = replacements.rbegin(); i != replacements.rend(); ++i) - parse_table.states.erase(parse_table.states.begin() + i->first); - } + void remove_duplicate_parse_states() { + remove_duplicate_states( + &parse_table.states); } ParseAction *add_action(ParseStateId state_id, Symbol lookahead, diff --git a/src/compiler/build_tables/parse_conflict_manager.cc b/src/compiler/build_tables/parse_conflict_manager.cc index 52caf305..ca612926 100644 --- a/src/compiler/build_tables/parse_conflict_manager.cc +++ b/src/compiler/build_tables/parse_conflict_manager.cc @@ -22,9 +22,9 @@ pair ParseConflictManager::resolve( case ParseActionTypeShift: if (new_action.extra) { - return {false, ConflictTypeNone}; + return { false, ConflictTypeNone }; } else if (old_action.extra) { - return {true, ConflictTypeNone}; + return { true, ConflictTypeNone }; } else if (new_action.type == ParseActionTypeReduce) { int min_precedence = old_action.precedence_range.min; int max_precedence = old_action.precedence_range.max; diff --git a/src/compiler/build_tables/parse_conflict_manager.h b/src/compiler/build_tables/parse_conflict_manager.h index a38066cf..b4927373 100644 --- a/src/compiler/build_tables/parse_conflict_manager.h +++ b/src/compiler/build_tables/parse_conflict_manager.h @@ -23,7 +23,8 @@ enum ConflictType { class ParseConflictManager { public: - std::pair resolve(const ParseAction &, const ParseAction &) const; + std::pair resolve(const ParseAction &, + const ParseAction &) const; }; } // namespace build_tables diff --git a/src/compiler/build_tables/remove_duplicate_states.h b/src/compiler/build_tables/remove_duplicate_states.h new file mode 100644 index 00000000..35b2d822 --- /dev/null +++ b/src/compiler/build_tables/remove_duplicate_states.h @@ -0,0 +1,58 @@ +#ifndef COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_ +#define COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_ + +#include +#include + +namespace tree_sitter { +namespace build_tables { + +template +std::map remove_duplicate_states( + std::vector *states) { + std::map replacements; + + while (true) { + std::map duplicates; + for (size_t i = 0, size = states->size(); i < size; i++) + for (size_t j = 0; j < i; j++) + if (states->at(i) == states->at(j)) { + duplicates.insert({ i, j }); + break; + } + + if (duplicates.empty()) + break; + + for (StateType &state : *states) + state.each_action([&duplicates, &replacements](ActionType *action) { + if (action->type == advance_action) { + size_t state_index = action->state_index; + auto replacement = duplicates.find(action->state_index); + if (replacement != duplicates.end()) + state_index = replacement->second; + + size_t prior_removed = 0; + for (const auto &replacement : duplicates) { + if (replacement.first >= state_index) + break; + prior_removed++; + } + + state_index -= prior_removed; + replacements.insert({ action->state_index, state_index }); + action->state_index = state_index; + } + }); + + for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) + states->erase(states->begin() + i->first); + } + + return replacements; +} + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_REMOVE_DUPLICATE_STATES_H_ diff --git a/src/compiler/lex_table.cc b/src/compiler/lex_table.cc index e74233fc..31a383b5 100644 --- a/src/compiler/lex_table.cc +++ b/src/compiler/lex_table.cc @@ -3,6 +3,7 @@ namespace tree_sitter { +using std::function; using std::string; using std::to_string; using std::map; @@ -53,6 +54,16 @@ set LexState::expected_inputs() const { return result; } +bool LexState::operator==(const LexState &other) const { + return actions == other.actions && default_action == other.default_action && + is_token_start == other.is_token_start; +} + +void LexState::each_action(function fn) { + for (auto &entry : actions) + fn(&entry.second); +} + LexStateId LexTable::add_state() { states.push_back(LexState()); return states.size() - 1; diff --git a/src/compiler/lex_table.h b/src/compiler/lex_table.h index e96be64b..40db82af 100644 --- a/src/compiler/lex_table.h +++ b/src/compiler/lex_table.h @@ -56,9 +56,12 @@ namespace tree_sitter { class LexState { public: LexState(); + std::set expected_inputs() const; + bool operator==(const LexState &) const; + void each_action(std::function); + std::map actions; LexAction default_action; - std::set expected_inputs() const; bool is_token_start; }; diff --git a/src/compiler/parse_table.cc b/src/compiler/parse_table.cc index 847354c8..843b4393 100644 --- a/src/compiler/parse_table.cc +++ b/src/compiler/parse_table.cc @@ -9,6 +9,7 @@ using std::ostream; using std::to_string; using std::set; using std::vector; +using std::function; using rules::Symbol; ParseAction::ParseAction(ParseActionType type, ParseStateId state_index, @@ -126,6 +127,16 @@ set ParseState::expected_inputs() const { return result; } +void ParseState::each_action(function fn) { + for (auto &entry : actions) + for (ParseAction &action : entry.second) + fn(&action); +} + +bool ParseState::operator==(const ParseState &other) const { + return actions == other.actions; +} + set ParseTable::all_symbols() const { set result; for (auto &pair : symbols) diff --git a/src/compiler/parse_table.h b/src/compiler/parse_table.h index babfbe9e..df61a8a6 100644 --- a/src/compiler/parse_table.h +++ b/src/compiler/parse_table.h @@ -79,8 +79,11 @@ namespace tree_sitter { class ParseState { public: ParseState(); - std::map> actions; std::set expected_inputs() const; + bool operator==(const ParseState &) const; + void each_action(std::function); + + std::map> actions; LexStateId lex_state_id; }; From 2bcd2e4d004782da6c0f154f708c94329471799a Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Mon, 21 Dec 2015 16:04:11 -0800 Subject: [PATCH 2/7] Reuse fragile tokens that came from the current lex state --- include/tree_sitter/parser.h | 1 + src/runtime/lexer.c | 5 ++- src/runtime/parser.c | 72 +++++++++++++++++++++--------------- src/runtime/tree.c | 8 ++-- src/runtime/tree.h | 3 ++ 5 files changed, 54 insertions(+), 35 deletions(-) diff --git a/include/tree_sitter/parser.h b/include/tree_sitter/parser.h index e1da810c..9b29cf60 100644 --- a/include/tree_sitter/parser.h +++ b/include/tree_sitter/parser.h @@ -47,6 +47,7 @@ typedef struct TSLexer { size_t lookahead_size; int32_t lookahead; + TSStateId starting_state; TSInput input; TSDebugger debugger; diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index d2b69fc0..d859223b 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -45,6 +45,7 @@ static void ts_lexer__start(TSLexer *self, TSStateId lex_state) { LOG("start_lex state:%d, pos:%lu", lex_state, self->current_position.chars); LOG_LOOKAHEAD(); + self->starting_state = lex_state; if (!self->chunk) ts_lexer__get_chunk(self); if (!self->lookahead_size) @@ -101,7 +102,9 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, result = ts_tree_make_leaf(symbol, padding, size, metadata); } - result->options.fragile_left = fragile; + if (fragile) + result->context.lex_state = self->starting_state; + return result; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 494aada0..4617eb18 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -84,19 +84,26 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { } while (last_child->child_count > 0); } +static void ts_parser__pop_reusable_subtree(LookaheadState *state); + /* * Replace the parser's reusable_subtree with its first non-fragile descendant. * Return true if a suitable descendant is found, false otherwise. */ -static bool ts_parser__breakdown_reusable_subtree(LookaheadState *state) { +static void ts_parser__breakdown_reusable_subtree(LookaheadState *state) { do { - if (state->reusable_subtree->symbol == ts_builtin_sym_error) - return false; - if (state->reusable_subtree->child_count == 0) - return false; + if (state->reusable_subtree->symbol == ts_builtin_sym_error) { + ts_parser__pop_reusable_subtree(state); + return; + } + + if (state->reusable_subtree->child_count == 0) { + ts_parser__pop_reusable_subtree(state); + return; + } + state->reusable_subtree = state->reusable_subtree->children[0]; } while (ts_tree_is_fragile(state->reusable_subtree)); - return true; } /* @@ -118,13 +125,30 @@ static void ts_parser__pop_reusable_subtree(LookaheadState *state) { } static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { - if (!subtree || subtree->symbol == ts_builtin_sym_error || - ts_tree_is_fragile(subtree)) + if (!subtree) return false; + if (subtree->symbol == ts_builtin_sym_error) + return false; + if (ts_tree_is_fragile(subtree)) + return false; + TSStateId state = ts_stack_top_state(self->stack, head); + + if (subtree->context.lex_state != TSTREE_LEX_STATE_INDEPENDENT) { + TSStateId lex_state = self->language->lex_states[state]; + if (subtree->context.lex_state != lex_state) + return false; + } + const TSParseAction *action = ts_language_actions(self->language, state, subtree->symbol); - return action->type != TSParseActionTypeError && !action->can_hide_split; + if (action->type == TSParseActionTypeError || action->can_hide_split) + return false; + + if (ts_tree_is_extra(subtree) && !action->extra) + return false; + + return true; } /* @@ -142,33 +166,25 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { } if (state->reusable_subtree_pos < position.chars) { - LOG("past_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + LOG("past_reusable sym:%s", SYM_NAME(state->reusable_subtree->symbol)); ts_parser__pop_reusable_subtree(state); continue; } - bool can_reuse = true; if (ts_tree_has_changes(state->reusable_subtree)) { if (state->is_verifying) { ts_parser__breakdown_top_of_stack(self, head); state->is_verifying = false; } + LOG("breakdown_changed sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; - } else if (ts_tree_is_extra(state->reusable_subtree)) { - LOG("breakdown_extra sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; - } else if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) { - LOG("breakdown_non_reusable sym:%s", - SYM_NAME(state->reusable_subtree->symbol)); - can_reuse = false; + ts_parser__breakdown_reusable_subtree(state); + continue; } - if (!can_reuse) { - if (!ts_parser__breakdown_reusable_subtree(state)) { - LOG("dont_reuse sym:%s", SYM_NAME(state->reusable_subtree->symbol)); - ts_parser__pop_reusable_subtree(state); - } + if (!ts_parser__can_reuse(self, head, state->reusable_subtree)) { + LOG("breakdown_unreusable sym:%s", SYM_NAME(state->reusable_subtree->symbol)); + ts_parser__breakdown_reusable_subtree(state); continue; } @@ -217,8 +233,6 @@ static TSTree *ts_parser__select_tree(void *data, TSTree *left, TSTree *right) { static bool ts_parser__shift(TSParser *self, int head, TSStateId parse_state, TSTree *lookahead) { - if (self->language->symbol_metadata[lookahead->symbol].extra) - ts_tree_set_fragile(lookahead); if (ts_stack_push(self->stack, head, parse_state, lookahead)) { LOG("merge head:%d", head); vector_erase(&self->lookahead_states, head); @@ -643,10 +657,8 @@ TSTree *ts_parser_parse(TSParser *self, TSInput input, TSTree *previous_tree) { if (!ts_parser__can_reuse(self, head, lookahead) || position.chars != last_position.chars) { - TSTree *reused_lookahead = ts_parser__get_next_lookahead(self, head); - if (ts_parser__can_reuse(self, head, reused_lookahead)) { - lookahead = reused_lookahead; - } else { + lookahead = ts_parser__get_next_lookahead(self, head); + if (!lookahead) { ts_lexer_reset(&self->lexer, position); TSStateId parse_state = ts_stack_top_state(self->stack, head); TSStateId lex_state = self->language->lex_states[parse_state]; diff --git a/src/runtime/tree.c b/src/runtime/tree.c index 6616335f..dc084407 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -18,10 +18,9 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .named_child_count = 0, .children = NULL, .padding = padding, - .options = - { - .visible = metadata.visible, .named = metadata.named, - }, + .options.visible = metadata.visible, + .options.named = metadata.named, + .context.lex_state = TSTREE_LEX_STATE_INDEPENDENT, }; if (sym == ts_builtin_sym_error) { @@ -92,6 +91,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { } if (child_count > 0) { + self->context.lex_state = children[0]->context.lex_state; if (children[0]->options.fragile_left) self->options.fragile_left = true; if (children[child_count - 1]->options.fragile_right) diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 166b7c11..c2693618 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -9,11 +9,14 @@ extern "C" { #include "tree_sitter/parser.h" #include "runtime/length.h" +#define TSTREE_LEX_STATE_INDEPENDENT (unsigned short)(-1) + struct TSTree { struct { struct TSTree *parent; size_t index; TSLength offset; + TSStateId lex_state; } context; size_t child_count; size_t visible_child_count; From f9e6fce62d3d12e1f19eb2dc2cdd35e390b2e15f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Dec 2015 11:20:21 -0800 Subject: [PATCH 3/7] Remove redundant tests for the same random edit operations --- spec/runtime/language_specs.cc | 22 +--------------------- 1 file changed, 1 insertion(+), 21 deletions(-) diff --git a/spec/runtime/language_specs.cc b/spec/runtime/language_specs.cc index 7967d5b7..d2bc0756 100644 --- a/spec/runtime/language_specs.cc +++ b/spec/runtime/language_specs.cc @@ -144,7 +144,7 @@ describe("Languages", [&]() { std::set> deletions; std::set> insertions; - for (size_t i = 0; i < 50; i++) { + for (size_t i = 0; i < 80; i++) { size_t edit_position = random() % SpyInput::char_count(entry.input); size_t deletion_size = random() % (SpyInput::char_count(entry.input) - edit_position); string inserted_text = random_words(random() % 4 + 1); @@ -159,16 +159,6 @@ describe("Languages", [&]() { ts_document_edit(doc, input->undo()); ts_document_parse(doc); }); - - it_handles_edit_sequence("performing and repairing an insertion of " + description, [&]() { - ts_document_parse(doc); - - ts_document_edit(doc, input->replace(edit_position, 0, inserted_text)); - ts_document_parse(doc); - - ts_document_edit(doc, input->undo()); - ts_document_parse(doc); - }); } if (deletions.insert({edit_position, deletion_size}).second) { @@ -181,16 +171,6 @@ describe("Languages", [&]() { ts_document_edit(doc, input->undo()); ts_document_parse(doc); }); - - it_handles_edit_sequence("performing and repairing a deletion of " + desription, [&]() { - ts_document_parse(doc); - - ts_document_edit(doc, input->replace(edit_position, deletion_size, "")); - ts_document_parse(doc); - - ts_document_edit(doc, input->undo()); - ts_document_parse(doc); - }); } } } From 6b11890d687f87fe01eb22935fa63d6c334373fb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Dec 2015 13:59:04 -0800 Subject: [PATCH 4/7] Reuse fragile parent nodes that were reduced in the current state --- src/runtime/parser.c | 13 ++++++++++--- src/runtime/tree.c | 4 +++- src/runtime/tree.h | 4 +++- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 4617eb18..9442d2c4 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -129,12 +129,14 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { return false; if (subtree->symbol == ts_builtin_sym_error) return false; - if (ts_tree_is_fragile(subtree)) - return false; + if (ts_tree_is_fragile(subtree)) { + if (subtree->context.parse_state != ts_stack_top_state(self->stack, head)) + return false; + } TSStateId state = ts_stack_top_state(self->stack, head); - if (subtree->context.lex_state != TSTREE_LEX_STATE_INDEPENDENT) { + if (subtree->context.lex_state != TSTREE_STATE_INDEPENDENT) { TSStateId lex_state = self->language->lex_states[state]; if (subtree->context.lex_state != lex_state) return false; @@ -335,6 +337,10 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, */ TSStateId state; TSStateId top_state = ts_stack_top_state(self->stack, new_head); + + if (parent->context.parse_state != TSTREE_STATE_ERROR) + parent->context.parse_state = top_state; + if (extra) { ts_tree_set_extra(parent); state = top_state; @@ -378,6 +384,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, TSTree **parent = vector_get(&self->reduce_parents, i); (*parent)->options.fragile_left = true; (*parent)->options.fragile_right = true; + (*parent)->context.parse_state = TSTREE_STATE_ERROR; } } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index dc084407..ba4ccc45 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -20,7 +20,8 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .padding = padding, .options.visible = metadata.visible, .options.named = metadata.named, - .context.lex_state = TSTREE_LEX_STATE_INDEPENDENT, + .context.lex_state = TSTREE_STATE_INDEPENDENT, + .context.parse_state = TSTREE_STATE_INDEPENDENT, }; if (sym == ts_builtin_sym_error) { @@ -87,6 +88,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (child->symbol == ts_builtin_sym_error) { self->options.fragile_left = self->options.fragile_right = true; + self->context.parse_state = TSTREE_STATE_ERROR; } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index c2693618..0e6a464e 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -9,7 +9,8 @@ extern "C" { #include "tree_sitter/parser.h" #include "runtime/length.h" -#define TSTREE_LEX_STATE_INDEPENDENT (unsigned short)(-1) +#define TSTREE_STATE_INDEPENDENT (unsigned short)(-1) +#define TSTREE_STATE_ERROR (TSTREE_STATE_INDEPENDENT - 1) struct TSTree { struct { @@ -17,6 +18,7 @@ struct TSTree { size_t index; TSLength offset; TSStateId lex_state; + TSStateId parse_state; } context; size_t child_count; size_t visible_child_count; From 30961143fe0ca92fc9f84f715a74e913bb79d36f Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Dec 2015 14:00:22 -0800 Subject: [PATCH 5/7] Only perform breakdown if next terminal node has changes --- src/runtime/parser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 9442d2c4..2e77088f 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -174,7 +174,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { } if (ts_tree_has_changes(state->reusable_subtree)) { - if (state->is_verifying) { + if (state->is_verifying && state->reusable_subtree->child_count == 0) { ts_parser__breakdown_top_of_stack(self, head); state->is_verifying = false; } From da1bc038e5b93364ec5371ca20fd2237634aa2a0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Dec 2015 14:20:58 -0800 Subject: [PATCH 6/7] Remove nested options structs in Tree --- spec/runtime/stack_spec.cc | 2 +- spec/runtime/tree_spec.cc | 64 +++++++++++++++---------------- src/runtime/document.c | 4 +- src/runtime/lexer.c | 2 +- src/runtime/node.c | 12 +++--- src/runtime/parser.c | 38 +++++++++---------- src/runtime/stack.c | 2 +- src/runtime/tree.c | 40 +++++++++---------- src/runtime/tree.h | 78 ++++++-------------------------------- 9 files changed, 93 insertions(+), 149 deletions(-) diff --git a/spec/runtime/stack_spec.cc b/spec/runtime/stack_spec.cc index c92227cd..63003440 100644 --- a/spec/runtime/stack_spec.cc +++ b/spec/runtime/stack_spec.cc @@ -128,7 +128,7 @@ describe("Stack", [&]() { }); it("does not count 'extra' trees toward the count", [&]() { - ts_tree_set_extra(trees[1]); + trees[1]->extra = true; Vector pop = ts_stack_pop(stack, 0, 2, false); StackPopResult pop1 = *(StackPopResult *)vector_get(&pop, 0); diff --git a/spec/runtime/tree_spec.cc b/spec/runtime/tree_spec.cc index ad829fd0..09e56105 100644 --- a/spec/runtime/tree_spec.cc +++ b/spec/runtime/tree_spec.cc @@ -47,8 +47,8 @@ describe("Tree", []() { describe("make_leaf(sym, size, padding, is_hidden)", [&]() { it("does not record that it is fragile", [&]() { - AssertThat(ts_tree_is_fragile_left(tree1), IsFalse()); - AssertThat(ts_tree_is_fragile_right(tree1), IsFalse()); + AssertThat(tree1->fragile_left, IsFalse()); + AssertThat(tree1->fragile_right, IsFalse()); }); }); @@ -59,8 +59,8 @@ describe("Tree", []() { ts_length_zero(), 'z'); - AssertThat(ts_tree_is_fragile_left(error_tree), IsTrue()); - AssertThat(ts_tree_is_fragile_right(error_tree), IsTrue()); + AssertThat(error_tree->fragile_left, IsTrue()); + AssertThat(error_tree->fragile_right, IsTrue()); }); }); @@ -81,8 +81,8 @@ describe("Tree", []() { TSTree *parent; before_each([&]() { - ts_tree_set_fragile_left(tree1); - ts_tree_set_extra(tree1); + tree1->fragile_left = true; + tree1->extra = true; parent = ts_tree_make_node(eel, 2, tree_array({ tree1, tree2, @@ -94,7 +94,7 @@ describe("Tree", []() { }); it("records that it is fragile on the left side", [&]() { - AssertThat(ts_tree_is_fragile_left(parent), IsTrue()); + AssertThat(parent->fragile_left, IsTrue()); }); }); @@ -102,8 +102,8 @@ describe("Tree", []() { TSTree *parent; before_each([&]() { - ts_tree_set_fragile_right(tree2); - ts_tree_set_extra(tree2); + tree2->fragile_right = true; + tree2->extra = true; parent = ts_tree_make_node(eel, 2, tree_array({ tree1, tree2, @@ -115,7 +115,7 @@ describe("Tree", []() { }); it("records that it is fragile on the right side", [&]() { - AssertThat(ts_tree_is_fragile_right(parent), IsTrue()); + AssertThat(parent->fragile_right, IsTrue()); }); }); @@ -123,8 +123,8 @@ describe("Tree", []() { TSTree *parent; before_each([&]() { - ts_tree_set_fragile_right(tree1); - ts_tree_set_fragile_left(tree2); + tree1->fragile_right = true; + tree2->fragile_left = true; parent = ts_tree_make_node(eel, 2, tree_array({ tree1, tree2, @@ -136,8 +136,8 @@ describe("Tree", []() { }); it("records that it is not fragile", [&]() { - AssertThat(ts_tree_is_fragile_left(parent), IsFalse()); - AssertThat(ts_tree_is_fragile_right(parent), IsFalse()); + AssertThat(parent->fragile_left, IsFalse()); + AssertThat(parent->fragile_right, IsFalse()); }); }); }); @@ -175,15 +175,15 @@ describe("Tree", []() { assert_consistent(tree); - AssertThat(tree->options.has_changes, IsTrue()); + AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({0, 3, 0, 0})); AssertThat(tree->size, Equals({13, 13, 0, 13})); - AssertThat(tree->children[0]->options.has_changes, IsTrue()); + AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({0, 3, 0, 0})); AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); - AssertThat(tree->children[1]->options.has_changes, IsFalse()); + AssertThat(tree->children[1]->has_changes, IsFalse()); AssertThat(tree->children[1]->padding, Equals({2, 2, 0, 2})); AssertThat(tree->children[1]->size, Equals({3, 3, 0, 3})); }); @@ -195,11 +195,11 @@ describe("Tree", []() { assert_consistent(tree); - AssertThat(tree->options.has_changes, IsTrue()); + AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({0, 5, 0, 0})); AssertThat(tree->size, Equals({0, 11, 0, 0})); - AssertThat(tree->children[0]->options.has_changes, IsTrue()); + AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({0, 5, 0, 0})); AssertThat(tree->children[0]->size, Equals({0, 1, 0, 0})); }); @@ -211,15 +211,15 @@ describe("Tree", []() { assert_consistent(tree); - AssertThat(tree->options.has_changes, IsTrue()); + AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({0, 4, 0, 0})); AssertThat(tree->size, Equals({13, 13, 0, 13})); - AssertThat(tree->children[0]->options.has_changes, IsTrue()); + AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); AssertThat(tree->children[0]->size, Equals({3, 3, 0, 3})); - AssertThat(tree->children[1]->options.has_changes, IsFalse()); + AssertThat(tree->children[1]->has_changes, IsFalse()); }); }); @@ -229,15 +229,15 @@ describe("Tree", []() { assert_consistent(tree); - AssertThat(tree->options.has_changes, IsTrue()); + AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({2, 2, 0, 2})); AssertThat(tree->size, Equals({0, 16, 0, 0})); - AssertThat(tree->children[0]->options.has_changes, IsTrue()); + AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({2, 2, 0, 2})); AssertThat(tree->children[0]->size, Equals({0, 6, 0, 0})); - AssertThat(tree->children[1]->options.has_changes, IsFalse()); + AssertThat(tree->children[1]->has_changes, IsFalse()); }); }); @@ -247,19 +247,19 @@ describe("Tree", []() { assert_consistent(tree); - AssertThat(tree->options.has_changes, IsTrue()); + AssertThat(tree->has_changes, IsTrue()); AssertThat(tree->padding, Equals({0, 4, 0, 0})); AssertThat(tree->size, Equals({0, 4, 0, 0})); - AssertThat(tree->children[0]->options.has_changes, IsTrue()); + AssertThat(tree->children[0]->has_changes, IsTrue()); AssertThat(tree->children[0]->padding, Equals({0, 4, 0, 0})); AssertThat(tree->children[0]->size, Equals({0, 0, 0, 0})); - AssertThat(tree->children[1]->options.has_changes, IsTrue()); + AssertThat(tree->children[1]->has_changes, IsTrue()); AssertThat(tree->children[1]->padding, Equals({0, 0, 0, 0})); AssertThat(tree->children[1]->size, Equals({0, 0, 0, 0})); - AssertThat(tree->children[2]->options.has_changes, IsTrue()); + AssertThat(tree->children[2]->has_changes, IsTrue()); AssertThat(tree->children[2]->padding, Equals({0, 1, 0, 0})); AssertThat(tree->children[2]->size, Equals({3, 3, 0, 3})); }); @@ -344,7 +344,7 @@ describe("Tree", []() { }); it("hides invisible nodes", [&]() { - tree2->options.visible = false; + tree2->visible = false; char *string1 = ts_tree_string(parent1, names, true); AssertThat(string(string1), Equals("(dog (cat))")); @@ -353,13 +353,13 @@ describe("Tree", []() { describe("when the root node is not visible", [&]() { it("still serializes it", [&]() { - parent1->options.visible = false; + parent1->visible = false; char *string1 = ts_tree_string(parent1, names, true); AssertThat(string(string1), Equals("(dog (cat) (cat))")); free(string1); - tree1->options.visible = false; + tree1->visible = false; char *string2 = ts_tree_string(tree1, names, true); AssertThat(string(string2), Equals("(cat)")); diff --git a/src/runtime/document.c b/src/runtime/document.c index 9747f912..40b509ec 100644 --- a/src/runtime/document.c +++ b/src/runtime/document.c @@ -67,7 +67,7 @@ void ts_document_parse(TSDocument *self) { return; TSTree *reusable_tree = self->valid ? self->tree : NULL; - if (reusable_tree && !reusable_tree->options.has_changes) + if (reusable_tree && !reusable_tree->has_changes) return; TSTree *tree = ts_parser_parse(&self->parser, self->input, reusable_tree); @@ -85,7 +85,7 @@ void ts_document_invalidate(TSDocument *self) { TSNode ts_document_root_node(const TSDocument *self) { TSNode result = ts_node_make(self->tree, 0, 0, 0); - while (result.data && !ts_tree_is_visible(result.data)) + while (result.data && !((TSTree *)result.data)->visible) result = ts_node_named_child(result, 0); return result; } diff --git a/src/runtime/lexer.c b/src/runtime/lexer.c index d859223b..7029cf91 100644 --- a/src/runtime/lexer.c +++ b/src/runtime/lexer.c @@ -103,7 +103,7 @@ static TSTree *ts_lexer__accept(TSLexer *self, TSSymbol symbol, } if (fragile) - result->context.lex_state = self->starting_state; + result->lex_state = self->starting_state; return result; } diff --git a/src/runtime/node.c b/src/runtime/node.c index e2eac026..1597cccd 100644 --- a/src/runtime/node.c +++ b/src/runtime/node.c @@ -33,7 +33,7 @@ static inline size_t ts_node__offset_row(TSNode self) { static inline bool ts_node__is_relevant(TSNode self, bool include_anonymous) { const TSTree *tree = ts_node__tree(self); - return include_anonymous ? tree->options.visible : tree->options.named; + return include_anonymous ? tree->visible : tree->named; } static inline size_t ts_node__relevant_child_count(TSNode self, @@ -110,7 +110,7 @@ static inline TSNode ts_node__prev_sibling(TSNode self, bool include_anonymous) if (grandchild_count > 0) return ts_node__child(child, grandchild_count - 1, include_anonymous); } - } while (!ts_tree_is_visible(ts_node__tree(result))); + } while (!ts_node__tree(result)->visible); return ts_node__null(); } @@ -133,7 +133,7 @@ static inline TSNode ts_node__next_sibling(TSNode self, bool include_anonymous) if (grandchild_count > 0) return ts_node__child(child, 0, include_anonymous); } - } while (!ts_tree_is_visible(ts_node__tree(result))); + } while (!ts_node__tree(result)->visible); return ts_node__null(); } @@ -218,11 +218,11 @@ bool ts_node_eq(TSNode self, TSNode other) { } bool ts_node_is_named(TSNode self) { - return ts_node__tree(self)->options.named; + return ts_node__tree(self)->named; } bool ts_node_has_changes(TSNode self) { - return ts_node__tree(self)->options.has_changes; + return ts_node__tree(self)->has_changes; } TSNode ts_node_parent(TSNode self) { @@ -233,7 +233,7 @@ TSNode ts_node_parent(TSNode self) { result = ts_node__direct_parent(result, &index); if (!result.data) return ts_node__null(); - } while (!ts_tree_is_visible(result.data)); + } while (!ts_node__tree(result)->visible); return result; } diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 2e77088f..0ce47f45 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -60,7 +60,7 @@ static void ts_parser__breakdown_top_of_stack(TSParser *self, int head) { TSStateId state = ts_stack_top_state(self->stack, head_index); for (size_t j = 0; j < parent->child_count; j++) { last_child = parent->children[j]; - if (!last_child->options.extra) { + if (!last_child->extra) { TSParseAction action = ts_language_last_action(self->language, state, last_child->symbol); assert(action.type == TSParseActionTypeShift); @@ -130,15 +130,15 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { if (subtree->symbol == ts_builtin_sym_error) return false; if (ts_tree_is_fragile(subtree)) { - if (subtree->context.parse_state != ts_stack_top_state(self->stack, head)) + if (subtree->parse_state != ts_stack_top_state(self->stack, head)) return false; } TSStateId state = ts_stack_top_state(self->stack, head); - if (subtree->context.lex_state != TSTREE_STATE_INDEPENDENT) { + if (subtree->lex_state != TSTREE_STATE_INDEPENDENT) { TSStateId lex_state = self->language->lex_states[state]; - if (subtree->context.lex_state != lex_state) + if (subtree->lex_state != lex_state) return false; } @@ -147,7 +147,7 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { if (action->type == TSParseActionTypeError || action->can_hide_split) return false; - if (ts_tree_is_extra(subtree) && !action->extra) + if (subtree->extra && !action->extra) return false; return true; @@ -173,7 +173,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { continue; } - if (ts_tree_has_changes(state->reusable_subtree)) { + if (state->reusable_subtree->has_changes) { if (state->is_verifying && state->reusable_subtree->child_count == 0) { ts_parser__breakdown_top_of_stack(self, head); state->is_verifying = false; @@ -193,7 +193,7 @@ static TSTree *ts_parser__get_next_lookahead(TSParser *self, int head) { TSTree *result = state->reusable_subtree; TSLength size = ts_tree_total_size(result); LOG("reuse sym:%s size:%lu extra:%d", SYM_NAME(result->symbol), size.chars, - result->options.extra); + result->extra); ts_parser__pop_reusable_subtree(state); return result; } @@ -249,7 +249,7 @@ static bool ts_parser__shift_extra(TSParser *self, int head, TSStateId state, TSSymbolMetadata metadata = self->language->symbol_metadata[lookahead->symbol]; if (metadata.structural && ts_stack_head_count(self->stack) > 1) lookahead = ts_tree_make_copy(lookahead); - ts_tree_set_extra(lookahead); + lookahead->extra = true; return ts_parser__shift(self, head, state, lookahead); } @@ -289,7 +289,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, */ if (!parent) { for (size_t j = pop_result->tree_count - 1; j + 1 > 0; j--) { - if (pop_result->trees[j]->options.extra) { + if (pop_result->trees[j]->extra) { trailing_extra_count++; } else break; @@ -338,11 +338,11 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, TSStateId state; TSStateId top_state = ts_stack_top_state(self->stack, new_head); - if (parent->context.parse_state != TSTREE_STATE_ERROR) - parent->context.parse_state = top_state; + if (parent->parse_state != TSTREE_STATE_ERROR) + parent->parse_state = top_state; if (extra) { - ts_tree_set_extra(parent); + parent->extra = true; state = top_state; } else { TSParseAction action = @@ -382,17 +382,16 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, if (self->is_split || ts_stack_head_count(self->stack) > 1) { for (size_t i = 0, size = self->reduce_parents.size; i < size; i++) { TSTree **parent = vector_get(&self->reduce_parents, i); - (*parent)->options.fragile_left = true; - (*parent)->options.fragile_right = true; - (*parent)->context.parse_state = TSTREE_STATE_ERROR; + (*parent)->fragile_left = true; + (*parent)->fragile_right = true; + (*parent)->parse_state = TSTREE_STATE_ERROR; } } if (fragile) { for (size_t i = 0; i < self->reduce_parents.size; i++) { TSTree **parent = vector_get(&self->reduce_parents, i); - ts_tree_set_fragile_left(*parent); - ts_tree_set_fragile_right(*parent); + (*parent)->fragile_left = (*parent)->fragile_right = true; } } @@ -409,9 +408,8 @@ static void ts_parser__reduce_error(TSParser *self, int head, stack_entry->position = ts_length_add(stack_entry->position, lookahead->padding); (*parent)->size = ts_length_add((*parent)->size, lookahead->padding); + (*parent)->fragile_left = (*parent)->fragile_right = true; lookahead->padding = ts_length_zero(); - ts_tree_set_fragile_left(*parent); - ts_tree_set_fragile_right(*parent); } } @@ -498,7 +496,7 @@ static TSTree *ts_parser__finish(TSParser *self, int finished_stack_head) { StackPopResult *pop_result = vector_get(&pop_results, 0); for (size_t i = 0; i < pop_result->tree_count; i++) { - if (!pop_result->trees[i]->options.extra) { + if (!pop_result->trees[i]->extra) { TSTree *root = pop_result->trees[i]; size_t leading_extra_count = i; size_t trailing_extra_count = pop_result->tree_count - 1 - i; diff --git a/src/runtime/stack.c b/src/runtime/stack.c index 4e8e7645..14f55610 100644 --- a/src/runtime/stack.c +++ b/src/runtime/stack.c @@ -283,7 +283,7 @@ Vector ts_stack_pop(Stack *self, int head_index, int child_count, /* * Children that are 'extra' do not count towards the total child count. */ - if (ts_tree_is_extra(node->entry.tree) && !count_extra) + if (node->entry.tree->extra && !count_extra) path->goal_tree_count++; /* diff --git a/src/runtime/tree.c b/src/runtime/tree.c index ba4ccc45..c55a6545 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -18,15 +18,15 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .named_child_count = 0, .children = NULL, .padding = padding, - .options.visible = metadata.visible, - .options.named = metadata.named, - .context.lex_state = TSTREE_STATE_INDEPENDENT, - .context.parse_state = TSTREE_STATE_INDEPENDENT, + .visible = metadata.visible, + .named = metadata.named, + .lex_state = TSTREE_STATE_INDEPENDENT, + .parse_state = TSTREE_STATE_INDEPENDENT, }; if (sym == ts_builtin_sym_error) { - result->options.fragile_left = true; - result->options.fragile_right = true; + result->fragile_left = true; + result->fragile_right = true; } return result; @@ -77,9 +77,9 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { self->size = ts_length_add(self->size, ts_tree_total_size(child)); } - if (child->options.visible) { + if (child->visible) { self->visible_child_count++; - if (child->options.named) + if (child->named) self->named_child_count++; } else { self->visible_child_count += child->visible_child_count; @@ -87,17 +87,17 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { } if (child->symbol == ts_builtin_sym_error) { - self->options.fragile_left = self->options.fragile_right = true; - self->context.parse_state = TSTREE_STATE_ERROR; + self->fragile_left = self->fragile_right = true; + self->parse_state = TSTREE_STATE_ERROR; } } if (child_count > 0) { - self->context.lex_state = children[0]->context.lex_state; - if (children[0]->options.fragile_left) - self->options.fragile_left = true; - if (children[child_count - 1]->options.fragile_right) - self->options.fragile_right = true; + self->lex_state = children[0]->lex_state; + if (children[0]->fragile_left) + self->fragile_left = true; + if (children[child_count - 1]->fragile_right) + self->fragile_right = true; } } @@ -155,9 +155,9 @@ bool ts_tree_eq(const TSTree *self, const TSTree *other) { if (self->symbol != other->symbol) return false; - if (self->options.visible != other->options.visible) + if (self->visible != other->visible) return false; - if (self->options.named != other->options.named) + if (self->named != other->named) return false; if (self->symbol == ts_builtin_sym_error) return self->lookahead_char == other->lookahead_char; @@ -216,8 +216,8 @@ static size_t ts_tree__write_to_string(const TSTree *self, char *cursor = string; char **writer = (limit > 0) ? &cursor : &string; - bool visible = is_root || (self->options.visible && - (include_anonymous || self->options.named)); + bool visible = is_root || (self->visible && + (include_anonymous || self->named)); if (visible && !is_root) cursor += snprintf(*writer, limit, " "); @@ -264,7 +264,7 @@ void ts_tree_edit(TSTree *self, TSInputEdit edit) { size_t old_end = edit.position + edit.chars_removed; assert(old_end <= ts_tree_total_chars(self)); - self->options.has_changes = true; + self->has_changes = true; if (start < self->padding.chars) { ts_length_set_unknown(&self->padding); diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 0e6a464e..6ac40289 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -17,9 +17,8 @@ struct TSTree { struct TSTree *parent; size_t index; TSLength offset; - TSStateId lex_state; - TSStateId parse_state; } context; + size_t child_count; size_t visible_child_count; size_t named_child_count; @@ -27,20 +26,20 @@ struct TSTree { struct TSTree **children; char lookahead_char; }; + TSLength padding; TSLength size; TSSymbol symbol; - - struct { - bool visible : 1; - bool named : 1; - bool extra : 1; - bool fragile_left : 1; - bool fragile_right : 1; - bool has_changes : 1; - } options; - unsigned short int ref_count; + TSStateId lex_state; + TSStateId parse_state; + unsigned short ref_count; + bool visible : 1; + bool named : 1; + bool extra : 1; + bool fragile_left : 1; + bool fragile_right : 1; + bool has_changes : 1; }; TSTree *ts_tree_make_leaf(TSSymbol, TSLength, TSLength, TSSymbolMetadata); @@ -68,61 +67,8 @@ static inline TSLength ts_tree_total_size(const TSTree *self) { return ts_length_add(self->padding, self->size); } -static inline TSPoint ts_tree_extent(const TSTree *tree) { - TSPoint result; - result.row = tree->size.rows; - result.column = tree->size.columns; - return result; -} - -static inline bool ts_tree_is_extra(const TSTree *tree) { - return tree->options.extra; -} - -static inline bool ts_tree_is_visible(const TSTree *tree) { - return tree->options.visible; -} - -static inline void ts_tree_set_extra(TSTree *tree) { - tree->options.extra = true; -} - -static inline void ts_tree_set_fragile_left(TSTree *tree) { - tree->options.fragile_left = true; -} - -static inline void ts_tree_set_fragile_right(TSTree *tree) { - tree->options.fragile_right = true; -} - -static inline void ts_tree_set_fragile(TSTree *tree) { - ts_tree_set_fragile_left(tree); - ts_tree_set_fragile_right(tree); -} - -static inline bool ts_tree_is_fragile_left(const TSTree *tree) { - return tree->options.fragile_left; -} - -static inline bool ts_tree_is_fragile_right(const TSTree *tree) { - return tree->options.fragile_right; -} - -static inline bool ts_tree_is_terminal(const TSTree *tree) { - return tree->child_count == 0; -} - -static inline bool ts_tree_has_changes(const TSTree *tree) { - return tree->options.has_changes; -} - -static inline bool ts_tree_is_empty(const TSTree *tree) { - return ts_tree_total_size(tree).chars == 0; -} - static inline bool ts_tree_is_fragile(const TSTree *tree) { - return ts_tree_is_empty(tree) || tree->options.fragile_left || - tree->options.fragile_right; + return tree->fragile_left || tree->fragile_right || ts_tree_total_chars(tree) == 0; } #ifdef __cplusplus From 7f1c5f75687f38498cbf236e4c0e30b4d11c62e7 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Tue, 22 Dec 2015 14:37:29 -0800 Subject: [PATCH 7/7] Don't use preprocesser for special tree state values --- src/runtime/parser.c | 6 +++--- src/runtime/tree.c | 10 +++++++--- src/runtime/tree.h | 4 ++-- 3 files changed, 12 insertions(+), 8 deletions(-) diff --git a/src/runtime/parser.c b/src/runtime/parser.c index 0ce47f45..4b2cbeba 100644 --- a/src/runtime/parser.c +++ b/src/runtime/parser.c @@ -136,7 +136,7 @@ static bool ts_parser__can_reuse(TSParser *self, int head, TSTree *subtree) { TSStateId state = ts_stack_top_state(self->stack, head); - if (subtree->lex_state != TSTREE_STATE_INDEPENDENT) { + if (subtree->lex_state != TS_TREE_STATE_INDEPENDENT) { TSStateId lex_state = self->language->lex_states[state]; if (subtree->lex_state != lex_state) return false; @@ -338,7 +338,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, TSStateId state; TSStateId top_state = ts_stack_top_state(self->stack, new_head); - if (parent->parse_state != TSTREE_STATE_ERROR) + if (parent->parse_state != TS_TREE_STATE_ERROR) parent->parse_state = top_state; if (extra) { @@ -384,7 +384,7 @@ static bool ts_parser__reduce(TSParser *self, int head, TSSymbol symbol, TSTree **parent = vector_get(&self->reduce_parents, i); (*parent)->fragile_left = true; (*parent)->fragile_right = true; - (*parent)->parse_state = TSTREE_STATE_ERROR; + (*parent)->parse_state = TS_TREE_STATE_ERROR; } } diff --git a/src/runtime/tree.c b/src/runtime/tree.c index c55a6545..0f9a279c 100644 --- a/src/runtime/tree.c +++ b/src/runtime/tree.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -6,6 +7,9 @@ #include "runtime/tree.h" #include "runtime/length.h" +TSStateId TS_TREE_STATE_INDEPENDENT = USHRT_MAX; +TSStateId TS_TREE_STATE_ERROR = USHRT_MAX - 1; + TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, TSSymbolMetadata metadata) { TSTree *result = malloc(sizeof(TSTree)); @@ -20,8 +24,8 @@ TSTree *ts_tree_make_leaf(TSSymbol sym, TSLength padding, TSLength size, .padding = padding, .visible = metadata.visible, .named = metadata.named, - .lex_state = TSTREE_STATE_INDEPENDENT, - .parse_state = TSTREE_STATE_INDEPENDENT, + .lex_state = TS_TREE_STATE_INDEPENDENT, + .parse_state = TS_TREE_STATE_INDEPENDENT, }; if (sym == ts_builtin_sym_error) { @@ -88,7 +92,7 @@ void ts_tree_set_children(TSTree *self, size_t child_count, TSTree **children) { if (child->symbol == ts_builtin_sym_error) { self->fragile_left = self->fragile_right = true; - self->parse_state = TSTREE_STATE_ERROR; + self->parse_state = TS_TREE_STATE_ERROR; } } diff --git a/src/runtime/tree.h b/src/runtime/tree.h index 6ac40289..615d6f6e 100644 --- a/src/runtime/tree.h +++ b/src/runtime/tree.h @@ -9,8 +9,8 @@ extern "C" { #include "tree_sitter/parser.h" #include "runtime/length.h" -#define TSTREE_STATE_INDEPENDENT (unsigned short)(-1) -#define TSTREE_STATE_ERROR (TSTREE_STATE_INDEPENDENT - 1) +extern TSStateId TS_TREE_STATE_INDEPENDENT; +extern TSStateId TS_TREE_STATE_ERROR; struct TSTree { struct {