In the lexer, always prefer the longest match

Only use rules' precedence to decide between two tokens
that match the same string.
This commit is contained in:
Max Brunsfeld 2014-07-03 08:57:35 -07:00
parent c85841364e
commit 77df7fe511
5 changed files with 252 additions and 43 deletions

View file

@ -589,6 +589,11 @@ LEX_FN() {
ADVANCE(54);
ACCEPT_TOKEN(ts_sym__identifier);
case 54:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_interface);
case 55:
if (('0' <= lookahead && lookahead <= '9') ||
@ -610,6 +615,11 @@ LEX_FN() {
ADVANCE(57);
ACCEPT_TOKEN(ts_sym__identifier);
case 57:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_map);
case 58:
if (('0' <= lookahead && lookahead <= '9') ||
@ -662,6 +672,11 @@ LEX_FN() {
ADVANCE(63);
ACCEPT_TOKEN(ts_sym__identifier);
case 63:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_struct);
case 64:
START_TOKEN();
@ -678,6 +693,10 @@ LEX_FN() {
LEX_ERROR();
case 65:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(64);
if (lookahead == '\n')
ADVANCE(65);
if (lookahead == '/')
@ -896,6 +915,10 @@ LEX_FN() {
LEX_ERROR();
case 85:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(84);
if (lookahead == '\n')
ADVANCE(85);
if (lookahead == '/')
@ -974,6 +997,10 @@ LEX_FN() {
LEX_ERROR();
case 92:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(91);
if (lookahead == '\n')
ADVANCE(92);
if (lookahead == '&')
@ -1155,6 +1182,11 @@ LEX_FN() {
ADVANCE(114);
ACCEPT_TOKEN(ts_sym__identifier);
case 114:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_for);
case 115:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1167,6 +1199,11 @@ LEX_FN() {
ADVANCE(116);
ACCEPT_TOKEN(ts_sym__identifier);
case 116:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_if);
case 117:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1219,6 +1256,11 @@ LEX_FN() {
ADVANCE(122);
ACCEPT_TOKEN(ts_sym__identifier);
case 122:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_return);
case 123:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1240,6 +1282,11 @@ LEX_FN() {
ADVANCE(125);
ACCEPT_TOKEN(ts_sym__identifier);
case 125:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_var);
case 126:
START_TOKEN();
@ -1280,6 +1327,10 @@ LEX_FN() {
LEX_ERROR();
case 127:
START_TOKEN();
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(126);
if (lookahead == '\n')
ADVANCE(127);
if (lookahead == '&')
@ -1572,6 +1623,11 @@ LEX_FN() {
ADVANCE(143);
ACCEPT_TOKEN(ts_sym__identifier);
case 143:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_else);
case 144:
START_TOKEN();
@ -1694,6 +1750,10 @@ LEX_FN() {
START_TOKEN();
if (lookahead == '\0')
ADVANCE(12);
if ((lookahead == '\t') ||
(lookahead == '\r') ||
(lookahead == ' '))
ADVANCE(148);
if (lookahead == '\n')
ADVANCE(149);
if (lookahead == '!')
@ -1806,6 +1866,11 @@ LEX_FN() {
ADVANCE(154);
ACCEPT_TOKEN(ts_sym__identifier);
case 154:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_func);
case 155:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1863,6 +1928,11 @@ LEX_FN() {
ADVANCE(160);
ACCEPT_TOKEN(ts_sym__identifier);
case 160:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_import);
case 161:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1923,6 +1993,11 @@ LEX_FN() {
ADVANCE(167);
ACCEPT_TOKEN(ts_sym__identifier);
case 167:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_package);
case 168:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1967,6 +2042,11 @@ LEX_FN() {
ADVANCE(172);
ACCEPT_TOKEN(ts_sym__identifier);
case 172:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_range);
case 173:
if (('0' <= lookahead && lookahead <= '9') ||
@ -1999,6 +2079,11 @@ LEX_FN() {
ADVANCE(176);
ACCEPT_TOKEN(ts_sym__identifier);
case 176:
if (('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(42);
ACCEPT_TOKEN(ts_aux_sym_type);
case ts_lex_state_error:
START_TOKEN();

View file

@ -655,6 +655,12 @@ LEX_FN() {
ADVANCE(47);
ACCEPT_TOKEN(ts_sym_identifier);
case 47:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_break);
case 48:
if ((lookahead == '$') ||
@ -712,6 +718,12 @@ LEX_FN() {
ADVANCE(53);
ACCEPT_TOKEN(ts_sym_identifier);
case 53:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_delete);
case 54:
if ((lookahead == '$') ||
@ -763,6 +775,12 @@ LEX_FN() {
ADVANCE(58);
ACCEPT_TOKEN(ts_sym_identifier);
case 58:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_sym_false);
case 59:
if ((lookahead == '$') ||
@ -776,6 +794,12 @@ LEX_FN() {
ADVANCE(60);
ACCEPT_TOKEN(ts_sym_identifier);
case 60:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_for);
case 61:
if ((lookahead == '$') ||
@ -844,6 +868,12 @@ LEX_FN() {
ADVANCE(67);
ACCEPT_TOKEN(ts_sym_identifier);
case 67:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_function);
case 68:
if ((lookahead == '$') ||
@ -857,6 +887,12 @@ LEX_FN() {
ADVANCE(69);
ACCEPT_TOKEN(ts_sym_identifier);
case 69:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_if);
case 70:
if ((lookahead == '$') ||
@ -884,6 +920,12 @@ LEX_FN() {
ADVANCE(72);
ACCEPT_TOKEN(ts_sym_identifier);
case 72:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_new);
case 73:
if ((lookahead == '$') ||
@ -908,6 +950,12 @@ LEX_FN() {
ADVANCE(75);
ACCEPT_TOKEN(ts_sym_identifier);
case 75:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_sym_null);
case 76:
if ((lookahead == '$') ||
@ -965,6 +1013,12 @@ LEX_FN() {
ADVANCE(81);
ACCEPT_TOKEN(ts_sym_identifier);
case 81:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_return);
case 82:
if ((lookahead == '$') ||
@ -1022,6 +1076,12 @@ LEX_FN() {
ADVANCE(87);
ACCEPT_TOKEN(ts_sym_identifier);
case 87:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_switch);
case 88:
if ((lookahead == '$') ||
@ -1074,6 +1134,12 @@ LEX_FN() {
ADVANCE(92);
ACCEPT_TOKEN(ts_sym_identifier);
case 92:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_throw);
case 93:
if ((lookahead == '$') ||
@ -1101,8 +1167,20 @@ LEX_FN() {
ADVANCE(95);
ACCEPT_TOKEN(ts_sym_identifier);
case 95:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_sym_true);
case 96:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_try);
case 97:
if ((lookahead == '$') ||
@ -1149,6 +1227,12 @@ LEX_FN() {
ADVANCE(101);
ACCEPT_TOKEN(ts_sym_identifier);
case 101:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_typeof);
case 102:
if ((lookahead == '$') ||
@ -1172,6 +1256,12 @@ LEX_FN() {
ADVANCE(104);
ACCEPT_TOKEN(ts_sym_identifier);
case 104:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_var);
case 105:
if ((lookahead == '$') ||
@ -1218,6 +1308,12 @@ LEX_FN() {
ADVANCE(109);
ACCEPT_TOKEN(ts_sym_identifier);
case 109:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_while);
case 110:
ACCEPT_TOKEN(ts_aux_sym_1);
@ -1304,6 +1400,10 @@ LEX_FN() {
case 120:
ACCEPT_TOKEN(ts_aux_sym_30);
case 121:
if (lookahead == '*')
ADVANCE(31);
if (lookahead == '/')
ADVANCE(37);
ACCEPT_TOKEN(ts_aux_sym_13);
case 122:
ACCEPT_TOKEN(ts_aux_sym_5);
@ -1465,6 +1565,16 @@ LEX_FN() {
ADVANCE(115);
ACCEPT_TOKEN(ts_aux_sym_27);
case 146:
if (!((lookahead == '*') ||
(lookahead == '/') ||
(lookahead == '\\')))
ADVANCE(19);
if (lookahead == '*')
ADVANCE(25);
if (lookahead == '/')
ADVANCE(36);
if (lookahead == '\\')
ADVANCE(22);
ACCEPT_TOKEN(ts_aux_sym_13);
case 147:
if ((lookahead == '$') ||
@ -1481,6 +1591,13 @@ LEX_FN() {
ADVANCE(148);
ACCEPT_TOKEN(ts_sym_identifier);
case 148:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'r') ||
('t' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
if (lookahead == 's')
ADVANCE(149);
ACCEPT_TOKEN(ts_aux_sym_in);
@ -1561,6 +1678,12 @@ LEX_FN() {
ADVANCE(156);
ACCEPT_TOKEN(ts_sym_identifier);
case 156:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_instanceof);
case 157:
START_TOKEN();
@ -2882,6 +3005,12 @@ LEX_FN() {
ADVANCE(203);
ACCEPT_TOKEN(ts_sym_identifier);
case 203:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_else);
case 204:
START_TOKEN();
@ -2991,6 +3120,12 @@ LEX_FN() {
ADVANCE(209);
ACCEPT_TOKEN(ts_sym_identifier);
case 209:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_catch);
case 210:
if ((lookahead == '$') ||
@ -3066,6 +3201,12 @@ LEX_FN() {
ADVANCE(216);
ACCEPT_TOKEN(ts_sym_identifier);
case 216:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_finally);
case 217:
START_TOKEN();
@ -3284,6 +3425,12 @@ LEX_FN() {
ADVANCE(234);
ACCEPT_TOKEN(ts_sym_identifier);
case 234:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_case);
case 235:
if ((lookahead == '$') ||
@ -3354,6 +3501,12 @@ LEX_FN() {
ADVANCE(241);
ACCEPT_TOKEN(ts_sym_identifier);
case 241:
if ((lookahead == '$') ||
('0' <= lookahead && lookahead <= '9') ||
('A' <= lookahead && lookahead <= 'Z') ||
(lookahead == '_') ||
('a' <= lookahead && lookahead <= 'z'))
ADVANCE(9);
ACCEPT_TOKEN(ts_aux_sym_default);
case 242:
START_TOKEN();

View file

@ -46,38 +46,12 @@ describe("resolving parse conflicts", []() {
});
describe("accept-token/advance conflicts", [&]() {
describe("when the the accept-token has higher precedence", [&]() {
it("prefers the accept", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 3), LexAction::Advance(1, { 0 }));
AssertThat(update, IsTrue());
update = manager->resolve_lex_action(LexAction::Advance(1, { -1 }), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});
describe("when the the actions have the same precedence", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
AssertThat(update, IsTrue());
update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
});
describe("when the advance has conflicting precedences compared to the accept", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
AssertThat(update, IsTrue());
update = manager->resolve_lex_action(LexAction::Advance(1, { -2, 2 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
it_skip("records a conflict", [&]() {
manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
});
update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 3));
AssertThat(update, IsFalse());
});
});

View file

@ -81,3 +81,11 @@ var thing = {
(comment)
(expression_statement (function_call (identifier)))))))))
==========================================
parses comments within expressions
==========================================
y // comment
* z;
---
(program
(expression_statement (math_op (expression (identifier) (comment)) (identifier))))

View file

@ -39,18 +39,7 @@ namespace tree_sitter {
}
}
case LexActionTypeAdvance: {
// int min_precedence = *new_action.precedence_values.begin();
int max_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > old_precedence) {
// if (min_precedence < old_precedence)
return true;
} else if (max_precedence < old_precedence) {
return false;
} else {
return true;
}
return false;
return true;
}
default:
return false;