In lexer, prefer tokens to skipped separator characters
Previously, newlines in Go and JavaScript were parsed as meaningless separator characters instead of statement terminators.
parent 220e081c49
commit e93e254518

26 changed files with 5559 additions and 6650 deletions
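The grammar-side pattern, condensed from the Go example below (a sketch only; the same approach is applied in the JavaScript grammar): statement rules are wrapped in a terminated() helper, and the terminator is declared with token() so the lexer can emit "\n" and ";" as real tokens instead of skipping newlines as separator whitespace.

static rule_ptr terminated(rule_ptr rule) {
return seq({ rule, sym("_terminator") });
}

{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") })) },

{ "_terminator", token(choice({
str("\n"),
str(";") })) },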
@@ -6,6 +6,10 @@ namespace tree_sitter_examples {
using tree_sitter::GrammarOptions;
using namespace tree_sitter::rules;

static rule_ptr terminated(rule_ptr rule) {
return seq({ rule, sym("_terminator") });
}

extern const Grammar golang({
{ "program", seq({
sym("package_directive"),
@@ -20,28 +24,26 @@ namespace tree_sitter_examples {
in_parens(err(repeat(sym("package_import")))),
sym("package_import") }) }) },
{ "package_import", sym("string") },
{ "declaration", seq({
choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }),
blank() }) },
{ "declaration", choice({
sym("type_declaration"),
sym("var_declaration"),
sym("func_declaration") }) },

// Declarations
{ "type_declaration", seq({
{ "type_declaration", terminated(seq({
keyword("type"),
sym("type_name"),
sym("type_expression") }) },
{ "var_declaration", seq({
sym("type_expression") })) },
{ "var_declaration", terminated(seq({
keyword("var"),
sym("var_name"),
str("="),
sym("expression") }) },
{ "func_declaration", seq({
sym("expression") })) },
{ "func_declaration", terminated(seq({
keyword("func"),
sym("var_name"),
sym("_func_signature"),
sym("statement_block") }) },
sym("statement_block") })) },
{ "statement_block", in_braces(blank()) },
{ "type_expression", choice({
sym("pointer_type"),
@@ -108,6 +110,10 @@ namespace tree_sitter_examples {
sym("type_name"),
blank() }) }) },

{ "_terminator", token(choice({
str("\n"),
str(";") })) },

{ "string", delimited("\"") },
{ "package_name", sym("_identifier") },
{ "var_name", sym("_identifier") },

File diff suppressed because it is too large
File diff suppressed because it is too large
@@ -31,6 +31,7 @@ static ts_state_id ts_lex_states[STATE_COUNT]
static ts_tree * ts_lex(ts_lexer *lexer, ts_state_id lex_state)

#define START_LEXER() \
DEBUG_LEX("LEX %d", lex_state); \
char lookahead; \
next_state: \
lookahead = ts_lexer_lookahead_char(lexer); \
@@ -46,7 +47,7 @@ ts_lexer_start_token(lexer);
{ DEBUG_LEX("TOKEN %s", ts_symbol_names[symbol]); return ts_lexer_build_node(lexer, symbol); }

#define LEX_ERROR() \
{ return ts_lexer_build_node(lexer, ts_builtin_sym_error); }
{ DEBUG_LEX("ERROR"); return ts_lexer_build_node(lexer, ts_builtin_sym_error); }

#define LEX_PANIC() \
{ DEBUG_LEX("LEX ERROR: unexpected state %d", lex_state); return NULL; }
@@ -8,7 +8,7 @@ using namespace build_tables;
START_TEST

describe("resolving parse conflicts", []() {
bool should_update;
bool update;

PreparedGrammar parse_grammar({
{ "rule1", seq({ sym("rule2"), sym("token2") }) },
@@ -37,31 +37,67 @@ describe("resolving parse conflicts", []() {
});

it("favors non-errors over lexical errors", [&]() {
should_update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Error(), LexAction::Advance(2, {0}));
AssertThat(update, IsTrue());

should_update = manager->resolve_lex_action(LexAction::Advance(2), LexAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Advance(2, {0}), LexAction::Error());
AssertThat(update, IsFalse());
});

describe("accept-token/advance conflicts", [&]() {
describe("when the accept-token has higher precedence", [&]() {
it("prefers the accept", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());

update = manager->resolve_lex_action(LexAction::Advance(1, { -1 }), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});

describe("when the actions have the same precedence", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
AssertThat(update, IsTrue());

update = manager->resolve_lex_action(LexAction::Advance(1, { 0 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});
});

describe("when the advance has conflicting precedences compared to the accept", [&]() {
it("prefers the advance", [&]() {
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
AssertThat(update, IsTrue());

update = manager->resolve_lex_action(LexAction::Advance(1, { -2, 2 }), LexAction::Accept(sym3, 0));
AssertThat(update, IsFalse());
});

it_skip("records a conflict", [&]() {
manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -2, 2 }));
});
});
});

describe("accept-token/accept-token conflicts", [&]() {
describe("when one token has a higher precedence than the other", [&]() {
it("prefers the token with the higher precedence", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());

should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym3, 2));
AssertThat(update, IsTrue());
});
});

describe("when both tokens have the same precedence", [&]() {
it("prefers the token listed earlier in the grammar", [&]() {
should_update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(should_update, IsFalse());
update = manager->resolve_lex_action(LexAction::Accept(sym1, 0), LexAction::Accept(sym2, 0));
AssertThat(update, IsFalse());

should_update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(should_update, IsTrue());
update = manager->resolve_lex_action(LexAction::Accept(sym2, 0), LexAction::Accept(sym1, 0));
AssertThat(update, IsTrue());
});
});
});
@@ -81,11 +117,11 @@ describe("resolving parse conflicts", []() {
});

it("favors non-errors over parse errors", [&]() {
should_update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(should_update, IsTrue());
update = manager->resolve_parse_action(sym1, ParseAction::Error(), ParseAction::Shift(2, { 0 }));
AssertThat(update, IsTrue());

should_update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(should_update, IsFalse());
update = manager->resolve_parse_action(sym1, ParseAction::Shift(2, { 0 }), ParseAction::Error());
AssertThat(update, IsFalse());
});

describe("shift/reduce conflicts", [&]() {
@@ -34,7 +34,7 @@ namespace tree_sitter {
rule_ptr i_aux_token(size_t index) {
return make_shared<rules::Symbol>(index, SymbolOption(SymbolOptionAuxiliary|SymbolOptionToken));
}

rule_ptr metadata(rule_ptr rule, map<MetadataKey, int> values) {
return make_shared<Metadata>(rule, values);
}
@@ -15,9 +15,9 @@ describe("expanding token rules", []() {
pattern("x*"),
i_sym(11) }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@@ -26,7 +26,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});

it("replaces string rules with a sequence of characters", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@@ -34,9 +34,9 @@ describe("expanding token rules", []() {
str("xyz"),
i_sym(11) }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, Equals((const GrammarError *)nullptr));
AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", seq({
@@ -45,7 +45,7 @@ describe("expanding token rules", []() {
i_sym(11) }) },
}, {})));
});

it("returns an error when the grammar contains an invalid regex", [&]() {
PreparedGrammar grammar({
{ "rule_A", seq({
@@ -53,7 +53,7 @@ describe("expanding token rules", []() {
str("xyz"),
pattern("[") }) },
}, {});

auto result = expand_tokens(grammar);

AssertThat(result.second, EqualsPointer(new GrammarError(GrammarErrorTypeRegex, "unmatched open paren")));
@@ -82,11 +82,11 @@ describe("extracting tokens from a grammar", []() {
pair<PreparedGrammar, PreparedGrammar> result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", seq({ str("ab"), i_sym(0) }) }
}));

AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_A", seq({ i_aux_token(0), i_sym(0) }) }
})));

AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "'ab'", str("ab") },
})));
@@ -99,34 +99,34 @@ describe("extracting tokens from a grammar", []() {
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {}));

AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_A", i_token(0) }
}, {})));

AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_B", pattern("a|b") },
{ "rule_C", token(seq({ str("a"), str("b") })) },
}, {})));
});

it("updates symbols whose indices need to change due to deleted rules", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
{ "rule_B", i_sym(0) },
{ "rule_C", i_sym(1) },
}, {}));

AssertThat(result.first, Equals(PreparedGrammar({
{ "rule_B", i_token(0) },
{ "rule_C", i_sym(0) },
}, {})));

AssertThat(result.second, Equals(PreparedGrammar({
{ "rule_A", str("ab") },
}, {})));
});

it("updates the grammar's ubiquitous_tokens", [&]() {
auto result = extract_tokens(PreparedGrammar({
{ "rule_A", str("ab") },
@@ -135,24 +135,24 @@ describe("extracting tokens from a grammar", []() {
}, {}, PreparedGrammarOptions({
{ Symbol(0) }
})));

AssertThat(result.first.options.ubiquitous_tokens, Equals(vector<Symbol>({
{ Symbol(0, SymbolOptionToken) }
})));
});

it("extracts entire auxiliary rules", [&]() {
auto result = extract_tokens(PreparedGrammar({}, {
{ "rule_A", str("ab") },
{ "rule_B", i_aux_sym(0) },
{ "rule_C", i_aux_sym(1) },
}));

AssertThat(result.first, Equals(PreparedGrammar({}, {
{ "rule_B", i_aux_token(0) },
{ "rule_C", i_aux_sym(0) },
})));

AssertThat(result.second, Equals(PreparedGrammar({}, {
{ "rule_A", str("ab") },
})));
@@ -13,13 +13,13 @@ describe("parsing regex patterns", []() {
"[aAeE]",
character({ 'a', 'A', 'e', 'E' })
},

{
"'.' characters as wildcards",
".",
CharacterSet({'\n'}).complement().copy()
},

{
"character classes",
"\\w-\\d",
@@ -28,7 +28,7 @@ describe("parsing regex patterns", []() {
character({ '-' }),
character({ {'0', '9'} }) })
},

{
"choices",
"ab|cd|ef",
@@ -47,7 +47,7 @@ describe("parsing regex patterns", []() {
})
})
},

{
"simple sequences",
"abc",
@@ -56,25 +56,25 @@ describe("parsing regex patterns", []() {
character({ 'b' }),
character({ 'c' }) })
},

{
"character ranges",
"[12a-dA-D3]",
character({ {'1', '3'}, {'a', 'd'}, { 'A', 'D' }, })
},

{
"negated characters",
"[^a\\d]",
character({ {'a'}, {'0', '9'} }, false)
},

{
"backslashes",
"\\\\",
character({ '\\' })
},

{
"character groups in sequences",
"x([^x]|\\\\x)*x",
@@ -87,7 +87,7 @@ describe("parsing regex patterns", []() {
character({ 'x' })
})
},

{
"choices in sequences",
"(a|b)cd",
@@ -100,7 +100,7 @@ describe("parsing regex patterns", []() {
character({ 'd' })
})
},

{
"escaped parentheses",
"a\\(b",
@@ -110,7 +110,7 @@ describe("parsing regex patterns", []() {
character({ 'b' })
})
},

{
"escaped periods",
"a\\.",
@@ -119,7 +119,7 @@ describe("parsing regex patterns", []() {
character({ '.' })
})
},

{
"plus repeats",
"(ab)+(cd)+",
@@ -134,7 +134,7 @@ describe("parsing regex patterns", []() {
}),
})
},

{
"asterisk repeats",
"(ab)*(cd)*",
@@ -143,7 +143,7 @@ describe("parsing regex patterns", []() {
repeat(seq({ character({ 'c' }), character({ 'd' }) })),
})
},

{
"optional rules",
"a(bc)?",
@@ -156,7 +156,7 @@ describe("parsing regex patterns", []() {
})
}
};

vector<tuple<string, string, const char *>> invalid_inputs = {
{
"mismatched open parens",
@@ -189,23 +189,23 @@ describe("parsing regex patterns", []() {
"unmatched close square bracket",
},
};

for (auto &triple : valid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
rule_ptr rule = get<2>(triple);

it(("parses " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.first, EqualsPointer(rule));
});
}

for (auto &triple : invalid_inputs) {
string description = get<0>(triple);
string regex = get<1>(triple);
const char *expected_message = get<2>(triple);

it(("handles invalid regexes with " + description).c_str(), [&]() {
auto result = parse_regex(regex);
AssertThat(result.second, !Equals((const GrammarError *)nullptr));
@@ -27,3 +27,15 @@ func main() {
(func_declaration (var_name) (statement_block
(comment))))

==========================================
handles indented code after blocks
==========================================
package trivial

func one() {}
func two() {}
---
(program
(package_directive (package_name))
(func_declaration (var_name) (statement_block))
(func_declaration (var_name) (statement_block)))
@@ -80,6 +80,16 @@ try {
(statement_block (expression_statement (function_call (identifier) (identifier))))))

===========================================
parses indented code after blocks
===========================================
function x() {}
return z;
---
(program
(expression_statement
(function_expression (identifier) (formal_parameters) (statement_block)))
(return_statement (identifier)))
===========================================
parses switch statements
===========================================
switch(x) {
@@ -70,7 +70,9 @@ namespace tree_sitter {
CharacterSet rule = transition.first;
LexItemSet new_item_set = transition.second;
LexStateId new_state_id = add_lex_state(new_item_set);
lex_table.state(state_id).actions[rule] = LexAction::Advance(new_state_id);
auto action = LexAction::Advance(new_state_id, precedence_values_for_item_set(new_item_set));
if (conflict_manager.resolve_lex_action(lex_table.state(state_id).default_action, action))
lex_table.state(state_id).actions[rule] = action;
}
}
@@ -93,14 +95,24 @@ namespace tree_sitter {

rules::rule_ptr after_separators(rules::rule_ptr rule) {
return rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Metadata>(make_shared<rules::Blank>(), map<rules::MetadataKey, int>({
make_shared<rules::Metadata>(rules::Seq::Build({
make_shared<rules::Repeat>(CharacterSet({ ' ', '\t', '\n', '\r' }).copy()),
make_shared<rules::Blank>(),
}), map<rules::MetadataKey, int>({
{rules::START_TOKEN, 1},
{rules::PRECEDENCE, -1},
})),
rule
rule,
});
}

set<int> precedence_values_for_item_set(const LexItemSet &item_set) const {
set<int> result;
for (const auto &item : item_set)
result.insert(item.precedence());
return result;
}

public:
LexTableBuilder(ParseTable *parse_table, const PreparedGrammar &lex_grammar) :
lex_grammar(lex_grammar),
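In effect, the separator rule built by after_separators now carries PRECEDENCE -1 while grammar tokens default to 0, and each Advance action records the set of precedences reachable in its target state. A sketch in the style of the conflict-manager tests earlier in this commit (sym3 and the state number 1 are stand-ins taken from those tests): an advance whose only reachable precedence is the separator's -1 loses to accepting a real token, so a newline the grammar expects survives as a token.

// The separator-only advance carries precedence -1; accepting a real
// token at the default precedence 0 wins, so "\n" is kept as a token.
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { -1 }));
AssertThat(update, IsFalse());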
@@ -58,7 +58,7 @@ namespace tree_sitter {
}
}
}

void add_ubiquitous_token_actions(const ParseItemSet &item_set, ParseStateId state_id) {
for (const Symbol &symbol : grammar.options.ubiquitous_tokens) {
auto &actions = parse_table.states[state_id].actions;
@@ -1,5 +1,6 @@
#include "compiler/build_tables/get_metadata.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"

namespace tree_sitter {
namespace build_tables {
@@ -11,6 +12,13 @@ namespace tree_sitter {
return rule->value_for(metadata_key);
}

// TODO -
// Remove this. It is currently needed to make the rule generated
// by `LexTableBuilder::after_separators` have the right precedence.
int apply_to(const rules::Seq *rule) {
return apply(rule->left);
}

public:
explicit GetMetadata(rules::MetadataKey key) : metadata_key(key) {}
};
@@ -18,17 +18,45 @@ namespace tree_sitter {

bool LexConflictManager::resolve_lex_action(const LexAction &old_action,
const LexAction &new_action) {
if (new_action.type < old_action.type)
return !resolve_lex_action(new_action, old_action);

switch (old_action.type) {
case LexActionTypeError:
return true;
case LexActionTypeAccept:
if (new_action.precedence > old_action.precedence) {
return true;
} else if (new_action.precedence < old_action.precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
case LexActionTypeAccept: {
int old_precedence = *old_action.precedence_values.begin();
switch (new_action.type) {
case LexActionTypeAccept: {
int new_precedence = *new_action.precedence_values.begin();
if (new_precedence > old_precedence) {
return true;
} else if (new_precedence < old_precedence) {
return false;
} else {
return new_action.symbol.index < old_action.symbol.index;
}
}
case LexActionTypeAdvance: {
// int min_precedence = *new_action.precedence_values.begin();
int max_precedence = *new_action.precedence_values.rbegin();
if (max_precedence > old_precedence) {
// if (min_precedence < old_precedence)
return true;
} else if (max_precedence < old_precedence) {
return false;
} else {
return true;
}

return false;
}
default:
return false;
}

return true;
}
default:
return false;
}
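Summarizing the resolution policy implemented above: non-errors beat errors; between two accepts, higher precedence wins and ties go to the earlier-listed symbol; between an accept and an advance, the advance wins unless the highest precedence it can reach is lower than the accept's. Illustrative calls, mirroring the test expectations earlier in this commit:

update = manager->resolve_lex_action(LexAction::Accept(sym3, 2), LexAction::Accept(sym2, 0));
// false: the existing higher-precedence token is kept
update = manager->resolve_lex_action(LexAction::Accept(sym3, 0), LexAction::Advance(1, { 0 }));
// true: at equal precedence the advance is preferred, so longer tokens can still be matched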
@@ -88,7 +88,7 @@ namespace tree_sitter {
const PreparedGrammar & grammar_for_symbol(const rules::Symbol &symbol) {
return symbol.is_token() ? lexical_grammar : syntax_grammar;
}

string sanitize_name(string name) {
auto existing = sanitized_names.find(name);
if (existing != sanitized_names.end())
@@ -115,7 +115,7 @@ namespace tree_sitter {
}
}
}

bool has_sanitized_name(string name) {
for (auto &pair : sanitized_names)
if (pair.second == name)
@@ -48,7 +48,7 @@ namespace tree_sitter {
GrammarError::GrammarError(GrammarErrorType type, std::string message) :
type(type),
message(message) {}

bool GrammarError::operator==(const GrammarError &other) const {
return type == other.type && message == other.message;
}
@@ -12,24 +12,25 @@ namespace tree_sitter {
LexAction::LexAction() :
type(LexActionTypeError),
symbol(Symbol(-1)),
state_index(-1) {}
state_index(-1),
precedence_values({0}) {}

LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, int precedence) :
LexAction::LexAction(LexActionType type, size_t state_index, Symbol symbol, set<int> precedence_values) :
type(type),
symbol(symbol),
state_index(state_index),
precedence(precedence) {}
precedence_values(precedence_values) {}

LexAction LexAction::Error() {
return LexAction(LexActionTypeError, -1, Symbol(-1), 0);
return LexAction(LexActionTypeError, -1, Symbol(-1), {0});
}

LexAction LexAction::Advance(size_t state_index) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), 0);
LexAction LexAction::Advance(size_t state_index, set<int> precedence_values) {
return LexAction(LexActionTypeAdvance, state_index, Symbol(-1), precedence_values);
}

LexAction LexAction::Accept(Symbol symbol, int precedence) {
return LexAction(LexActionTypeAccept, -1, symbol, precedence);
return LexAction(LexActionTypeAccept, -1, symbol, { precedence });
}

bool LexAction::operator==(const LexAction &other) const {
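Since a single lex state can be reached through items of different precedences, Advance now carries a whole set of values rather than one int, and resolution reads the extremes of that set via *begin() and *rbegin(). A hedged usage sketch (the state id 7 and the values are made up):

// One advance action can now carry every precedence reachable from the
// target state; the conflict manager compares the maximum (here 2)
// against an accept's precedence when resolving accept/advance conflicts.
LexAction action = LexAction::Advance(7, { -1, 0, 2 });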
@@ -16,18 +16,18 @@ namespace tree_sitter {
} LexActionType;

class LexAction {
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, int precedence);
LexAction(LexActionType type, size_t state_index, rules::Symbol symbol, std::set<int> precedence_values);
public:
LexAction();
static LexAction Accept(rules::Symbol symbol, int precedence);
static LexAction Error();
static LexAction Advance(size_t state_index);
static LexAction Advance(size_t state_index, std::set<int> precedence_values);
bool operator==(const LexAction &action) const;

LexActionType type;
rules::Symbol symbol;
size_t state_index;
int precedence;
std::set<int> precedence_values;
};

std::ostream& operator<<(std::ostream &stream, const LexAction &item);
@@ -19,7 +19,7 @@ namespace tree_sitter {
using rules::rule_ptr;
using rules::String;
using rules::Pattern;

namespace prepare_grammar {
class ExpandTokens : public rules::IdentityRuleFn {
using rules::IdentityRuleFn::apply_to;
@@ -30,38 +30,38 @@ namespace tree_sitter {
elements.push_back(rules::CharacterSet({ val }).copy());
return rules::Seq::Build(elements);
}

rule_ptr apply_to(const Pattern *rule) {
auto pair = parse_regex(rule->value);
if (!error)
error = pair.second;
return pair.first;
}

public:
const GrammarError *error;
ExpandTokens() : error(nullptr) {}
};

pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &grammar) {
vector<pair<string, rule_ptr>> rules, aux_rules;
ExpandTokens expander;

for (auto &pair : grammar.rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
rules.push_back({ pair.first, rule });
}

for (auto &pair : grammar.aux_rules) {
auto rule = expander.apply(pair.second);
if (expander.error)
return { PreparedGrammar(), expander.error };
aux_rules.push_back({ pair.first, rule });
}

return { PreparedGrammar(rules, aux_rules, grammar.options), nullptr };
}
}
@@ -5,7 +5,7 @@

namespace tree_sitter {
class PreparedGrammar;

namespace prepare_grammar {
std::pair<PreparedGrammar, const GrammarError *>
expand_tokens(const PreparedGrammar &);
@@ -57,7 +57,7 @@ namespace tree_sitter {

SymbolInliner(const map<Symbol, Symbol> &replacements) : replacements(replacements) {}
};

const rules::SymbolOption SymbolOptionAuxToken = rules::SymbolOption(rules::SymbolOptionToken|rules::SymbolOptionAuxiliary);

class TokenExtractor : public rules::IdentityRuleFn {
@@ -71,7 +71,7 @@ namespace tree_sitter {
return make_shared<Symbol>(index, SymbolOptionAuxToken);

}

rule_ptr default_apply(const rules::Rule *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {
@@ -80,7 +80,7 @@ namespace tree_sitter {
return result;
}
}

rule_ptr apply_to(const rules::Metadata *rule) {
auto result = rule->copy();
if (IsToken().apply(rule->copy())) {
@@ -21,7 +21,7 @@ namespace tree_sitter {
using rules::Repeat;
using rules::CharacterRange;
using rules::blank;

namespace prepare_grammar {
class PatternParser {
public:
@@ -29,7 +29,7 @@ namespace tree_sitter {
input(input),
length(input.length()),
position(0) {}

pair<rule_ptr, const GrammarError *> rule(bool nested) {
vector<rule_ptr> choices = {};
do {
@@ -47,7 +47,7 @@ namespace tree_sitter {
auto rule = (choices.size() > 1) ? make_shared<Choice>(choices) : choices.front();
return { rule, nullptr };
}

private:
pair<rule_ptr, const GrammarError *> term(bool nested) {
rule_ptr result = blank();
@@ -63,7 +63,7 @@ namespace tree_sitter {
} while (has_more_input());
return { result, nullptr };
}

pair<rule_ptr, const GrammarError *> factor() {
auto pair = atom();
if (pair.second)
@@ -87,7 +87,7 @@ namespace tree_sitter {
}
return { result, nullptr };
}

pair<rule_ptr, const GrammarError *> atom() {
switch (peek()) {
case '(': {
@@ -103,7 +103,7 @@ namespace tree_sitter {
case '[': {
next();
auto pair = char_set();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
if (peek() != ']')
return error("unmatched open square bracket");
@@ -122,13 +122,13 @@ namespace tree_sitter {
}
default: {
auto pair = single_char();
if (pair.second)
if (pair.second)
return { blank(), pair.second };
return { pair.first.copy(), nullptr };
}
}
}

pair<CharacterSet, const GrammarError *> char_set() {
bool is_affirmative = true;
if (peek() == '^') {
@@ -146,7 +146,7 @@ namespace tree_sitter {
result = result.complement();
return { result, nullptr };
}

pair<CharacterSet, const GrammarError *> single_char() {
CharacterSet value;
switch (peek()) {
@@ -168,7 +168,7 @@ namespace tree_sitter {
}
return { value, nullptr };
}

CharacterSet escaped_char(char value) {
switch (value) {
case 'a':
@@ -181,23 +181,23 @@ namespace tree_sitter {
return CharacterSet({ value });
}
}

void next() {
position++;
}

char peek() {
return input[position];
}

bool has_more_input() {
return position < length;
}

pair<rule_ptr, const GrammarError *> error(string msg) {
return { blank(), new GrammarError(GrammarErrorTypeRegex, msg) };
}

const string input;
const size_t length;
size_t position;
@@ -24,10 +24,10 @@ namespace tree_sitter {
auto expand_tokens_result = expand_tokens(grammars.second);
const PreparedGrammar &lex_grammar = expand_tokens_result.first;
error = expand_tokens_result.second;

if (error)
return make_tuple(PreparedGrammar(), PreparedGrammar(), error);

return make_tuple(rule_grammar, lex_grammar, nullptr);
}
}
@@ -25,11 +25,11 @@ namespace tree_sitter {
string apply_to(const rules::Metadata *rule) {
return apply(rule->rule);
}

string apply_to(const rules::Seq *rule) {
return "(seq " + apply(rule->left) + " " + apply(rule->right) + ")";
}

string apply_to(const rules::Choice *rule) {
string result = "(choice";
for (auto &element : rule->elements)
@@ -10,7 +10,7 @@ namespace tree_sitter {
using std::ostream;
using rules::rule_ptr;
using rules::Symbol;

PreparedGrammar::PreparedGrammar() : Grammar({}), aux_rules({}), options({}) {}

PreparedGrammar::PreparedGrammar(const std::vector<std::pair<std::string, rules::rule_ptr>> &rules,