Compute conflicting tokens more precisely

While generating the parse table, keep track of which tokens can follow one another. Then use this information to evaluate token conflicts more precisely. This will result in a smaller parse table than the previous, overly-conservative approach.
2017-07-07 17:54:24 -07:00 · 2017-07-07 17:54:24 -07:00 · 1586d70cbe
commit 1586d70cbe
parent a98abde529
6 changed files with 160 additions and 61 deletions
--- a/test/compiler/build_tables/lex_table_builder_test.cc
+++ b/test/compiler/build_tables/lex_table_builder_test.cc
@ -16,7 +16,7 @@ describe("LexTableBuilder::detect_conflict", []() {
    auto builder = LexTableBuilder::create(LexicalGrammar{
      {
        LexicalVariable{
-          "token_1",
+          "token_0",
          VariableTypeNamed,
          Rule::seq({
            CharacterSet({ 'a' }),
@ -26,7 +26,7 @@ describe("LexTableBuilder::detect_conflict", []() {
          false
        },
        LexicalVariable{
-          "token_2",
+          "token_1",
          VariableTypeNamed,
          Rule::seq({
            CharacterSet({ 'b' }),
@ -39,22 +39,22 @@ describe("LexTableBuilder::detect_conflict", []() {
      separators
    });

-    AssertThat(builder->detect_conflict(0, 1), IsFalse());
-    AssertThat(builder->detect_conflict(1, 0), IsFalse());
+    AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsFalse());
+    AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
  });

-  it("returns true when one token matches a string that the other matches, "
-     "plus some addition content that begins with a separator character", [&]() {
+  it("returns true when the left token can match a string that the right token matches, "
+     "plus a separator character", [&]() {
    LexicalGrammar grammar{
      {
        LexicalVariable{
-          "token_1",
+          "token_0",
          VariableTypeNamed,
          Rule::repeat(CharacterSet().include_all().exclude('\n')), // regex: /.+/
          false
        },
        LexicalVariable{
-          "token_2",
+          "token_1",
          VariableTypeNamed,
          Rule::seq({ CharacterSet({ 'a' }), CharacterSet({ 'b' }), CharacterSet({ 'c' }) }), // string: 'abc'
          true
@ -64,24 +64,32 @@ describe("LexTableBuilder::detect_conflict", []() {
    };

    auto builder = LexTableBuilder::create(grammar);
-    AssertThat(builder->detect_conflict(0, 1), IsTrue());
-    AssertThat(builder->detect_conflict(1, 0), IsFalse());
+    AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
+    AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());

    grammar.variables[1].is_string = false;
-    AssertThat(builder->detect_conflict(0, 1), IsTrue());
-    AssertThat(builder->detect_conflict(1, 0), IsFalse());
+    AssertThat(builder->detect_conflict(0, 1, {{}, {}}), IsTrue());
+    AssertThat(builder->detect_conflict(1, 0, {{}, {}}), IsFalse());
  });

-  it("returns true when one token matches a string that the other matches, "
-     "plus some addition content that matches another one-character token", [&]() {
+  it("returns true when the left token matches a string that the right token matches, "
+     "plus the first character of some token that can follow the right token", [&]() {
    LexicalGrammar grammar{
      {
+        LexicalVariable{
+          "token_0",
+          VariableTypeNamed,
+          Rule::seq({
+            CharacterSet({ '>' }),
+            CharacterSet({ '=' }),
+          }),
+          true
+        },
        LexicalVariable{
          "token_1",
          VariableTypeNamed,
          Rule::seq({
            CharacterSet({ '>' }),
-            CharacterSet({ '>' }),
          }),
          true
        },
@ -89,7 +97,7 @@ describe("LexTableBuilder::detect_conflict", []() {
          "token_2",
          VariableTypeNamed,
          Rule::seq({
-            CharacterSet({ '>' }),
+            CharacterSet({ '=' }),
          }),
          true
        },
@ -97,9 +105,17 @@ describe("LexTableBuilder::detect_conflict", []() {
      separators
    };

+    // If no tokens can follow token_1, then there's no conflict
    auto builder = LexTableBuilder::create(grammar);
-    AssertThat(builder->detect_conflict(0, 1), IsTrue());
-    AssertThat(builder->detect_conflict(1, 0), IsFalse());
+    vector<set<Symbol::Index>> following_tokens_by_token_index(3);
+    AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsFalse());
+    AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
+
+    // If token_2 can follow token_1, then token_0 conflicts with token_1
+    builder = LexTableBuilder::create(grammar);
+    following_tokens_by_token_index[1].insert(2);
+    AssertThat(builder->detect_conflict(0, 1, following_tokens_by_token_index), IsTrue());
+    AssertThat(builder->detect_conflict(1, 0, following_tokens_by_token_index), IsFalse());
  });
 });