Merge pull request #204 from tree-sitter/property-sheets

Add a system for assigning properties to syntax nodes
2018-10-15 23:18:33 -07:00 · 2018-10-15 23:18:33 -07:00 · fa1f4aa86a
commit fa1f4aa86a
parent 257522372f 2b6857bb45
22 changed files with 924 additions and 103 deletions
--- a/externals/bandit
+++ b/externals/bandit
@ -1 +1 @@
-Subproject commit f14ade4fbba72f2e8ba3a7f47318eb07614a39d4
+Subproject commit bfdb8a3322a2e54b11aea64d84f9788d83477e83
--- a/include/tree_sitter/compiler.h
+++ b/include/tree_sitter/compiler.h
@ -29,6 +29,7 @@ typedef struct {
 } TSCompileResult;

 TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
+TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file);

 #ifdef __cplusplus
 }
--- a/project.gyp
+++ b/project.gyp
@ -18,12 +18,14 @@
        'src/compiler/build_tables/parse_item.cc',
        'src/compiler/build_tables/parse_item_set_builder.cc',
        'src/compiler/build_tables/parse_table_builder.cc',
+        'src/compiler/build_tables/property_table_builder.cc',
        'src/compiler/build_tables/rule_can_be_blank.cc',
        'src/compiler/compile.cc',
        'src/compiler/generate_code/c_code.cc',
+        'src/compiler/generate_code/property_table_json.cc',
        'src/compiler/lex_table.cc',
        'src/compiler/log.cc',
-        'src/compiler/parse_grammar.cc',
+        'src/compiler/parse_json.cc',
        'src/compiler/parse_table.cc',
        'src/compiler/precedence_range.cc',
        'src/compiler/prepare_grammar/expand_repeats.cc',
@ -122,6 +124,7 @@
        'cflags': [ '-g' ],
        'ldflags': [ '-g' ],
        'xcode_settings': {
+          'ARCHS': ['x86_64'],
          'OTHER_LDFLAGS': ['-g'],
          'GCC_OPTIMIZATION_LEVEL': '0',
        },
@ -131,6 +134,7 @@
        'cflags': [ '-g' ],
        'ldflags': [ '-g' ],
        'xcode_settings': {
+          'ARCHS': ['x86_64'],
          'OTHER_LDFLAGS': ['-g'],
          'GCC_OPTIMIZATION_LEVEL': '0',
          'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'],
@ -143,6 +147,9 @@
      'Release': {
        'cflags': [ '-O2', '-fno-strict-aliasing' ],
        'cflags!': [ '-O3', '-fstrict-aliasing' ],
+        'xcode_settings': {
+          'ARCHS': ['x86_64'],
+        },
      },
    },

--- a/src/compiler/build_tables/property_table_builder.cc
+++ b/src/compiler/build_tables/property_table_builder.cc
@ -0,0 +1,447 @@
+#include <vector>
+#include <deque>
+#include <algorithm>
+#include <map>
+#include <unordered_map>
+#include <set>
+#include "compiler/property_sheet.h"
+#include "compiler/property_table.h"
+#include "compiler/build_tables/property_table_builder.h"
+#include "compiler/util/hash_combine.h"
+
+using std::deque;
+using std::vector;
+using std::pair;
+using std::unordered_map;
+using std::set;
+using std::move;
+using std::map;
+
+namespace tree_sitter {
+namespace build_tables {
+
+// Identifies one position inside one selector of one rule.
+// `rule_id` indexes the sheet, `selector_id` indexes that rule's
+// selectors, and `step_id` is the index of the selector step that
+// still has to be matched. For a selector like `a > b`, step 1
+// means an `a` has been entered but no `b` yet.
+struct PropertyItem {
+  unsigned rule_id;
+  unsigned selector_id;
+  unsigned step_id;
+
+  bool operator==(const PropertyItem &other) const {
+    if (rule_id != other.rule_id) return false;
+    if (selector_id != other.selector_id) return false;
+    return step_id == other.step_id;
+  }
+
+  // Lexicographic order on (rule_id, selector_id, step_id).
+  bool operator<(const PropertyItem &other) const {
+    if (rule_id != other.rule_id) return rule_id < other.rule_id;
+    if (selector_id != other.selector_id) return selector_id < other.selector_id;
+    return step_id < other.step_id;
+  }
+};
+
+// The collection of selector positions that are possible at the
+// same time. An item set directly represents one state of the
+// property-matching state machine.
+struct PropertyItemSet {
+  set<PropertyItem> entries;
+
+  bool operator==(const PropertyItemSet &that) const {
+    return that.entries == entries;
+  }
+};
+
+// Records that a selector has matched, together with a pointer to
+// the properties assigned by its rule. Matches are ordered the way
+// CSS resolves conflicts: by specificity first, then by position
+// in the original sheet (rule, then selector within the rule).
+struct PropertySelectorMatch {
+  unsigned specificity;
+  unsigned rule_id;
+  unsigned selector_id;
+  const PropertySet *property_set;
+
+  bool operator<(const PropertySelectorMatch &other) const {
+    if (specificity != other.specificity) return specificity < other.specificity;
+    if (rule_id != other.rule_id) return rule_id < other.rule_id;
+    return selector_id < other.selector_id;
+  }
+};
+
+// A transition paired with the data needed to order it among the
+// other transitions leaving the same state.
+struct PropertyTransitionEntry {
+  PropertyTransition transition;
+  unsigned latest_matching_rule_id;
+
+  // Counts how many optional constraints (child index, text
+  // pattern) the transition carries: 0, 1 or 2.
+  unsigned specificity() const {
+    unsigned score = 0;
+    if (transition.index != -1) score++;
+    if (!transition.text_pattern.empty()) score++;
+    return score;
+  }
+
+  // The runtime library computes a node's properties by walking from
+  // the root of the syntax tree down to that node, and at each
+  // ancestor it follows the *first* entry of the `transitions` list
+  // that matches. The sort order therefore has to reproduce the
+  // normal CSS tie-breaking rules.
+  bool operator<(const PropertyTransitionEntry &other) const {
+    // Transitions for different node types can never match the same
+    // node, so their relative order is arbitrary.
+    if (transition.type != other.transition.type) {
+      return transition.type < other.transition.type;
+    }
+
+    // Within a type, named transitions sort before anonymous ones.
+    const bool self_named = transition.named;
+    const bool other_named = other.transition.named;
+    if (self_named != other_named) return self_named;
+
+    // More specific transitions must be tried before less specific ones.
+    const unsigned self_specificity = specificity();
+    const unsigned other_specificity = other.specificity();
+    if (self_specificity != other_specificity) {
+      return self_specificity > other_specificity;
+    }
+
+    // On a specificity tie (e.g. one transition with an `:nth-child`
+    // pseudo-class and one with a `:text` pseudo-class), the one whose
+    // matching properties appear later in the cascade is tried first.
+    return latest_matching_rule_id > other.latest_matching_rule_id;
+  }
+};
+
+}  // namespace build_tables
+}  // namespace tree_sitter
+
+namespace std {
+
+using tree_sitter::util::hash_combine;
+
+// Hash support for PropertyItemSet. It is required because the table
+// builder keeps a map from already-seen item sets to their state ids.
+template <>
+struct hash<tree_sitter::build_tables::PropertyItemSet> {
+  size_t operator()(const tree_sitter::build_tables::PropertyItemSet &item_set) const {
+    size_t seed = 0;
+    hash_combine(&seed, item_set.entries.size());
+    for (auto it = item_set.entries.begin(); it != item_set.entries.end(); ++it) {
+      hash_combine(&seed, it->rule_id);
+      hash_combine(&seed, it->selector_id);
+      hash_combine(&seed, it->step_id);
+    }
+    return seed;
+  }
+};
+
+// Hash support for PropertyTransition. It is required because a state's
+// outgoing transitions are collected in a map keyed by PropertyTransition,
+// with the successor PropertyItemSet as the value.
+template <>
+struct hash<tree_sitter::PropertyTransition> {
+  size_t operator()(const tree_sitter::PropertyTransition &transition) const {
+    size_t seed = 0;
+    hash_combine(&seed, transition.type);
+    hash_combine(&seed, transition.named);
+    hash_combine(&seed, transition.index);
+    hash_combine(&seed, transition.text_pattern);
+    hash_combine(&seed, transition.state_id);
+    return seed;
+  }
+};
+
+// Hash support for PropertySet, so that identical property sets can be
+// deduplicated through a map.
+template <>
+struct hash<tree_sitter::PropertySet> {
+  size_t operator()(const tree_sitter::PropertySet &properties) const {
+    size_t seed = 0;
+    hash_combine(&seed, properties.size());
+    for (const auto &property : properties) {
+      hash_combine(&seed, property.first);
+      hash_combine(&seed, property.second);
+    }
+    return seed;
+  }
+};
+
+}  // namespace std
+
+namespace tree_sitter {
+namespace build_tables {
+
+typedef unsigned StateId;
+typedef unsigned PropertySetId;
+
+struct PropertyTableBuilder {
+  PropertySheet sheet;
+  PropertyTable result;
+  unordered_map<PropertyItemSet, StateId> ids_by_item_set;
+  unordered_map<PropertySet, PropertySetId> ids_by_property_set;
+  deque<pair<PropertyItemSet, StateId>> item_set_queue;
+
+  // Stores a copy of `sheet` in the builder; all rule/selector lookups
+  // below index into that copy.
+  PropertyTableBuilder(const PropertySheet &sheet) : sheet(sheet) {}
+
+  // Builds the complete table: seeds the start state with every selector
+  // of every rule at step 0, populates states until the work queue is
+  // empty, removes duplicate states, and returns the result.
+  PropertyTable build() {
+    PropertyItemSet initial_item_set;
+    for (unsigned rule_id = 0; rule_id < sheet.size(); rule_id++) {
+      PropertyRule &rule = sheet[rule_id];
+      for (unsigned selector_id = 0; selector_id < rule.selectors.size(); selector_id++) {
+        initial_item_set.entries.insert(PropertyItem {rule_id, selector_id, 0});
+      }
+    }
+
+    add_state(initial_item_set);
+
+    // populate_state() may enqueue further item sets via add_state(),
+    // so keep draining until nothing new shows up.
+    while (!item_set_queue.empty()) {
+      pair<PropertyItemSet, StateId> pending = item_set_queue.front();
+      item_set_queue.pop_front();
+      populate_state(pending.first, pending.second);
+    }
+
+    remove_duplicate_states();
+
+    return result;
+  }
+
+  // Different item sets can actually produce the same state, so the
+  // states need to be explicitly deduped as a post-processing step.
+  //
+  // Each pass finds states equal to an earlier state, redirects every
+  // transition and default successor to the surviving state's new index,
+  // and erases the duplicates. Passes repeat until one finds no
+  // duplicates, since rewriting successor ids can make more states
+  // compare equal.
+  void remove_duplicate_states() {
+    // Cumulative old-index -> new-index mapping across passes. It is
+    // maintained here but not read anywhere else in this function.
+    map<StateId, StateId> replacements;
+
+    while (true) {
+      // Maps each duplicate state index to the earlier, non-duplicate
+      // state it is equal to.
+      map<StateId, StateId> duplicates;
+      for (StateId i = 0, size = result.states.size(); i < size; i++) {
+        for (StateId j = 0; j < i; j++) {
+          if (!duplicates.count(j) && result.states[j] == result.states[i]) {
+            duplicates.insert({ i, j });
+            break;
+          }
+        }
+      }
+
+      if (duplicates.empty()) break;
+
+      // For every state of this pass, compute the index it will have
+      // once the duplicates are erased.
+      map<StateId, StateId> new_replacements;
+      for (StateId i = 0, size = result.states.size(); i < size; i++) {
+        StateId new_state_index = i;
+        auto duplicate = duplicates.find(i);
+        if (duplicate != duplicates.end()) {
+          new_state_index = duplicate->second;
+        }
+
+        // `duplicates` is an ordered map, so this counts the erased
+        // states that sit before the target index.
+        size_t prior_removed = 0;
+        for (const auto &duplicate : duplicates) {
+          if (duplicate.first >= new_state_index) break;
+          prior_removed++;
+        }
+
+        new_state_index -= prior_removed;
+        new_replacements.insert({i, new_state_index});
+        replacements.insert({ i, new_state_index });
+        for (auto &replacement : replacements) {
+          if (replacement.second == i) {
+            replacement.second = new_state_index;
+          }
+        }
+      }
+
+      // Point every transition and default successor at the new indices.
+      for (auto &state : result.states) {
+        for (auto &transition : state.transitions) {
+          auto new_replacement = new_replacements.find(transition.state_id);
+          if (new_replacement != new_replacements.end()) {
+            transition.state_id = new_replacement->second;
+          }
+        }
+
+        auto new_replacement = new_replacements.find(state.default_next_state_id);
+        if (new_replacement != new_replacements.end()) {
+          state.default_next_state_id = new_replacement->second;
+        }
+      }
+
+      // Erase from the highest index down so that the remaining
+      // indices in `duplicates` stay valid.
+      for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) {
+        result.states.erase(result.states.begin() + i->first);
+      }
+    }
+  }
+
+  // Returns the selector step that `item` still has to match, or
+  // nullptr once the item has consumed every step of its selector.
+  const PropertySelectorStep *next_step_for_item(const PropertyItem &item) {
+    const PropertySelector &steps = sheet[item.rule_id].selectors[item.selector_id];
+    return item.step_id < steps.size() ? &steps[item.step_id] : nullptr;
+  }
+
+  // Get the selector step whose `is_immediate` flag the callers consult
+  // for an item that is part-way through its selector.
+  //
+  // Returns null if the item has not consumed anything yet, or if it has
+  // already consumed its entire selector. The second check matters:
+  // callers invoke this for every item, including finished ones, and
+  // without it a finished item would index the selector out of range.
+  //
+  // NOTE(review): despite the name, this indexes with `step_id` (the same
+  // element that next_step_for_item returns), not `step_id - 1`. Confirm
+  // which step carries `is_immediate` before changing the index.
+  const PropertySelectorStep *prev_step_for_item(const PropertyItem &item) {
+    if (item.step_id == 0) return nullptr;
+    const PropertySelector &selector = sheet[item.rule_id].selectors[item.selector_id];
+    if (item.step_id >= selector.size()) return nullptr;
+    return &selector[item.step_id];
+  }
+
+  // Computes a selector's specificity: one point per step, plus one
+  // for each step that constrains the child index and one for each
+  // step that constrains the node text.
+  unsigned specificity_for_selector(const PropertySelector &selector) {
+    unsigned score = selector.size();
+    for (const PropertySelectorStep &step : selector) {
+      score += (step.index != -1) ? 1 : 0;
+      score += step.text_pattern.empty() ? 0 : 1;
+    }
+    return score;
+  }
+
+  // Tells whether a node accepted by `transition` satisfies `step`.
+  // Type and named-ness must agree exactly; an index of -1 or an empty
+  // text pattern on the step acts as a wildcard.
+  bool step_matches_transition(const PropertySelectorStep &step, const PropertyTransition &transition) {
+    if (step.type != transition.type) return false;
+    if (step.named != transition.named) return false;
+    if (step.index != -1 && step.index != transition.index) return false;
+    return step.text_pattern.empty() || step.text_pattern == transition.text_pattern;
+  }
+
+  // Fills in state `state_id` from `item_set`: its sorted list of
+  // outgoing transitions, its default successor state, and the id of
+  // the merged property set that applies in this state. Successor
+  // states are created (and queued) through add_state().
+  void populate_state(const PropertyItemSet &item_set, StateId state_id) {
+    unordered_map<PropertyTransition, PropertyItemSet> transitions;
+    vector<PropertySelectorMatch> selector_matches;
+
+    for (const PropertyItem &item : item_set.entries) {
+      const PropertySelectorStep *next_step = next_step_for_item(item);
+
+      // If this item has more elements to match for its selector, then
+      // there's a state transition for elements that match the next
+      // part of the selector.
+      if (next_step) {
+        // The trailing 0 is a placeholder state id; the real successor
+        // id is assigned further down once the successor set is known.
+        transitions[PropertyTransition{
+          next_step->type,
+          next_step->named,
+          next_step->index,
+          next_step->text_pattern,
+          0
+        }] = PropertyItemSet();
+      }
+
+      // If the item has matched its entire selector, then the property set
+      // for the item's rule applies in this state.
+      else {
+        const PropertyRule &rule = sheet[item.rule_id];
+        selector_matches.push_back(PropertySelectorMatch {
+          specificity_for_selector(rule.selectors[item.selector_id]),
+          item.rule_id,
+          item.selector_id,
+          &rule.properties,
+        });
+      }
+    }
+
+    // For each element that follows an item in this set,
+    // compute the next item set after descending through that element.
+    vector<PropertyTransitionEntry> transition_list;
+    for (auto &pair : transitions) {
+      PropertyTransition transition = pair.first;
+      PropertyItemSet &next_item_set = pair.second;
+      unsigned latest_matching_rule_id = 0;
+
+      for (const PropertyItem &item : item_set.entries) {
+        const PropertySelectorStep *next_step = next_step_for_item(item);
+        // prev_step is only dereferenced inside the `if (next_step)` branch.
+        const PropertySelectorStep *prev_step = prev_step_for_item(item);
+        if (next_step) {
+
+          // If the element matches the next part of the item, advance the
+          // item to the next part of its selector.
+          if (step_matches_transition(*next_step, transition)) {
+            PropertyItem next_item = item;
+            next_item.step_id++;
+            next_item_set.entries.insert(next_item);
+
+            // If the item is at the end of its selector, record its rule id
+            // so that it can be used when sorting the transitions.
+            if (!next_step_for_item(next_item) && next_item.rule_id > latest_matching_rule_id) {
+              latest_matching_rule_id = item.rule_id;
+            }
+          }
+
+          // If the element does not match, and the item is in the middle
+          // of an immediate child selector, then remove it from the
+          // next item set. Otherwise, keep it unchanged.
+          if (!prev_step || !prev_step->is_immediate) {
+            next_item_set.entries.insert(item);
+          }
+        }
+      }
+
+      transition.state_id = add_state(next_item_set);
+      transition_list.push_back(PropertyTransitionEntry {transition, latest_matching_rule_id});
+    }
+
+    // Order the transitions with PropertyTransitionEntry::operator<
+    // before storing them on the state.
+    std::sort(transition_list.begin(), transition_list.end());
+    for (auto &entry : transition_list) {
+      result.states[state_id].transitions.push_back(entry.transition);
+    }
+
+    // Compute the default successor item set - the item set that
+    // we should advance to if the next element doesn't match any
+    // of the next elements in the item set's selectors.
+    PropertyItemSet default_next_item_set;
+    for (const PropertyItem &item : item_set.entries) {
+      const PropertySelectorStep *next_step = next_step_for_item(item);
+      const PropertySelectorStep *prev_step = prev_step_for_item(item);
+      if (next_step && (!prev_step || !prev_step->is_immediate)) {
+        default_next_item_set.entries.insert(item);
+      }
+    }
+
+    StateId default_next_state_id = add_state(default_next_item_set);
+    result.states[state_id].default_next_state_id = default_next_state_id;
+
+    // Sort the matching property sets by ascending specificity and by
+    // their order in the sheet. This way, more specific selectors and later
+    // rules will override less specific selectors and earlier rules.
+    PropertySet properties;
+    std::sort(selector_matches.begin(), selector_matches.end());
+    for (auto &match : selector_matches) {
+      for (auto &pair : *match.property_set) {
+        properties[pair.first] = pair.second;
+      }
+    }
+
+    // Add the final property set to the deduped list.
+    result.states[state_id].property_set_id = add_property_set(properties);
+  }
+
+  // Returns the state id for `item_set`. The first time an item set is
+  // seen, an empty state is appended to the table and the item set is
+  // queued so that the state gets populated later.
+  StateId add_state(const PropertyItemSet &item_set) {
+    auto existing = ids_by_item_set.find(item_set);
+    if (existing != ids_by_item_set.end()) return existing->second;
+
+    StateId new_id = result.states.size();
+    ids_by_item_set[item_set] = new_id;
+    result.states.push_back(PropertyState {});
+    item_set_queue.push_back({item_set, new_id});
+    return new_id;
+  }
+
+  // Returns the id of `property_set` in the table's deduplicated list,
+  // appending it first if an equal set has not been stored yet.
+  PropertySetId add_property_set(const PropertySet &property_set) {
+    auto existing = ids_by_property_set.find(property_set);
+    if (existing != ids_by_property_set.end()) return existing->second;
+
+    PropertySetId new_id = result.property_sets.size();
+    ids_by_property_set[property_set] = new_id;
+    result.property_sets.push_back(property_set);
+    return new_id;
+  }
+};
+
+// Builds the property-matching table for `sheet` with a
+// PropertyTableBuilder.
+PropertyTable build_property_table(const PropertySheet &sheet) {
+  PropertyTableBuilder builder(sheet);
+  return builder.build();
+}
+
+}  // namespace build_tables
+}  // namespace tree_sitter
--- a/src/compiler/build_tables/property_table_builder.h
+++ b/src/compiler/build_tables/property_table_builder.h
@ -0,0 +1,15 @@
+#ifndef COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
+#define COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
+
+#include <memory>
+#include "compiler/property_table.h"
+
+namespace tree_sitter {
+namespace build_tables {
+
+PropertyTable build_property_table(const PropertySheet &);
+
+}  // namespace build_tables
+}  // namespace tree_sitter
+
+#endif  // COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
--- a/src/compiler/compile.cc
+++ b/src/compiler/compile.cc
@ -1,11 +1,13 @@
 #include "tree_sitter/compiler.h"
 #include "compiler/prepare_grammar/prepare_grammar.h"
 #include "compiler/build_tables/parse_table_builder.h"
+#include "compiler/build_tables/property_table_builder.h"
 #include "compiler/generate_code/c_code.h"
+#include "compiler/generate_code/property_table_json.h"
 #include "compiler/syntax_grammar.h"
 #include "compiler/log.h"
 #include "compiler/lexical_grammar.h"
-#include "compiler/parse_grammar.h"
+#include "compiler/parse_json.h"
 #include "json.h"

 namespace tree_sitter {
@ -20,7 +22,7 @@ using std::make_tuple;
 extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) {
  set_log_file(log_file);

-  ParseGrammarResult parse_result = parse_grammar(string(input));
+  ParseGrammarResult parse_result = parse_grammar_json(string(input));
  if (!parse_result.error_message.empty()) {
    return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar};
  }
@ -60,4 +62,15 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
  return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone};
 }

+// Compiles a property sheet given as JSON text into the JSON form of
+// its property table. On success the result carries a strdup'ed copy of
+// the generated JSON and TSCompileErrorTypeNone; if the sheet fails to
+// parse it carries a strdup'ed error message and
+// TSCompileErrorTypeInvalidGrammar.
+extern "C" TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file) {
+  set_log_file(log_file);
+
+  auto sheet = parse_property_sheet_json(string(input));
+  if (sheet.ok()) {
+    PropertyTable table = build_tables::build_property_table(sheet.value);
+    const string json = generate_code::property_table_json(table);
+    return {strdup(json.c_str()), nullptr, TSCompileErrorTypeNone};
+  }
+
+  return {nullptr, strdup(sheet.error.c_str()), TSCompileErrorTypeInvalidGrammar};
+}
+
 }  // namespace tree_sitter
--- a/src/compiler/generate_code/property_table_json.cc
+++ b/src/compiler/generate_code/property_table_json.cc
@ -0,0 +1,117 @@
+#include "compiler/generate_code/property_table_json.h"
+#include <vector>
+#include <string>
+
+using std::string;
+using std::to_string;
+using std::vector;
+
+namespace tree_sitter {
+namespace generate_code {
+
+// Serializes a PropertyTable into a compact JSON document of the form
+// {"states":[...],"property_sets":[...]}.
+//
+// Every string taken from the table (node types, text patterns,
+// property names and values) is written through add_string(), so
+// quotes, backslashes and control characters are escaped and the output
+// stays valid JSON.
+class CodeGenerator {
+  string buffer;
+
+ public:
+  // Appends the JSON for `table` to the internal buffer and returns the
+  // buffer contents.
+  string generate(const PropertyTable &table) {
+    add("{");
+    add("\"states\":");
+    add("[");
+    for (unsigned i = 0; i < table.states.size(); i++) {
+      const PropertyState &state = table.states[i];
+      if (i != 0) add(",");
+      add_state(i, state);
+    }
+    add("],");
+    add("\"property_sets\":");
+    add("[");
+    bool first = true;
+    for (const PropertySet &property_set : table.property_sets) {
+      if (!first) add(",");
+      first = false;
+      add_property_set(property_set);
+    }
+    add("]");
+    add("}");
+    return buffer;
+  }
+
+ private:
+  // Writes one state object; `i` is the state's index in the table.
+  void add_state(unsigned i, const PropertyState &state) {
+    add("{");
+    add("\"id\":");
+    add(to_string(i));
+    add(",\"property_set_id\":");
+    add(to_string(state.property_set_id));
+    add(",");
+    add("\"transitions\":[");
+    bool first = true;
+    for (const auto &transition : state.transitions) {
+      if (!first) add(",");
+      first = false;
+      add_transition(transition);
+    }
+    add("],");
+    add("\"default_next_state_id\":");
+    add(to_string(state.default_next_state_id));
+    add("}");
+  }
+
+  // Writes one property set as a JSON object of string keys and string
+  // values. Keys and values are escaped; for plain strings the output
+  // is the same as emitting them between bare quotes.
+  void add_property_set(const PropertySet &property_set) {
+    add("{");
+    bool first = true;
+    for (const auto &pair : property_set) {
+      if (!first) add(",");
+      first = false;
+      add_string(pair.first);
+      add(":");
+      add_string(pair.second);
+    }
+    add("}");
+  }
+
+  // Writes one transition object. "index" and "text" are emitted only
+  // when the transition constrains them (index != -1, non-empty
+  // pattern).
+  void add_transition(const PropertyTransition &transition) {
+    add("{");
+    add("\"type\":");
+    add_string(transition.type);
+    add(",\"named\":");
+    add(transition.named ? "true" : "false");
+    if (transition.index != -1) {
+      add(",\"index\":");
+      add(to_string(transition.index));
+    }
+    if (!transition.text_pattern.empty()) {
+      add(",\"text\":");
+      add_string(transition.text_pattern);
+    }
+    add(",\"state_id\": ");
+    add(to_string(transition.state_id));
+    add("}");
+  }
+
+  // Writes `s` as a JSON string literal. Besides the double quote, the
+  // backslash and all control characters below 0x20 are escaped, since
+  // JSON does not allow them unescaped inside a string (text patterns
+  // in particular may contain backslashes).
+  void add_string(const string &s) {
+    static const char hex_digits[] = "0123456789abcdef";
+    add("\"");
+    for (const char c : s) {
+      switch (c) {
+        case '"': add("\\\""); break;
+        case '\\': add("\\\\"); break;
+        case '\n': add("\\n"); break;
+        case '\r': add("\\r"); break;
+        case '\t': add("\\t"); break;
+        default: {
+          const unsigned char byte = static_cast<unsigned char>(c);
+          if (byte < 0x20) {
+            add("\\u00");
+            add(hex_digits[(byte >> 4) & 0xf]);
+            add(hex_digits[byte & 0xf]);
+          } else {
+            add(c);
+          }
+        }
+      }
+    }
+    add("\"");
+  }
+
+  // Appends raw text to the output buffer.
+  void add(const string &input) {
+    buffer += input;
+  }
+
+  // Appends a single raw character to the output buffer.
+  void add(char c) {
+    buffer += c;
+  }
+};
+
+// Renders `table` as a JSON string using a fresh CodeGenerator.
+string property_table_json(PropertyTable table) {
+  CodeGenerator generator;
+  return generator.generate(table);
+}
+
+}  // namespace generate_code
+}  // namespace tree_sitter
--- a/src/compiler/generate_code/property_table_json.h
+++ b/src/compiler/generate_code/property_table_json.h
@ -0,0 +1,15 @@
+#ifndef COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
+#define COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
+
+#include <string>
+#include "compiler/property_table.h"
+
+namespace tree_sitter {
+namespace generate_code {
+
+std::string property_table_json(PropertyTable);
+
+}  // namespace generate_code
+}  // namespace tree_sitter
+
+#endif  // COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
--- a/src/compiler/parse_grammar.h
+++ b/src/compiler/parse_grammar.h
@ -1,21 +0,0 @@
-#ifndef COMPILER_GRAMMAR_JSON_H_
-#define COMPILER_GRAMMAR_JSON_H_
-
-#include <string>
-#include <unordered_set>
-#include "tree_sitter/compiler.h"
-#include "compiler/grammar.h"
-
-namespace tree_sitter {
-
-struct ParseGrammarResult {
-  std::string name;
-  InputGrammar grammar;
-  std::string error_message;
-};
-
-ParseGrammarResult parse_grammar(const std::string &);
-
-}  // namespace tree_sitter
-
-#endif  // COMPILER_GRAMMAR_JSON_H_
--- a/src/compiler/parse_grammar.cc
+++ b/src/compiler/parse_grammar.cc
@ -1,10 +1,11 @@
-#include "compiler/parse_grammar.h"
+#include "compiler/parse_json.h"
 #include <string>
 #include <vector>
 #include <unordered_set>
 #include <utility>
 #include "json.h"
 #include "compiler/rule.h"
+#include "compiler/util/result.h"

 namespace tree_sitter {

@ -19,17 +20,9 @@ using rules::Metadata;
 using rules::Pattern;
 using rules::String;
 using rules::NamedSymbol;
+using util::Result;

-struct ParseRuleResult {
-  Rule rule;
-  string error_message;
-
-  ParseRuleResult(const string &error_message) : error_message(error_message) {}
-  ParseRuleResult(const char *error_message) : error_message(error_message) {}
-  ParseRuleResult(Rule rule) : rule(rule) {}
-};
-
-ParseRuleResult parse_rule(json_value *rule_json) {
+Result<Rule> parse_rule_json(json_value *rule_json) {
  string error_message;
  json_value rule_type_json;
  string type;
@ -62,11 +55,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    vector<Rule> members;
    for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
      json_value *member_json = members_json.u.array.values[i];
-      auto result = parse_rule(member_json);
-      if (!result.error_message.empty()) {
-        return "Invalid choice member: " + result.error_message;
+      auto result = parse_rule_json(member_json);
+      if (!result.ok()) {
+        return "Invalid choice member: " + result.error;
      }
-      members.push_back(result.rule);
+      members.push_back(result.value);
    }
    return Rule::choice(members);
  }
@ -80,49 +73,49 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    vector<Rule> members;
    for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
      json_value *member_json = members_json.u.array.values[i];
-      auto result = parse_rule(member_json);
-      if (!result.error_message.empty()) {
-        return "Invalid choice member: " + result.error_message;
+      auto result = parse_rule_json(member_json);
+      if (!result.ok()) {
+        return "Invalid choice member: " + result.error;
      }
-      members.push_back(result.rule);
+      members.push_back(result.value);
    }
    return Rule::seq(members);
  }

  if (type == "REPEAT") {
    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid repeat content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid repeat content: " + result.error;
    }
-    return Rule::choice({Rule::repeat(result.rule), Blank{}});
+    return Rule::choice({Rule::repeat(result.value), Blank{}});
  }

  if (type == "REPEAT1") {
    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid repeat content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid repeat content: " + result.error;
    }
-    return Rule::repeat(result.rule);
+    return Rule::repeat(result.value);
  }

  if (type == "TOKEN") {
    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid token content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid token content: " + result.error;
    }
-      return Rule(Metadata::token(move(result.rule)));
+      return Rule(Metadata::token(move(result.value)));
  }

  if (type == "IMMEDIATE_TOKEN") {
    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid token content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid token content: " + result.error;
    }
-      return Rule(Metadata::immediate_token(move(result.rule)));
+      return Rule(Metadata::immediate_token(move(result.value)));
  }

  if (type == "PATTERN") {
@ -159,11 +152,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    }

    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid precedence content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid precedence content: " + result.error;
    }
-    return Rule(Metadata::prec(precedence_json.u.integer, move(result.rule)));
+    return Rule(Metadata::prec(precedence_json.u.integer, move(result.value)));
  }

  if (type == "PREC_LEFT") {
@ -173,11 +166,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    }

    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid precedence content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid precedence content: " + result.error;
    }
-    return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.rule)));
+    return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.value)));
  }

  if (type == "PREC_RIGHT") {
@ -187,11 +180,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    }

    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid precedence content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid precedence content: " + result.error;
    }
-    return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.rule)));
+    return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.value)));
  }

  if (type == "PREC_DYNAMIC") {
@ -201,11 +194,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    }

    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid precedence content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid precedence content: " + result.error;
    }
-    return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.rule)));
+    return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.value)));
  }

  if (type == "ALIAS") {
@ -220,21 +213,21 @@ ParseRuleResult parse_rule(json_value *rule_json) {
    }

    json_value content_json = rule_json->operator[]("content");
-    auto result = parse_rule(&content_json);
-    if (!result.error_message.empty()) {
-      return "Invalid rename content: " + result.error_message;
+    auto result = parse_rule_json(&content_json);
+    if (!result.ok()) {
+      return "Invalid rename content: " + result.error;
    }
    return Rule(Metadata::alias(
      string(value_json.u.string.ptr),
      is_named_json.u.boolean,
-      move(result.rule)
+      move(result.value)
    ));
  }

  return "Unknown rule type: " + type;
 }

-ParseGrammarResult parse_grammar(const string &input) {
+ParseGrammarResult parse_grammar_json(const string &input) {
  string error_message;
  string name;
  InputGrammar grammar;
@ -242,8 +235,8 @@ ParseGrammarResult parse_grammar(const string &input) {
    name_json, rules_json, extras_json, conflicts_json, external_tokens_json,
    inline_rules_json, word_rule_json;

-  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  char parse_error[json_error_max];
+  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  json_value *grammar_json =
    json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
  if (!grammar_json) {
@ -272,15 +265,15 @@ ParseGrammarResult parse_grammar(const string &input) {

  for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
    json_object_entry entry_json = rules_json.u.object.values[i];
-    auto result = parse_rule(entry_json.value);
-    if (!result.error_message.empty()) {
-      error_message = result.error_message;
+    auto result = parse_rule_json(entry_json.value);
+    if (!result.ok()) {
+      error_message = result.error;
      goto error;
    }
    grammar.variables.push_back(Variable{
      string(entry_json.name),
      VariableTypeNamed,
-      result.rule
+      result.value
    });
  }

@ -293,12 +286,12 @@ ParseGrammarResult parse_grammar(const string &input) {

    for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
      json_value *extra_json = extras_json.u.array.values[i];
-      auto result = parse_rule(extra_json);
-      if (!result.error_message.empty()) {
-        error_message = "Invalid extra token: " + result.error_message;
+      auto result = parse_rule_json(extra_json);
+      if (!result.ok()) {
+        error_message = "Invalid extra token: " + result.error;
        goto error;
      }
-      grammar.extra_tokens.push_back(result.rule);
+      grammar.extra_tokens.push_back(result.value);
    }
  }

@ -361,12 +354,12 @@ ParseGrammarResult parse_grammar(const string &input) {

    for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
      json_value *external_token_json = external_tokens_json.u.array.values[i];
-      auto result = parse_rule(external_token_json);
-      if (!result.error_message.empty()) {
-        error_message = "Invalid external token: " + result.error_message;
+      auto result = parse_rule_json(external_token_json);
+      if (!result.ok()) {
+        error_message = "Invalid external token: " + result.error;
        goto error;
      }
-      grammar.external_tokens.push_back(result.rule);
+      grammar.external_tokens.push_back(result.value);
    }
  }

@ -384,11 +377,93 @@ ParseGrammarResult parse_grammar(const string &input) {
  return { name, grammar, "" };

 error:
-  if (grammar_json) {
-    json_value_free(grammar_json);
-  }
-
+  if (grammar_json) json_value_free(grammar_json);
  return { "", InputGrammar(), error_message };
 }

+// Converts one JSON property rule into a PropertyRule.
+//
+// The rule must be an object with two members:
+//   "selectors":  an array of selectors, each of which is an array of step
+//                 objects. A step carries a string "type", the booleans
+//                 "named" and "immediate", and optionally an integer "index"
+//                 and a string "text".
+//   "properties": an object whose values are all strings.
+//
+// Returns the parsed rule on success; otherwise a Result whose error message
+// names the first malformed part that was encountered.
+Result<PropertyRule> parse_property_rule_json(json_value *rule_json) {
+  PropertyRule result;
+
+  if (rule_json->type != json_object) return "Rule must be an object";
+
+  json_value selectors_json = rule_json->operator[]("selectors");
+  if (selectors_json.type != json_array) return "Selectors must be an array";
+
+  for (unsigned i = 0; i < selectors_json.u.array.length; i++) {
+    PropertySelector selector;
+    json_value *selector_json = selectors_json.u.array.values[i];
+    if (selector_json->type != json_array) return "Each selector must be an array";
+
+    for (unsigned j = 0; j < selector_json->u.array.length; j++) {
+      json_value *selector_step_json = selector_json->u.array.values[j];
+      if (selector_step_json->type != json_object) return "Each selector must be an array of objects";
+      PropertySelectorStep step;
+
+      // Check the type tag before reading the union: a missing or non-string
+      // "type" would otherwise hand a null or garbage pointer to std::string.
+      json_value type_json = selector_step_json->operator[]("type");
+      if (type_json.type != json_string) return "Each selector step must have a string type";
+      step.type = type_json.u.string.ptr;
+
+      // Absent or non-boolean flags are treated as false.
+      json_value named_json = selector_step_json->operator[]("named");
+      step.named = named_json.type == json_boolean && named_json.u.boolean;
+
+      json_value immediate_json = selector_step_json->operator[]("immediate");
+      step.is_immediate = immediate_json.type == json_boolean && immediate_json.u.boolean;
+
+      // -1 marks a step that has no "index" constraint.
+      json_value index_json = selector_step_json->operator[]("index");
+      if (index_json.type == json_integer) {
+        step.index = index_json.u.integer;
+      } else {
+        step.index = -1;
+      }
+
+      // The text pattern stays empty when the step has no "text" member.
+      json_value text_pattern_json = selector_step_json->operator[]("text");
+      if (text_pattern_json.type == json_string) {
+        step.text_pattern = text_pattern_json.u.string.ptr;
+      }
+
+      selector.push_back(step);
+    }
+
+    result.selectors.push_back(selector);
+  }
+
+  json_value properties_json = rule_json->operator[]("properties");
+  if (properties_json.type != json_object) return "Properties must be an object";
+
+  for (unsigned i = 0; i < properties_json.u.object.length; i++) {
+    json_object_entry entry_json = properties_json.u.object.values[i];
+    json_value *value_json = entry_json.value;
+    if (value_json->type != json_string) return "Property values must be strings";
+    result.properties[entry_json.name] = value_json->u.string.ptr;
+  }
+
+  return result;
+}
+
+// Parses a JSON property sheet, which must be a top-level array of property
+// rule objects.
+//
+// Returns the rules in document order, or a Result carrying a message that
+// describes the first problem found. The parsed JSON tree is released on
+// every exit path, success included.
+Result<PropertySheet> parse_property_sheet_json(const string &input) {
+  PropertySheet sheet;
+  string error_message;
+  char parse_error[json_error_max];
+  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
+  json_value *sheet_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
+  if (!sheet_json) {
+    error_message = string("Invalid JSON at ") + parse_error;
+    goto error;
+  }
+
+  if (sheet_json->type != json_array) {
+    error_message = "Property sheet must be an array";
+    goto error;
+  }
+
+  for (unsigned i = 0; i < sheet_json->u.array.length; i++) {
+    json_value *rule_json = sheet_json->u.array.values[i];
+    auto result = parse_property_rule_json(rule_json);
+    if (!result.ok()) {
+      error_message = "Invalid property rule: " + result.error;
+      goto error;
+    }
+    sheet.push_back(move(result.value));
+  }
+
+  // Every string was copied into `sheet`, so the JSON tree can be released
+  // before returning.
+  json_value_free(sheet_json);
+  return sheet;
+
+error:
+  if (sheet_json) json_value_free(sheet_json);
+  return error_message.c_str();
+}
+
 }  // namespace tree_sitter
--- a/src/compiler/parse_json.h
+++ b/src/compiler/parse_json.h
@ -0,0 +1,29 @@
+#ifndef COMPILER_PARSE_JSON_H_
+#define COMPILER_PARSE_JSON_H_
+
+#include <string>
+#include <unordered_set>
+#include "tree_sitter/compiler.h"
+#include "compiler/grammar.h"
+#include "compiler/property_sheet.h"
+#include "compiler/util/result.h"
+
+namespace tree_sitter {
+
+struct ParseGrammarResult {  // Value returned by parse_grammar_json.
+  std::string name;  // Left empty when parsing fails.
+  InputGrammar grammar;  // Default-constructed when parsing fails.
+  std::string error_message;  // Empty on success, otherwise the failure description.
+};
+
+struct ParsePropertySheetResult {  // NOTE(review): no declaration in this header uses this type -- confirm it is still needed.
+  PropertySheet property_sheet;
+  std::string error_message;
+};
+
+ParseGrammarResult parse_grammar_json(const std::string &);  // Parses the JSON text of a grammar.
+util::Result<PropertySheet> parse_property_sheet_json(const std::string &);  // Parses the JSON text of a property sheet; failures are reported through Result::error.
+
+}  // namespace tree_sitter
+
+#endif  // COMPILER_PARSE_JSON_H_
--- a/src/compiler/property_sheet.h
+++ b/src/compiler/property_sheet.h
@ -0,0 +1,39 @@
+#ifndef COMPILER_PROPERTY_SHEET_H_
+#define COMPILER_PROPERTY_SHEET_H_
+
+#include <vector>
+#include <map>
+#include <string>
+
+namespace tree_sitter {
+
+// One step of a property selector, as read from a property sheet's JSON.
+struct PropertySelectorStep {
+  std::string type;          // The step's "type" string.
+  bool named;                // The step's "named" flag.
+  bool is_immediate;         // The step's "immediate" flag.
+  int index;                 // The step's "index", or -1 when none was given.
+  std::string text_pattern;  // The step's "text" pattern; empty when absent.
+
+  // Two steps are equal only when every field matches. The text pattern is
+  // part of the comparison so that steps differing only by pattern are not
+  // mistaken for one another, in line with PropertyTransition's equality.
+  inline bool operator==(const PropertySelectorStep &other) const {
+    return
+      type == other.type &&
+      named == other.named &&
+      is_immediate == other.is_immediate &&
+      index == other.index &&
+      text_pattern == other.text_pattern;
+  }
+};
+
+// A selector is an ordered sequence of steps.
+typedef std::vector<PropertySelectorStep> PropertySelector;
+
+// Property names mapped to their string values.
+typedef std::map<std::string, std::string> PropertySet;
+
+// A set of properties together with the selectors listed alongside it.
+struct PropertyRule {
+  std::vector<PropertySelector> selectors;
+  PropertySet properties;
+};
+
+// A property sheet is the list of its rules, in source order.
+typedef std::vector<PropertyRule> PropertySheet;
+
+}  // namespace tree_sitter
+
+#endif  // COMPILER_PROPERTY_SHEET_H_
--- a/src/compiler/property_table.h
+++ b/src/compiler/property_table.h
@ -0,0 +1,48 @@
+#ifndef COMPILER_PROPERTY_TABLE_H_
+#define COMPILER_PROPERTY_TABLE_H_
+
+#include <vector>
+#include <map>
+#include <string>
+#include "compiler/property_sheet.h"
+
+namespace tree_sitter {
+
+// A single transition out of a PropertyState. The first four fields mirror
+// those of a selector step; NOTE(review): state_id is presumably the index
+// of the destination state -- confirm against the table builder.
+struct PropertyTransition {
+  std::string type;
+  bool named;
+  int index;
+  std::string text_pattern;
+  unsigned state_id;
+
+  // Field-by-field equality, stopping at the first field that differs.
+  bool operator==(const PropertyTransition &rhs) const {
+    if (type != rhs.type) return false;
+    if (named != rhs.named) return false;
+    if (index != rhs.index) return false;
+    if (text_pattern != rhs.text_pattern) return false;
+    return state_id == rhs.state_id;
+  }
+};
+
+// One state of the property table: its transitions plus two ids.
+// NOTE(review): the ids presumably index PropertyTable::states and
+// PropertyTable::property_sets respectively -- confirm against the builder.
+struct PropertyState {
+  std::vector<PropertyTransition> transitions;
+  unsigned default_next_state_id;
+  unsigned property_set_id;
+
+  // Field-by-field equality, stopping at the first field that differs.
+  bool operator==(const PropertyState &rhs) const {
+    if (transitions != rhs.transitions) return false;
+    if (default_next_state_id != rhs.default_next_state_id) return false;
+    return property_set_id == rhs.property_set_id;
+  }
+};
+
+// The complete table: all states and all property sets.
+struct PropertyTable {
+  std::vector<PropertyState> states;
+  std::vector<PropertySet> property_sets;
+};
+
+}  // namespace tree_sitter
+
+#endif  // COMPILER_PROPERTY_TABLE_H_
--- a/src/compiler/util/result.h
+++ b/src/compiler/util/result.h
@ -0,0 +1,24 @@
+#ifndef COMPILER_UTIL_RESULT_H_
+#define COMPILER_UTIL_RESULT_H_
+
+#include <string>
+#include <utility>
+
+namespace tree_sitter {
+namespace util {
+
+// Holds either a successfully produced Value or an error message.
+//
+// A Result counts as successful exactly when `error` is empty. The
+// converting constructors are deliberately implicit so that a function
+// returning Result<T> can write `return value;` or `return "message";`.
+// Note that constructing from an empty message therefore yields an "ok"
+// Result holding a value-initialized Value.
+template <typename Value>
+struct Result {
+  Value value;
+  std::string error;
+
+  // A default-constructed Result is a failure with the message "Empty".
+  inline Result() : value(), error("Empty") {}
+
+  // Success. `v` is a named rvalue reference and so an lvalue here; it has
+  // to be moved explicitly, otherwise it is copied (and move-only types
+  // would not compile at all).
+  inline Result(Value &&v) : value(std::move(v)) {}
+
+  // Failure carrying `message`; `value` is value-initialized so it never
+  // holds an indeterminate scalar.
+  inline Result(const std::string &message) : value(), error(message) {}
+  inline Result(const char *message) : value(), error(message) {}
+
+  // True when no error message has been recorded.
+  inline bool ok() const { return error.empty(); }
+};
+
+}  // namespace util
+}  // namespace tree_sitter
+
+#endif  // COMPILER_UTIL_RESULT_H_
--- a/test/compiler/prepare_grammar/extract_tokens_test.cc
+++ b/test/compiler/prepare_grammar/extract_tokens_test.cc
@ -181,7 +181,7 @@ describe("extract_tokens", []() {
        Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
        true
      },
-    }))
+    }));
  });

  it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {
--- a/test/compiler/prepare_grammar/intern_symbols_test.cc
+++ b/test/compiler/prepare_grammar/intern_symbols_test.cc
@ -84,7 +84,7 @@ describe("intern_symbols", []() {
        VariableTypeNamed,
        Symbol::non_terminal(2),
      },
-    }))
+    }));
  });
 });

--- a/test/fixtures/error_corpus/javascript_errors.txt
+++ b/test/fixtures/error_corpus/javascript_errors.txt
@ -138,7 +138,8 @@ var x = !!!
 Errors inside of a template string substitution
 =========================================================

-const a = `b c ${d +} f g`
+const a = `b c ${d += } f g`
+const h = `i ${j(k} l`

 ---

@ -146,7 +147,13 @@ const a = `b c ${d +} f g`
  (lexical_declaration
    (variable_declarator
      (identifier)
-      (template_string (template_substitution (identifier) (ERROR))))))
+      (template_string (template_substitution (identifier) (ERROR)))))
+  (lexical_declaration
+    (variable_declarator
+      (identifier)
+      (template_string (template_substitution (call_expression
+        (identifier)
+        (arguments (identifier) (MISSING))))))))

 =========================================================
 Long sequences of invalid tokens
--- a/test/helpers/scope_sequence.cc
+++ b/test/helpers/scope_sequence.cc
@ -7,6 +7,7 @@

 using std::string;
 using std::cout;
+using namespace snowhouse;

 static void append_text_to_scope_sequence(ScopeSequence *sequence,
                                          ScopeStack *current_scopes,
--- a/test/helpers/tree_helpers.cc
+++ b/test/helpers/tree_helpers.cc
@ -2,6 +2,7 @@
 #include "helpers/tree_helpers.h"
 #include "helpers/point_helpers.h"
 #include <ostream>
+#include <cstring>

 using std::string;
 using std::vector;
--- a/test/runtime/node_test.cc
+++ b/test/runtime/node_test.cc
@ -310,7 +310,7 @@ describe("Node", [&]() {
      AssertThat(ts_node_start_point(child7), Equals<TSPoint>({ 8, 0 }));
      AssertThat(ts_node_end_point(child7), Equals<TSPoint>({ 8, 1 }));

-      AssertThat(ts_node_child_count(child6), Equals<size_t>(3))
+      AssertThat(ts_node_child_count(child6), Equals<size_t>(3));

      TSNode left_brace = ts_node_child(child6, 0);
      TSNode pair = ts_node_child(child6, 1);
--- a/test/test_helper.h
+++ b/test/test_helper.h
@ -11,6 +11,7 @@ namespace tree_sitter {}

 using namespace std;
 using namespace bandit;
+using namespace snowhouse;
 using namespace tree_sitter;

 #define START_TEST go_bandit([]() {
--- a/tests.gyp
+++ b/tests.gyp
@ -84,6 +84,7 @@
      ],
      'ldflags': ['-g'],
      'xcode_settings': {
+        'ARCHS': ['x86_64'],
        'OTHER_LDFLAGS': ['-g'],
        'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'],
        'GCC_OPTIMIZATION_LEVEL': '0',
@ -114,6 +115,7 @@
    ],

    'xcode_settings': {
+      'ARCHS': ['x86_64'],
      'CLANG_CXX_LANGUAGE_STANDARD': 'c++14',
      'ALWAYS_SEARCH_USER_PATHS': 'NO',
    }