From e65403930feba35b84c203fe0f5623d2b028abd1 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 17:38:18 -0700 Subject: [PATCH 01/10] Add ts_compile_property_sheet API --- include/tree_sitter/compiler.h | 1 + project.gyp | 4 +- .../build_tables/property_table_builder.cc | 345 ++++++++++++++++++ .../build_tables/property_table_builder.h | 15 + src/compiler/compile.cc | 17 +- .../generate_code/property_table_json.cc | 105 ++++++ .../generate_code/property_table_json.h | 15 + src/compiler/parse_grammar.h | 21 -- .../{parse_grammar.cc => parse_json.cc} | 216 +++++++---- src/compiler/parse_json.h | 29 ++ src/compiler/property_sheet.h | 38 ++ src/compiler/property_table.h | 46 +++ src/compiler/util/result.h | 24 ++ 13 files changed, 779 insertions(+), 97 deletions(-) create mode 100644 src/compiler/build_tables/property_table_builder.cc create mode 100644 src/compiler/build_tables/property_table_builder.h create mode 100644 src/compiler/generate_code/property_table_json.cc create mode 100644 src/compiler/generate_code/property_table_json.h delete mode 100644 src/compiler/parse_grammar.h rename src/compiler/{parse_grammar.cc => parse_json.cc} (62%) create mode 100644 src/compiler/parse_json.h create mode 100644 src/compiler/property_sheet.h create mode 100644 src/compiler/property_table.h create mode 100644 src/compiler/util/result.h diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 3db2f7ca..a84d8a75 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -29,6 +29,7 @@ typedef struct { } TSCompileResult; TSCompileResult ts_compile_grammar(const char *input, FILE *log_file); +TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file); #ifdef __cplusplus } diff --git a/project.gyp b/project.gyp index 38f6e551..9c70a9dc 100644 --- a/project.gyp +++ b/project.gyp @@ -18,12 +18,14 @@ 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_item_set_builder.cc', 'src/compiler/build_tables/parse_table_builder.cc', + 'src/compiler/build_tables/property_table_builder.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', 'src/compiler/compile.cc', 'src/compiler/generate_code/c_code.cc', + 'src/compiler/generate_code/property_table_json.cc', 'src/compiler/lex_table.cc', 'src/compiler/log.cc', - 'src/compiler/parse_grammar.cc', + 'src/compiler/parse_json.cc', 'src/compiler/parse_table.cc', 'src/compiler/precedence_range.cc', 'src/compiler/prepare_grammar/expand_repeats.cc', diff --git a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc new file mode 100644 index 00000000..813c7fc5 --- /dev/null +++ b/src/compiler/build_tables/property_table_builder.cc @@ -0,0 +1,345 @@ +#include +#include +#include +#include +#include +#include +#include "compiler/property_sheet.h" +#include "compiler/property_table.h" +#include "compiler/build_tables/property_table_builder.h" +#include "compiler/util/hash_combine.h" + +using std::deque; +using std::vector; +using std::pair; +using std::unordered_map; +using std::set; +using std::move; +using std::map; + +namespace tree_sitter { +namespace build_tables { + +struct PropertyItem { + unsigned rule_id; + unsigned selector_id; + unsigned step_id; + + bool operator==(const PropertyItem &other) const { + return + rule_id == other.rule_id && + selector_id == other.selector_id && + step_id == other.step_id; + } + + bool operator<(const PropertyItem &other) const { + if (rule_id < other.rule_id) return true; + if (rule_id > other.rule_id) return false; + if (selector_id < other.selector_id) return true; + if (selector_id > other.selector_id) return false; + return step_id < other.step_id; + } +}; + +struct PropertyItemSet { + set entries; + + bool operator==(const PropertyItemSet &other) const { + return entries == other.entries; + } +}; + +} // namespace build_tables +} // namespace tree_sitter + +namespace std { + +using tree_sitter::util::hash_combine; + +template <> +struct hash { + size_t operator()(const tree_sitter::build_tables::PropertyItemSet &item_set) const { + size_t result = 0; + hash_combine(&result, item_set.entries.size()); + for (const auto &item : item_set.entries) { + hash_combine(&result, item.rule_id); + hash_combine(&result, item.selector_id); + hash_combine(&result, item.step_id); + } + return result; + } +}; + +template <> +struct hash { + size_t operator()(const tree_sitter::PropertyTransition &transition) const { + size_t result = 0; + hash_combine(&result, transition.type); + hash_combine(&result, transition.named); + hash_combine(&result, transition.index); + hash_combine(&result, transition.state_id); + return result; + } +}; + +template <> +struct hash { + size_t operator()(const tree_sitter::PropertySet &set) const { + size_t result = 0; + hash_combine(&result, set.size()); + for (const auto &pair : set) { + hash_combine(&result, pair.first); + hash_combine(&result, pair.second); + } + return result; + } +}; + +} // namespace std + +namespace tree_sitter { +namespace build_tables { + +typedef unsigned StateId; +typedef unsigned PropertySetId; + +struct PropertySelectorMatch { + unsigned specificity; + unsigned rule_id; + unsigned selector_id; + const PropertySet *property_set; + + bool operator<(const PropertySelectorMatch &other) const { + if (specificity < other.specificity) return true; + if (specificity > other.specificity) return false; + if (rule_id < other.rule_id) return true; + if (rule_id > other.rule_id) return false; + return selector_id < other.selector_id; + } +}; + +struct PropertyTableBuilder { + PropertySheet sheet; + PropertyTable result; + unordered_map ids_by_item_set; + unordered_map ids_by_property_set; + deque> item_set_queue; + + PropertyTableBuilder(const PropertySheet &sheet) : sheet(sheet) {} + + PropertyTable build() { + PropertyItemSet start_item_set; + for (unsigned i = 0; i < sheet.size(); i++) { + PropertyRule &rule = sheet[i]; + for (unsigned j = 0; j < rule.selectors.size(); j++) { + start_item_set.entries.insert(PropertyItem {i, j, 0}); + } + } + + add_state(start_item_set); + while (!item_set_queue.empty()) { + auto entry = item_set_queue.front(); + PropertyItemSet item_set = move(entry.first); + StateId state_id = entry.second; + item_set_queue.pop_front(); + populate_state(item_set, state_id); + } + + remove_duplicate_states(); + + return result; + } + + void remove_duplicate_states() { + map replacements; + + while (true) { + map duplicates; + for (StateId i = 0, size = result.states.size(); i < size; i++) { + for (StateId j = 0; j < i; j++) { + if (!duplicates.count(j) && result.states[j] == result.states[i]) { + duplicates.insert({ i, j }); + break; + } + } + } + + if (duplicates.empty()) break; + + map new_replacements; + for (StateId i = 0, size = result.states.size(); i < size; i++) { + StateId new_state_index = i; + auto duplicate = duplicates.find(i); + if (duplicate != duplicates.end()) { + new_state_index = duplicate->second; + } + + size_t prior_removed = 0; + for (const auto &duplicate : duplicates) { + if (duplicate.first >= new_state_index) break; + prior_removed++; + } + + new_state_index -= prior_removed; + new_replacements.insert({i, new_state_index}); + replacements.insert({ i, new_state_index }); + for (auto &replacement : replacements) { + if (replacement.second == i) { + replacement.second = new_state_index; + } + } + } + + for (auto &state : result.states) { + for (auto &transition : state.transitions) { + auto new_replacement = new_replacements.find(transition.state_id); + if (new_replacement != new_replacements.end()) { + transition.state_id = new_replacement->second; + } + } + + auto new_replacement = new_replacements.find(state.default_next_state_id); + if (new_replacement != new_replacements.end()) { + state.default_next_state_id = new_replacement->second; + } + } + + for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) { + result.states.erase(result.states.begin() + i->first); + } + } + } + + const PropertySelectorStep *next_step_for_item(const PropertyItem &item) { + const PropertySelector &selector = sheet[item.rule_id].selectors[item.selector_id]; + if (item.step_id < selector.size()) { + return &selector[item.step_id]; + } else { + return nullptr; + } + } + + const PropertySelectorStep *prev_step_for_item(const PropertyItem &item) { + if (item.step_id > 0) { + return &sheet[item.rule_id].selectors[item.selector_id][item.step_id]; + } else { + return nullptr; + } + } + + unsigned specificity_for_selector(const PropertySelector &selector) { + unsigned result = selector.size(); + for (const PropertySelectorStep &step : selector) { + if (step.index != -1) result++; + } + return result; + } + + bool step_is_superset(const PropertySelectorStep &step, const PropertyTransition &transition) { + return + step.type == transition.type && + step.named == transition.named && + (step.index == transition.index || step.index == -1); + } + + void populate_state(const PropertyItemSet &item_set, StateId state_id) { + std::unordered_map transitions; + std::vector selector_matches; + + for (const PropertyItem &item : item_set.entries) { + const PropertySelectorStep *next_step = next_step_for_item(item); + if (next_step) { + transitions[PropertyTransition{ + next_step->type, + next_step->named, + next_step->index, + 0 + }] = PropertyItemSet(); + } else { + const PropertyRule &rule = sheet[item.rule_id]; + selector_matches.push_back(PropertySelectorMatch { + specificity_for_selector(rule.selectors[item.selector_id]), + item.rule_id, + item.selector_id, + &rule.properties, + }); + } + } + + for (auto &pair : transitions) { + PropertyTransition transition = pair.first; + PropertyItemSet &next_item_set = pair.second; + + for (const PropertyItem &item : item_set.entries) { + const PropertySelectorStep *next_step = next_step_for_item(item); + const PropertySelectorStep *prev_step = prev_step_for_item(item); + if (next_step) { + if (step_is_superset(*next_step, transition)) { + PropertyItem next_item = item; + next_item.step_id++; + next_item_set.entries.insert(next_item); + } + if (!prev_step || !prev_step->is_immediate) { + next_item_set.entries.insert(item); + } + } + } + + transition.state_id = add_state(next_item_set); + result.states[state_id].transitions.push_back(transition); + } + + PropertyItemSet default_next_item_set; + for (const PropertyItem &item : item_set.entries) { + const PropertySelectorStep *next_step = next_step_for_item(item); + const PropertySelectorStep *prev_step = prev_step_for_item(item); + if (next_step && (!prev_step || !prev_step->is_immediate)) { + default_next_item_set.entries.insert(item); + } + } + + result.states[state_id].default_next_state_id = add_state(default_next_item_set); + + PropertySet properties; + std::sort(selector_matches.begin(), selector_matches.end()); + for (auto &match : selector_matches) { + for (auto &pair : *match.property_set) { + properties[pair.first] = pair.second; + } + } + + result.states[state_id].property_set_id = add_property_set(properties); + } + + StateId add_state(const PropertyItemSet &item_set) { + auto entry = ids_by_item_set.find(item_set); + if (entry == ids_by_item_set.end()) { + StateId id = result.states.size(); + ids_by_item_set[item_set] = id; + result.states.push_back(PropertyState {}); + item_set_queue.push_back({item_set, id}); + return id; + } else { + return entry->second; + } + } + + PropertySetId add_property_set(const PropertySet &property_set) { + auto entry = ids_by_property_set.find(property_set); + if (entry == ids_by_property_set.end()) { + PropertySetId id = result.property_sets.size(); + ids_by_property_set[property_set] = id; + result.property_sets.push_back(property_set); + return id; + } else { + return entry->second; + } + } +}; + +PropertyTable build_property_table(const PropertySheet &sheet) { + return PropertyTableBuilder(sheet).build(); +} + +} // namespace build_tables +} // namespace tree_sitter diff --git a/src/compiler/build_tables/property_table_builder.h b/src/compiler/build_tables/property_table_builder.h new file mode 100644 index 00000000..25b94ce7 --- /dev/null +++ b/src/compiler/build_tables/property_table_builder.h @@ -0,0 +1,15 @@ +#ifndef COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_ +#define COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_ + +#include +#include "compiler/property_table.h" + +namespace tree_sitter { +namespace build_tables { + +PropertyTable build_property_table(const PropertySheet &); + +} // namespace build_tables +} // namespace tree_sitter + +#endif // COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_ diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc index 83bdbcc2..4a9bd663 100644 --- a/src/compiler/compile.cc +++ b/src/compiler/compile.cc @@ -1,11 +1,13 @@ #include "tree_sitter/compiler.h" #include "compiler/prepare_grammar/prepare_grammar.h" #include "compiler/build_tables/parse_table_builder.h" +#include "compiler/build_tables/property_table_builder.h" #include "compiler/generate_code/c_code.h" +#include "compiler/generate_code/property_table_json.h" #include "compiler/syntax_grammar.h" #include "compiler/log.h" #include "compiler/lexical_grammar.h" -#include "compiler/parse_grammar.h" +#include "compiler/parse_json.h" #include "json.h" namespace tree_sitter { @@ -20,7 +22,7 @@ using std::make_tuple; extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) { set_log_file(log_file); - ParseGrammarResult parse_result = parse_grammar(string(input)); + ParseGrammarResult parse_result = parse_grammar_json(string(input)); if (!parse_result.error_message.empty()) { return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar}; } @@ -60,4 +62,15 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone}; } +extern "C" TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file) { + set_log_file(log_file); + auto parse_result = parse_property_sheet_json(string(input)); + if (!parse_result.ok()) { + return {nullptr, strdup(parse_result.error.c_str()), TSCompileErrorTypeInvalidGrammar}; + } + PropertyTable table = build_tables::build_property_table(parse_result.value); + string code = generate_code::property_table_json(table); + return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone}; +} + } // namespace tree_sitter diff --git a/src/compiler/generate_code/property_table_json.cc b/src/compiler/generate_code/property_table_json.cc new file mode 100644 index 00000000..82dc00dd --- /dev/null +++ b/src/compiler/generate_code/property_table_json.cc @@ -0,0 +1,105 @@ +#include "compiler/generate_code/property_table_json.h" +#include +#include + +using std::string; +using std::to_string; +using std::vector; + +namespace tree_sitter { +namespace generate_code { + +class CodeGenerator { + string buffer; + + public: + string generate(const PropertyTable &table) { + add("{"); + add("\"states\":"); + add("["); + for (unsigned i = 0; i < table.states.size(); i++) { + const PropertyState &state = table.states[i]; + if (i != 0) add(","); + add_state(i, state); + } + add("],"); + add("\"property_sets\":"); + add("["); + bool first = true; + for (const PropertySet &property_set : table.property_sets) { + if (!first) add(","); + first = false; + add_property_set(property_set); + } + add("]"); + add("}"); + return buffer; + } + + private: + void add_state(unsigned i, const PropertyState &state) { + add("{"); + add("\"id\":"); + add(to_string(i)); + add(",\"property_set_id\":"); + add(to_string(state.property_set_id)); + add(","); + add("\"transitions\":["); + bool first = true; + for (const auto &transition : state.transitions) { + if (!first) add(","); + first = false; + add_transition(transition); + } + add("],"); + add("\"default_next_state_id\":"); + add(to_string(state.default_next_state_id)); + add("}"); + } + + void add_property_set(const PropertySet &property_set) { + add("{"); + bool first = true; + for (const auto &pair : property_set) { + if (!first) add(","); + first = false; + add("\""); + add(pair.first); + add("\":\""); + add(pair.second); + add("\""); + } + add("}"); + } + + void add_transition(const PropertyTransition &transition) { + add("{"); + add("\"type\":\""); + add(transition.type); + add("\",\"named\":"); + add(transition.named ? "true" : "false"); + add(",\"index\":"); + add(transition.index == -1 ? "null" : to_string(transition.index)); + add(",\"state_id\": "); + add(to_string(transition.state_id)); + add("}"); + } + + void add_string(const string &s) { + add("\""); + add(s); + add("\""); + } + + void add(string input) { + buffer += input; + } +}; + +string property_table_json(PropertyTable table) { + CodeGenerator g; + return g.generate(table); +} + +} // namespace generate_code +} // namespace tree_sitter diff --git a/src/compiler/generate_code/property_table_json.h b/src/compiler/generate_code/property_table_json.h new file mode 100644 index 00000000..7b6af342 --- /dev/null +++ b/src/compiler/generate_code/property_table_json.h @@ -0,0 +1,15 @@ +#ifndef COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_ +#define COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_ + +#include +#include "compiler/property_table.h" + +namespace tree_sitter { +namespace generate_code { + +std::string property_table_json(PropertyTable); + +} // namespace generate_code +} // namespace tree_sitter + +#endif // COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_ diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h deleted file mode 100644 index c24cd9ca..00000000 --- a/src/compiler/parse_grammar.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef COMPILER_GRAMMAR_JSON_H_ -#define COMPILER_GRAMMAR_JSON_H_ - -#include -#include -#include "tree_sitter/compiler.h" -#include "compiler/grammar.h" - -namespace tree_sitter { - -struct ParseGrammarResult { - std::string name; - InputGrammar grammar; - std::string error_message; -}; - -ParseGrammarResult parse_grammar(const std::string &); - -} // namespace tree_sitter - -#endif // COMPILER_GRAMMAR_JSON_H_ diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_json.cc similarity index 62% rename from src/compiler/parse_grammar.cc rename to src/compiler/parse_json.cc index 7b69ed61..5ac8ffc4 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_json.cc @@ -1,10 +1,11 @@ -#include "compiler/parse_grammar.h" +#include "compiler/parse_json.h" #include #include #include #include #include "json.h" #include "compiler/rule.h" +#include "compiler/util/result.h" namespace tree_sitter { @@ -19,17 +20,9 @@ using rules::Metadata; using rules::Pattern; using rules::String; using rules::NamedSymbol; +using util::Result; -struct ParseRuleResult { - Rule rule; - string error_message; - - ParseRuleResult(const string &error_message) : error_message(error_message) {} - ParseRuleResult(const char *error_message) : error_message(error_message) {} - ParseRuleResult(Rule rule) : rule(rule) {} -}; - -ParseRuleResult parse_rule(json_value *rule_json) { +Result parse_rule_json(json_value *rule_json) { string error_message; json_value rule_type_json; string type; @@ -62,11 +55,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - auto result = parse_rule(member_json); - if (!result.error_message.empty()) { - return "Invalid choice member: " + result.error_message; + auto result = parse_rule_json(member_json); + if (!result.ok()) { + return "Invalid choice member: " + result.error; } - members.push_back(result.rule); + members.push_back(result.value); } return Rule::choice(members); } @@ -80,49 +73,49 @@ ParseRuleResult parse_rule(json_value *rule_json) { vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - auto result = parse_rule(member_json); - if (!result.error_message.empty()) { - return "Invalid choice member: " + result.error_message; + auto result = parse_rule_json(member_json); + if (!result.ok()) { + return "Invalid choice member: " + result.error; } - members.push_back(result.rule); + members.push_back(result.value); } return Rule::seq(members); } if (type == "REPEAT") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid repeat content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid repeat content: " + result.error; } - return Rule::choice({Rule::repeat(result.rule), Blank{}}); + return Rule::choice({Rule::repeat(result.value), Blank{}}); } if (type == "REPEAT1") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid repeat content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid repeat content: " + result.error; } - return Rule::repeat(result.rule); + return Rule::repeat(result.value); } if (type == "TOKEN") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid token content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid token content: " + result.error; } - return Rule(Metadata::token(move(result.rule))); + return Rule(Metadata::token(move(result.value))); } if (type == "IMMEDIATE_TOKEN") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid token content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid token content: " + result.error; } - return Rule(Metadata::immediate_token(move(result.rule))); + return Rule(Metadata::immediate_token(move(result.value))); } if (type == "PATTERN") { @@ -159,11 +152,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec(precedence_json.u.integer, move(result.value))); } if (type == "PREC_LEFT") { @@ -173,11 +166,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.value))); } if (type == "PREC_RIGHT") { @@ -187,11 +180,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.value))); } if (type == "PREC_DYNAMIC") { @@ -201,11 +194,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.value))); } if (type == "ALIAS") { @@ -220,21 +213,21 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid rename content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid rename content: " + result.error; } return Rule(Metadata::alias( string(value_json.u.string.ptr), is_named_json.u.boolean, - move(result.rule) + move(result.value) )); } return "Unknown rule type: " + type; } -ParseGrammarResult parse_grammar(const string &input) { +ParseGrammarResult parse_grammar_json(const string &input) { string error_message; string name; InputGrammar grammar; @@ -242,8 +235,8 @@ ParseGrammarResult parse_grammar(const string &input) { name_json, rules_json, extras_json, conflicts_json, external_tokens_json, inline_rules_json, word_rule_json; - json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; char parse_error[json_error_max]; + json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; json_value *grammar_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error); if (!grammar_json) { @@ -272,15 +265,15 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) { json_object_entry entry_json = rules_json.u.object.values[i]; - auto result = parse_rule(entry_json.value); - if (!result.error_message.empty()) { - error_message = result.error_message; + auto result = parse_rule_json(entry_json.value); + if (!result.ok()) { + error_message = result.error; goto error; } grammar.variables.push_back(Variable{ string(entry_json.name), VariableTypeNamed, - result.rule + result.value }); } @@ -293,12 +286,12 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) { json_value *extra_json = extras_json.u.array.values[i]; - auto result = parse_rule(extra_json); - if (!result.error_message.empty()) { - error_message = "Invalid extra token: " + result.error_message; + auto result = parse_rule_json(extra_json); + if (!result.ok()) { + error_message = "Invalid extra token: " + result.error; goto error; } - grammar.extra_tokens.push_back(result.rule); + grammar.extra_tokens.push_back(result.value); } } @@ -361,12 +354,12 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) { json_value *external_token_json = external_tokens_json.u.array.values[i]; - auto result = parse_rule(external_token_json); - if (!result.error_message.empty()) { - error_message = "Invalid external token: " + result.error_message; + auto result = parse_rule_json(external_token_json); + if (!result.ok()) { + error_message = "Invalid external token: " + result.error; goto error; } - grammar.external_tokens.push_back(result.rule); + grammar.external_tokens.push_back(result.value); } } @@ -384,11 +377,88 @@ ParseGrammarResult parse_grammar(const string &input) { return { name, grammar, "" }; error: - if (grammar_json) { - json_value_free(grammar_json); - } - + if (grammar_json) json_value_free(grammar_json); return { "", InputGrammar(), error_message }; } +Result parse_property_rule_json(json_value *rule_json) { + PropertyRule result; + + if (rule_json->type != json_object) return "Rule must be an object"; + + json_value selectors_json = rule_json->operator[]("selectors"); + if (selectors_json.type != json_array) return "Selectors must be an array"; + + for (unsigned i = 0; i < selectors_json.u.array.length; i++) { + PropertySelector selector; + json_value *selector_json = selectors_json.u.array.values[i]; + if (selector_json->type != json_array) return "Each selector must be an array"; + + for (unsigned j = 0; j < selector_json->u.array.length; j++) { + json_value *selector_step_json = selector_json->u.array.values[j]; + if (selector_step_json->type != json_object) return "Each selector must be an array of objects"; + PropertySelectorStep step; + step.type = selector_step_json->operator[]("type").u.string.ptr; + step.named = selector_step_json->operator[]("named").u.boolean; + step.is_immediate = selector_step_json->operator[]("immediate").u.boolean; + + json_value index_json = selector_step_json->operator[]("index"); + if (index_json.type == json_integer) { + step.index = index_json.u.integer; + } else { + step.index = -1; + } + + selector.push_back(step); + } + + result.selectors.push_back(selector); + } + + json_value properties_json = rule_json->operator[]("properties"); + if (properties_json.type != json_object) return "Properties must be an object"; + + for (unsigned i = 0; i < properties_json.u.object.length; i++) { + json_object_entry entry_json = properties_json.u.object.values[i]; + json_value *value_json = entry_json.value; + if (value_json->type != json_string) return "Property values must be strings"; + result.properties[entry_json.name] = value_json->u.string.ptr; + } + + return result; +} + +Result parse_property_sheet_json(const string &input) { + PropertySheet sheet; + string error_message; + char parse_error[json_error_max]; + json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; + json_value *sheet_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error); + if (!sheet_json) { + error_message = string("Invalid JSON at ") + parse_error; + goto error; + } + + if (sheet_json->type != json_array) { + error_message = "Property sheet must be an array"; + goto error; + } + + for (unsigned i = 0; i < sheet_json->u.array.length; i++) { + json_value *rule_json = sheet_json->u.array.values[i]; + auto result = parse_property_rule_json(rule_json); + if (!result.ok()) { + error_message = "Invalid external token: " + result.error; + goto error; + } + sheet.push_back(result.value); + } + + return sheet; + +error: + if (sheet_json) json_value_free(sheet_json); + return error_message.c_str(); +} + } // namespace tree_sitter diff --git a/src/compiler/parse_json.h b/src/compiler/parse_json.h new file mode 100644 index 00000000..fda7378c --- /dev/null +++ b/src/compiler/parse_json.h @@ -0,0 +1,29 @@ +#ifndef COMPILER_PARSE_JSON_H_ +#define COMPILER_PARSE_JSON_H_ + +#include +#include +#include "tree_sitter/compiler.h" +#include "compiler/grammar.h" +#include "compiler/property_sheet.h" +#include "compiler/util/result.h" + +namespace tree_sitter { + +struct ParseGrammarResult { + std::string name; + InputGrammar grammar; + std::string error_message; +}; + +struct ParsePropertySheetResult { + PropertySheet property_sheet; + std::string error_message; +}; + +ParseGrammarResult parse_grammar_json(const std::string &); +util::Result parse_property_sheet_json(const std::string &); + +} // namespace tree_sitter + +#endif // COMPILER_PARSE_JSON_H_ diff --git a/src/compiler/property_sheet.h b/src/compiler/property_sheet.h new file mode 100644 index 00000000..c7663175 --- /dev/null +++ b/src/compiler/property_sheet.h @@ -0,0 +1,38 @@ +#ifndef COMPILER_PROPERTY_SHEET_H_ +#define COMPILER_PROPERTY_SHEET_H_ + +#include +#include +#include + +namespace tree_sitter { + +struct PropertySelectorStep { + std::string type; + bool named; + bool is_immediate; + int index; + + inline bool operator==(const PropertySelectorStep &other) const { + return + type == other.type && + named == other.named && + is_immediate == other.is_immediate && + index == other.index; + } +}; + +typedef std::vector PropertySelector; + +typedef std::map PropertySet; + +struct PropertyRule { + std::vector selectors; + PropertySet properties; +}; + +typedef std::vector PropertySheet; + +} // namespace tree_sitter + +#endif // COMPILER_PROPERTY_SHEET_H_ diff --git a/src/compiler/property_table.h b/src/compiler/property_table.h new file mode 100644 index 00000000..e7fb4b9f --- /dev/null +++ b/src/compiler/property_table.h @@ -0,0 +1,46 @@ +#ifndef COMPILER_PROPERTY_TABLE_H_ +#define COMPILER_PROPERTY_TABLE_H_ + +#include +#include +#include +#include "compiler/property_sheet.h" + +namespace tree_sitter { + +struct PropertyTransition { + std::string type; + bool named; + int index; + unsigned state_id; + + bool operator==(const PropertyTransition &other) const { + return + type == other.type && + named == other.named && + index == other.index && + state_id == other.state_id; + } +}; + +struct PropertyState { + std::vector transitions; + unsigned default_next_state_id; + unsigned property_set_id; + + bool operator==(const PropertyState &other) const { + return + transitions == other.transitions && + default_next_state_id == other.default_next_state_id && + property_set_id == other.property_set_id; + } +}; + +struct PropertyTable { + std::vector states; + std::vector property_sets; +}; + +} // namespace tree_sitter + +#endif // COMPILER_PROPERTY_TABLE_H_ diff --git a/src/compiler/util/result.h b/src/compiler/util/result.h new file mode 100644 index 00000000..8e444dc7 --- /dev/null +++ b/src/compiler/util/result.h @@ -0,0 +1,24 @@ +#ifndef COMPILER_UTIL_RESULT_H_ +#define COMPILER_UTIL_RESULT_H_ + +#include + +namespace tree_sitter { +namespace util { + +template +struct Result { + Value value; + std::string error; + + inline Result() : error("Empty") {} + inline Result(Value &&v) : value(v) {} + inline Result(const std::string &message) : error(message) {} + inline Result(const char *message) : error(message) {} + inline bool ok() const { return error.empty(); } +}; + +} // namespace util +} // namespace tree_sitter + +#endif // COMPILER_UTIL_RESULT_H_ From 5fdbb2483d7fe3a55e8aada1d813b994912c4212 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 21:37:48 -0700 Subject: [PATCH 02/10] Always build 64-bit on macOS --- project.gyp | 5 +++++ tests.gyp | 2 ++ 2 files changed, 7 insertions(+) diff --git a/project.gyp b/project.gyp index 9c70a9dc..53f95958 100644 --- a/project.gyp +++ b/project.gyp @@ -124,6 +124,7 @@ 'cflags': [ '-g' ], 'ldflags': [ '-g' ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', }, @@ -133,6 +134,7 @@ 'cflags': [ '-g' ], 'ldflags': [ '-g' ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', 'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'], @@ -145,6 +147,9 @@ 'Release': { 'cflags': [ '-O2', '-fno-strict-aliasing' ], 'cflags!': [ '-O3', '-fstrict-aliasing' ], + 'xcode_settings': { + 'ARCHS': ['x86_64'], + }, }, }, diff --git a/tests.gyp b/tests.gyp index ed97c3e4..11c7769f 100644 --- a/tests.gyp +++ b/tests.gyp @@ -83,6 +83,7 @@ ], 'ldflags': ['-g'], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'], 'GCC_OPTIMIZATION_LEVEL': '0', @@ -104,6 +105,7 @@ }] ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'ALWAYS_SEARCH_USER_PATHS': 'NO', } From 00a94ac0407a0b985aca5cf42b569a612d5ae748 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 21:38:03 -0700 Subject: [PATCH 03/10] Fix javascript error fixture --- test/fixtures/error_corpus/javascript_errors.txt | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index fb31d9e2..1717b85d 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -138,7 +138,8 @@ var x = !!! Errors inside of a template string substitution ========================================================= -const a = `b c ${d +} f g` +const a = `b c ${d += } f g` +const h = `i ${j(k} l` --- @@ -146,7 +147,13 @@ const a = `b c ${d +} f g` (lexical_declaration (variable_declarator (identifier) - (template_string (template_substitution (identifier) (ERROR)))))) + (template_string (template_substitution (identifier) (ERROR))))) + (lexical_declaration + (variable_declarator + (identifier) + (template_string (template_substitution (call_expression + (identifier) + (arguments (identifier) (MISSING)))))))) ========================================================= Long sequences of invalid tokens From 1129759b52ce934ac226692ef4afa0959b079211 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 21:38:19 -0700 Subject: [PATCH 04/10] Don't include null index in property table json --- src/compiler/generate_code/property_table_json.cc | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/compiler/generate_code/property_table_json.cc b/src/compiler/generate_code/property_table_json.cc index 82dc00dd..dabe2b2f 100644 --- a/src/compiler/generate_code/property_table_json.cc +++ b/src/compiler/generate_code/property_table_json.cc @@ -78,8 +78,10 @@ class CodeGenerator { add(transition.type); add("\",\"named\":"); add(transition.named ? "true" : "false"); - add(",\"index\":"); - add(transition.index == -1 ? "null" : to_string(transition.index)); + if (transition.index != -1) { + add(",\"index\":"); + add(to_string(transition.index)); + } add(",\"state_id\": "); add(to_string(transition.state_id)); add("}"); @@ -97,8 +99,7 @@ class CodeGenerator { }; string property_table_json(PropertyTable table) { - CodeGenerator g; - return g.generate(table); + return CodeGenerator().generate(table); } } // namespace generate_code From 0aac2ab7226116ad8b64342953aea8d290b396d0 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 21:59:28 -0700 Subject: [PATCH 05/10] :arrow_up: bandit --- externals/bandit | 2 +- test/compiler/prepare_grammar/extract_tokens_test.cc | 2 +- test/compiler/prepare_grammar/intern_symbols_test.cc | 2 +- test/helpers/scope_sequence.cc | 1 + test/runtime/node_test.cc | 2 +- test/test_helper.h | 1 + 6 files changed, 6 insertions(+), 4 deletions(-) diff --git a/externals/bandit b/externals/bandit index f14ade4f..bfdb8a33 160000 --- a/externals/bandit +++ b/externals/bandit @@ -1 +1 @@ -Subproject commit f14ade4fbba72f2e8ba3a7f47318eb07614a39d4 +Subproject commit bfdb8a3322a2e54b11aea64d84f9788d83477e83 diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index da2ae60a..5ea6e469 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -181,7 +181,7 @@ describe("extract_tokens", []() { Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, true }, - })) + })); }); it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() { diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 7b7f3624..6816dad4 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -84,7 +84,7 @@ describe("intern_symbols", []() { VariableTypeNamed, Symbol::non_terminal(2), }, - })) + })); }); }); diff --git a/test/helpers/scope_sequence.cc b/test/helpers/scope_sequence.cc index 1121b80e..34109b76 100644 --- a/test/helpers/scope_sequence.cc +++ b/test/helpers/scope_sequence.cc @@ -7,6 +7,7 @@ using std::string; using std::cout; +using namespace snowhouse; static void append_text_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, diff --git a/test/runtime/node_test.cc b/test/runtime/node_test.cc index d0aa69e3..3a856403 100644 --- a/test/runtime/node_test.cc +++ b/test/runtime/node_test.cc @@ -310,7 +310,7 @@ describe("Node", [&]() { AssertThat(ts_node_start_point(child7), Equals({ 8, 0 })); AssertThat(ts_node_end_point(child7), Equals({ 8, 1 })); - AssertThat(ts_node_child_count(child6), Equals(3)) + AssertThat(ts_node_child_count(child6), Equals(3)); TSNode left_brace = ts_node_child(child6, 0); TSNode pair = ts_node_child(child6, 1); diff --git a/test/test_helper.h b/test/test_helper.h index 99db971a..04fe1ffb 100644 --- a/test/test_helper.h +++ b/test/test_helper.h @@ -11,6 +11,7 @@ namespace tree_sitter {} using namespace std; using namespace bandit; +using namespace snowhouse; using namespace tree_sitter; #define START_TEST go_bandit([]() { From c79929f8ea81915c7027c19dd732bf1105c4f652 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 22:09:45 -0700 Subject: [PATCH 06/10] Include cstring for memcmp --- test/helpers/tree_helpers.cc | 1 + 1 file changed, 1 insertion(+) diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index e8b3ace4..fdbcc6db 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -2,6 +2,7 @@ #include "helpers/tree_helpers.h" #include "helpers/point_helpers.h" #include +#include using std::string; using std::to_string; From 9c6f5c98368be30a28ff2494fe55c2b722f7d6a4 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Wed, 3 Oct 2018 23:39:40 -0700 Subject: [PATCH 07/10] Add comments to PropertyTableBuilder --- .../build_tables/property_table_builder.cc | 87 ++++++++++++++----- 1 file changed, 67 insertions(+), 20 deletions(-) diff --git a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc index 813c7fc5..d19b471f 100644 --- a/src/compiler/build_tables/property_table_builder.cc +++ b/src/compiler/build_tables/property_table_builder.cc @@ -20,6 +20,10 @@ using std::map; namespace tree_sitter { namespace build_tables { +// A position within a selector for a particular rule set. +// For example, in a selector like `a > b`, this might +// describe the state of having descended into an `a`, +// but not a `b`. struct PropertyItem { unsigned rule_id; unsigned selector_id; @@ -41,6 +45,9 @@ struct PropertyItem { } }; +// A set of possible positions within different selectors. +// This directly represents a state of the property-matching +// state machine. struct PropertyItemSet { set entries; @@ -49,6 +56,24 @@ struct PropertyItemSet { } }; +// A set of properties that matched via a certain selector. +// These are ordered according to the usual CSS rules: +// specificity, falling back to the order in the original sheet. +struct PropertySelectorMatch { + unsigned specificity; + unsigned rule_id; + unsigned selector_id; + const PropertySet *property_set; + + bool operator<(const PropertySelectorMatch &other) const { + if (specificity < other.specificity) return true; + if (specificity > other.specificity) return false; + if (rule_id < other.rule_id) return true; + if (rule_id > other.rule_id) return false; + return selector_id < other.selector_id; + } +}; + } // namespace build_tables } // namespace tree_sitter @@ -56,6 +81,9 @@ namespace std { using tree_sitter::util::hash_combine; +// PropertyItemSets must be hashed because in the process of building +// the table, we maintain a map of existing property item sets to +// state ids. template <> struct hash { size_t operator()(const tree_sitter::build_tables::PropertyItemSet &item_set) const { @@ -70,6 +98,8 @@ struct hash { } }; +// PropertyTransitions must be hashed because we represent state +// transitions as a map of PropertyTransitions to successor PropertyItemSets. template <> struct hash { size_t operator()(const tree_sitter::PropertyTransition &transition) const { @@ -82,6 +112,7 @@ struct hash { } }; +// PropertySets must be hashed so that we can use a map to dedup them. template <> struct hash { size_t operator()(const tree_sitter::PropertySet &set) const { @@ -103,21 +134,6 @@ namespace build_tables { typedef unsigned StateId; typedef unsigned PropertySetId; -struct PropertySelectorMatch { - unsigned specificity; - unsigned rule_id; - unsigned selector_id; - const PropertySet *property_set; - - bool operator<(const PropertySelectorMatch &other) const { - if (specificity < other.specificity) return true; - if (specificity > other.specificity) return false; - if (rule_id < other.rule_id) return true; - if (rule_id > other.rule_id) return false; - return selector_id < other.selector_id; - } -}; - struct PropertyTableBuilder { PropertySheet sheet; PropertyTable result; @@ -150,6 +166,8 @@ struct PropertyTableBuilder { return result; } + // Different item sets can actually produce the same state, so the + // states need to be explicitly deduped as a post-processing step. void remove_duplicate_states() { map replacements; @@ -210,6 +228,8 @@ struct PropertyTableBuilder { } } + // Get the next part of the selector that needs to be matched for a given item. + // This returns null if the item has consumed its entire selector. const PropertySelectorStep *next_step_for_item(const PropertyItem &item) { const PropertySelector &selector = sheet[item.rule_id].selectors[item.selector_id]; if (item.step_id < selector.size()) { @@ -219,6 +239,8 @@ struct PropertyTableBuilder { } } + // Get the previous part of the selector that was matched for a given item. + // This returns null if the item has not consumed anything. const PropertySelectorStep *prev_step_for_item(const PropertyItem &item) { if (item.step_id > 0) { return &sheet[item.rule_id].selectors[item.selector_id][item.step_id]; @@ -235,7 +257,8 @@ struct PropertyTableBuilder { return result; } - bool step_is_superset(const PropertySelectorStep &step, const PropertyTransition &transition) { + // Check if the given state transition matches the given part of a selector. + bool step_matches_transition(const PropertySelectorStep &step, const PropertyTransition &transition) { return step.type == transition.type && step.named == transition.named && @@ -243,11 +266,15 @@ struct PropertyTableBuilder { } void populate_state(const PropertyItemSet &item_set, StateId state_id) { - std::unordered_map transitions; - std::vector selector_matches; + unordered_map transitions; + vector selector_matches; for (const PropertyItem &item : item_set.entries) { const PropertySelectorStep *next_step = next_step_for_item(item); + + // If this item has more elements to match for its selector, then + // there's a state transition for elements that match the next + // part of the selector. if (next_step) { transitions[PropertyTransition{ next_step->type, @@ -255,7 +282,11 @@ struct PropertyTableBuilder { next_step->index, 0 }] = PropertyItemSet(); - } else { + } + + // If the item has matched its entire selector, then the property set + // for the item's rule applies in this state. + else { const PropertyRule &rule = sheet[item.rule_id]; selector_matches.push_back(PropertySelectorMatch { specificity_for_selector(rule.selectors[item.selector_id]), @@ -266,6 +297,8 @@ struct PropertyTableBuilder { } } + // For each element that follows an item in this set, + // compute the next item set after descending through that element. for (auto &pair : transitions) { PropertyTransition transition = pair.first; PropertyItemSet &next_item_set = pair.second; @@ -274,11 +307,18 @@ struct PropertyTableBuilder { const PropertySelectorStep *next_step = next_step_for_item(item); const PropertySelectorStep *prev_step = prev_step_for_item(item); if (next_step) { - if (step_is_superset(*next_step, transition)) { + + // If the element matches the next part of the item, advance the + // item to the next part of its selector. + if (step_matches_transition(*next_step, transition)) { PropertyItem next_item = item; next_item.step_id++; next_item_set.entries.insert(next_item); } + + // If the element does not match, and the item is in the middle + // of an immediate child selector, then remove it from the + // next item set. Otherwise, keep it unchanged. if (!prev_step || !prev_step->is_immediate) { next_item_set.entries.insert(item); } @@ -289,6 +329,9 @@ struct PropertyTableBuilder { result.states[state_id].transitions.push_back(transition); } + // Compute the default successor item set - the item set that + // we should advance to if the next element doesn't match any + // of the next elements in the item set's selectors. PropertyItemSet default_next_item_set; for (const PropertyItem &item : item_set.entries) { const PropertySelectorStep *next_step = next_step_for_item(item); @@ -300,6 +343,9 @@ struct PropertyTableBuilder { result.states[state_id].default_next_state_id = add_state(default_next_item_set); + // Sort the matching property sets by ascending specificity and by + // their order in the sheet. This way, more specific selectors and later + // rules will override less specific selectors and earlier rules. PropertySet properties; std::sort(selector_matches.begin(), selector_matches.end()); for (auto &match : selector_matches) { @@ -308,6 +354,7 @@ struct PropertyTableBuilder { } } + // Add the final property set to the deduped list. result.states[state_id].property_set_id = add_property_set(properties); } From 277bf2f03d8a9b7742cf59cc5b0f03ed06cf6338 Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Thu, 4 Oct 2018 15:03:17 -0700 Subject: [PATCH 08/10] Add text pattern handling to property table generation --- .../build_tables/property_table_builder.cc | 9 +++++++- .../generate_code/property_table_json.cc | 19 +++++++++++---- src/compiler/parse_json.cc | 5 ++++ src/compiler/property_sheet.h | 1 + src/compiler/property_table.h | 23 +++++++++++++++++++ 5 files changed, 52 insertions(+), 5 deletions(-) diff --git a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc index d19b471f..ec9d79db 100644 --- a/src/compiler/build_tables/property_table_builder.cc +++ b/src/compiler/build_tables/property_table_builder.cc @@ -107,6 +107,7 @@ struct hash { hash_combine(&result, transition.type); hash_combine(&result, transition.named); hash_combine(&result, transition.index); + hash_combine(&result, transition.text_pattern); hash_combine(&result, transition.state_id); return result; } @@ -253,6 +254,7 @@ struct PropertyTableBuilder { unsigned result = selector.size(); for (const PropertySelectorStep &step : selector) { if (step.index != -1) result++; + if (!step.text_pattern.empty()) result++; } return result; } @@ -262,7 +264,8 @@ struct PropertyTableBuilder { return step.type == transition.type && step.named == transition.named && - (step.index == transition.index || step.index == -1); + (step.index == transition.index || step.index == -1) && + (step.text_pattern == transition.text_pattern || step.text_pattern.empty()); } void populate_state(const PropertyItemSet &item_set, StateId state_id) { @@ -280,6 +283,7 @@ struct PropertyTableBuilder { next_step->type, next_step->named, next_step->index, + next_step->text_pattern, 0 }] = PropertyItemSet(); } @@ -329,6 +333,9 @@ struct PropertyTableBuilder { result.states[state_id].transitions.push_back(transition); } + auto &transition_list = result.states[state_id].transitions; + std::sort(transition_list.begin(), transition_list.end()); + // Compute the default successor item set - the item set that // we should advance to if the next element doesn't match any // of the next elements in the item set's selectors. diff --git a/src/compiler/generate_code/property_table_json.cc b/src/compiler/generate_code/property_table_json.cc index dabe2b2f..f2ca1061 100644 --- a/src/compiler/generate_code/property_table_json.cc +++ b/src/compiler/generate_code/property_table_json.cc @@ -74,14 +74,18 @@ class CodeGenerator { void add_transition(const PropertyTransition &transition) { add("{"); - add("\"type\":\""); - add(transition.type); - add("\",\"named\":"); + add("\"type\":"); + add_string(transition.type); + add(",\"named\":"); add(transition.named ? "true" : "false"); if (transition.index != -1) { add(",\"index\":"); add(to_string(transition.index)); } + if (!transition.text_pattern.empty()) { + add(",\"text\":"); + add_string(transition.text_pattern); + } add(",\"state_id\": "); add(to_string(transition.state_id)); add("}"); @@ -89,13 +93,20 @@ class CodeGenerator { void add_string(const string &s) { add("\""); - add(s); + for (const char c : s) { + if (c == '"') add("\\"); + add(c); + } add("\""); } void add(string input) { buffer += input; } + + void add(char c) { + buffer += c; + } }; string property_table_json(PropertyTable table) { diff --git a/src/compiler/parse_json.cc b/src/compiler/parse_json.cc index 5ac8ffc4..0b32ad6f 100644 --- a/src/compiler/parse_json.cc +++ b/src/compiler/parse_json.cc @@ -409,6 +409,11 @@ Result parse_property_rule_json(json_value *rule_json) { step.index = -1; } + json_value text_pattern_json = selector_step_json->operator[]("text"); + if (text_pattern_json.type == json_string) { + step.text_pattern = text_pattern_json.u.string.ptr; + } + selector.push_back(step); } diff --git a/src/compiler/property_sheet.h b/src/compiler/property_sheet.h index c7663175..38427d3d 100644 --- a/src/compiler/property_sheet.h +++ b/src/compiler/property_sheet.h @@ -12,6 +12,7 @@ struct PropertySelectorStep { bool named; bool is_immediate; int index; + std::string text_pattern; inline bool operator==(const PropertySelectorStep &other) const { return diff --git a/src/compiler/property_table.h b/src/compiler/property_table.h index e7fb4b9f..eb1e0cb5 100644 --- a/src/compiler/property_table.h +++ b/src/compiler/property_table.h @@ -12,6 +12,7 @@ struct PropertyTransition { std::string type; bool named; int index; + std::string text_pattern; unsigned state_id; bool operator==(const PropertyTransition &other) const { @@ -19,8 +20,30 @@ struct PropertyTransition { type == other.type && named == other.named && index == other.index && + text_pattern == other.text_pattern && state_id == other.state_id; } + + bool operator<(const PropertyTransition &other) const { + if (type < other.type) return true; + if (type > other.type) return false; + if (named && !other.named) return true; + if (!named && other.named) return false; + + // The lack of a specific child index is represented as -1. + // It should be sorted *after* transitions with a specific + // child index. + if (index > other.index) return true; + if (index < other.index) return false; + + // The lack of a text pattern is represented as the empty string. + // This should be sorted *after* transitions with a specific + // text pattern. + if (text_pattern.size() > other.text_pattern.size()) return true; + if (text_pattern.size() < other.text_pattern.size()) return false; + + return state_id < other.state_id; + } }; struct PropertyState { From 91fb64984ff27b9e4181f5e11e66b9fb04ba2f3e Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sat, 13 Oct 2018 20:15:07 -0700 Subject: [PATCH 09/10] Fix order of operations in PropertyTableBuilder --- src/compiler/build_tables/property_table_builder.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc index ec9d79db..c4263b8e 100644 --- a/src/compiler/build_tables/property_table_builder.cc +++ b/src/compiler/build_tables/property_table_builder.cc @@ -348,7 +348,8 @@ struct PropertyTableBuilder { } } - result.states[state_id].default_next_state_id = add_state(default_next_item_set); + StateId default_next_state_id = add_state(default_next_item_set); + result.states[state_id].default_next_state_id = default_next_state_id; // Sort the matching property sets by ascending specificity and by // their order in the sheet. This way, more specific selectors and later From 2b6857bb45024cd55fd568fd9cb4d26bbd3feffb Mon Sep 17 00:00:00 2001 From: Max Brunsfeld Date: Sun, 14 Oct 2018 14:36:33 -0700 Subject: [PATCH 10/10] Fix breaking of property selector specificity ties --- .../build_tables/property_table_builder.cc | 51 ++++++++++++++++++- src/compiler/property_table.h | 21 -------- 2 files changed, 49 insertions(+), 23 deletions(-) diff --git a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc index c4263b8e..37aa6b83 100644 --- a/src/compiler/build_tables/property_table_builder.cc +++ b/src/compiler/build_tables/property_table_builder.cc @@ -74,6 +74,43 @@ struct PropertySelectorMatch { } }; +struct PropertyTransitionEntry { + PropertyTransition transition; + unsigned latest_matching_rule_id; + + unsigned specificity() const { + return + (transition.index == -1 ? 0 : 1) + + (transition.text_pattern.empty() ? 0 : 1); + } + + // When using the final state machine, the runtime library computes + // a node's property by descending from the root of the syntax + // tree to that node. For each ancestor node on the way, it should + // update its state using the *first* matching entry of the + // `transitions` list. Therefore, the order of the transitions + // must match the normal tie-breaking rules of CSS. + bool operator<(const PropertyTransitionEntry &other) const { + // If two transitions match different node types, they can't + // both match a given node, so their order is arbitrary. + if (transition.type < other.transition.type) return true; + if (transition.type > other.transition.type) return false; + if (transition.named && !other.transition.named) return true; + if (!transition.named && other.transition.named) return false; + + // More specific transitions should be considered before less + // specific ones. + if (specificity() > other.specificity()) return true; + if (specificity() < other.specificity()) return false; + + // If there are two transitions with a specificity tie (e.g. one + // with an `:nth-child` pseudo-class and a one with a `:text` + // pseudo-class), then the one whose matching properties appeared + // later in the cascade should be considered first. + return latest_matching_rule_id > other.latest_matching_rule_id; + } +}; + } // namespace build_tables } // namespace tree_sitter @@ -303,9 +340,11 @@ struct PropertyTableBuilder { // For each element that follows an item in this set, // compute the next item set after descending through that element. + vector transition_list; for (auto &pair : transitions) { PropertyTransition transition = pair.first; PropertyItemSet &next_item_set = pair.second; + unsigned latest_matching_rule_id = 0; for (const PropertyItem &item : item_set.entries) { const PropertySelectorStep *next_step = next_step_for_item(item); @@ -318,6 +357,12 @@ struct PropertyTableBuilder { PropertyItem next_item = item; next_item.step_id++; next_item_set.entries.insert(next_item); + + // If the item is at the end of its selector, record its rule id + // so that it can be used when sorting the transitions. + if (!next_step_for_item(next_item) && next_item.rule_id > latest_matching_rule_id) { + latest_matching_rule_id = item.rule_id; + } } // If the element does not match, and the item is in the middle @@ -330,11 +375,13 @@ struct PropertyTableBuilder { } transition.state_id = add_state(next_item_set); - result.states[state_id].transitions.push_back(transition); + transition_list.push_back(PropertyTransitionEntry {transition, latest_matching_rule_id}); } - auto &transition_list = result.states[state_id].transitions; std::sort(transition_list.begin(), transition_list.end()); + for (auto &entry : transition_list) { + result.states[state_id].transitions.push_back(entry.transition); + } // Compute the default successor item set - the item set that // we should advance to if the next element doesn't match any diff --git a/src/compiler/property_table.h b/src/compiler/property_table.h index eb1e0cb5..58c17d62 100644 --- a/src/compiler/property_table.h +++ b/src/compiler/property_table.h @@ -23,27 +23,6 @@ struct PropertyTransition { text_pattern == other.text_pattern && state_id == other.state_id; } - - bool operator<(const PropertyTransition &other) const { - if (type < other.type) return true; - if (type > other.type) return false; - if (named && !other.named) return true; - if (!named && other.named) return false; - - // The lack of a specific child index is represented as -1. - // It should be sorted *after* transitions with a specific - // child index. - if (index > other.index) return true; - if (index < other.index) return false; - - // The lack of a text pattern is represented as the empty string. - // This should be sorted *after* transitions with a specific - // text pattern. - if (text_pattern.size() > other.text_pattern.size()) return true; - if (text_pattern.size() < other.text_pattern.size()) return false; - - return state_id < other.state_id; - } }; struct PropertyState {