diff --git a/externals/bandit b/externals/bandit index f14ade4f..bfdb8a33 160000 --- a/externals/bandit +++ b/externals/bandit @@ -1 +1 @@ -Subproject commit f14ade4fbba72f2e8ba3a7f47318eb07614a39d4 +Subproject commit bfdb8a3322a2e54b11aea64d84f9788d83477e83 diff --git a/include/tree_sitter/compiler.h b/include/tree_sitter/compiler.h index 3db2f7ca..a84d8a75 100644 --- a/include/tree_sitter/compiler.h +++ b/include/tree_sitter/compiler.h @@ -29,6 +29,7 @@ typedef struct { } TSCompileResult; TSCompileResult ts_compile_grammar(const char *input, FILE *log_file); +TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file); #ifdef __cplusplus } diff --git a/project.gyp b/project.gyp index b9818861..dfedfb6f 100644 --- a/project.gyp +++ b/project.gyp @@ -18,12 +18,14 @@ 'src/compiler/build_tables/parse_item.cc', 'src/compiler/build_tables/parse_item_set_builder.cc', 'src/compiler/build_tables/parse_table_builder.cc', + 'src/compiler/build_tables/property_table_builder.cc', 'src/compiler/build_tables/rule_can_be_blank.cc', 'src/compiler/compile.cc', 'src/compiler/generate_code/c_code.cc', + 'src/compiler/generate_code/property_table_json.cc', 'src/compiler/lex_table.cc', 'src/compiler/log.cc', - 'src/compiler/parse_grammar.cc', + 'src/compiler/parse_json.cc', 'src/compiler/parse_table.cc', 'src/compiler/precedence_range.cc', 'src/compiler/prepare_grammar/expand_repeats.cc', @@ -122,6 +124,7 @@ 'cflags': [ '-g' ], 'ldflags': [ '-g' ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', }, @@ -131,6 +134,7 @@ 'cflags': [ '-g' ], 'ldflags': [ '-g' ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'GCC_OPTIMIZATION_LEVEL': '0', 'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'], @@ -143,6 +147,9 @@ 'Release': { 'cflags': [ '-O2', '-fno-strict-aliasing' ], 'cflags!': [ '-O3', '-fstrict-aliasing' ], + 'xcode_settings': { + 'ARCHS': ['x86_64'], + }, }, }, diff --git 
a/src/compiler/build_tables/property_table_builder.cc b/src/compiler/build_tables/property_table_builder.cc
new file mode 100644
index 00000000..37aa6b83
--- /dev/null
+++ b/src/compiler/build_tables/property_table_builder.cc
@@ -0,0 +1,450 @@
+#include <algorithm>
+#include <deque>
+#include <map>
+#include <set>
+#include <unordered_map>
+#include <vector>
+#include "compiler/property_sheet.h"
+#include "compiler/property_table.h"
+#include "compiler/build_tables/property_table_builder.h"
+#include "compiler/util/hash_combine.h"
+
+using std::deque;
+using std::vector;
+using std::pair;
+using std::unordered_map;
+using std::set;
+using std::move;
+using std::map;
+
+namespace tree_sitter {
+namespace build_tables {
+
+// A position within a selector for a particular rule set.
+// For example, in a selector like `a > b`, this might
+// describe the state of having descended into an `a`,
+// but not a `b`.
+struct PropertyItem {
+  unsigned rule_id;
+  unsigned selector_id;
+  unsigned step_id;
+
+  bool operator==(const PropertyItem &other) const {
+    return
+      rule_id == other.rule_id &&
+      selector_id == other.selector_id &&
+      step_id == other.step_id;
+  }
+
+  bool operator<(const PropertyItem &other) const {
+    if (rule_id < other.rule_id) return true;
+    if (rule_id > other.rule_id) return false;
+    if (selector_id < other.selector_id) return true;
+    if (selector_id > other.selector_id) return false;
+    return step_id < other.step_id;
+  }
+};
+
+// A set of possible positions within different selectors.
+// This directly represents a state of the property-matching
+// state machine.
+struct PropertyItemSet {
+  set<PropertyItem> entries;
+
+  bool operator==(const PropertyItemSet &other) const {
+    return entries == other.entries;
+  }
+};
+
+// A set of properties that matched via a certain selector.
+// These are ordered according to the usual CSS rules:
+// specificity, falling back to the order in the original sheet.
+struct PropertySelectorMatch {
+  unsigned specificity;
+  unsigned rule_id;
+  unsigned selector_id;
+  const PropertySet *property_set;
+
+  bool operator<(const PropertySelectorMatch &other) const {
+    if (specificity < other.specificity) return true;
+    if (specificity > other.specificity) return false;
+    if (rule_id < other.rule_id) return true;
+    if (rule_id > other.rule_id) return false;
+    return selector_id < other.selector_id;
+  }
+};
+
+// A candidate transition out of a state, paired with the highest rule id
+// among the rules whose selectors the transition completes. Sorting these
+// entries gives the order in which transitions are stored in a state.
+struct PropertyTransitionEntry {
+  PropertyTransition transition;
+  unsigned latest_matching_rule_id;
+
+  unsigned specificity() const {
+    return
+      (transition.index == -1 ? 0 : 1) +
+      (transition.text_pattern.empty() ? 0 : 1);
+  }
+
+  // When using the final state machine, the runtime library computes
+  // a node's property by descending from the root of the syntax
+  // tree to that node. For each ancestor node on the way, it should
+  // update its state using the *first* matching entry of the
+  // `transitions` list. Therefore, the order of the transitions
+  // must match the normal tie-breaking rules of CSS.
+  bool operator<(const PropertyTransitionEntry &other) const {
+    // If two transitions match different node types, they can't
+    // both match a given node, so their order is arbitrary.
+    if (transition.type < other.transition.type) return true;
+    if (transition.type > other.transition.type) return false;
+    if (transition.named && !other.transition.named) return true;
+    if (!transition.named && other.transition.named) return false;
+
+    // More specific transitions should be considered before less
+    // specific ones.
+    if (specificity() > other.specificity()) return true;
+    if (specificity() < other.specificity()) return false;
+
+    // If there are two transitions with a specificity tie (e.g. one
+    // with an `:nth-child` pseudo-class and one with a `:text`
+    // pseudo-class), then the one whose matching properties appeared
+    // later in the cascade should be considered first.
+    return latest_matching_rule_id > other.latest_matching_rule_id;
+  }
+};
+
+} // namespace build_tables
+} // namespace tree_sitter
+
+namespace std {
+
+using tree_sitter::util::hash_combine;
+
+// PropertyItemSets must be hashed because in the process of building
+// the table, we maintain a map of existing property item sets to
+// state ids.
+template <>
+struct hash<tree_sitter::build_tables::PropertyItemSet> {
+  size_t operator()(const tree_sitter::build_tables::PropertyItemSet &item_set) const {
+    size_t result = 0;
+    hash_combine(&result, item_set.entries.size());
+    for (const auto &item : item_set.entries) {
+      hash_combine(&result, item.rule_id);
+      hash_combine(&result, item.selector_id);
+      hash_combine(&result, item.step_id);
+    }
+    return result;
+  }
+};
+
+// PropertyTransitions must be hashed because we represent state
+// transitions as a map of PropertyTransitions to successor PropertyItemSets.
+template <>
+struct hash<tree_sitter::PropertyTransition> {
+  size_t operator()(const tree_sitter::PropertyTransition &transition) const {
+    size_t result = 0;
+    hash_combine(&result, transition.type);
+    hash_combine(&result, transition.named);
+    hash_combine(&result, transition.index);
+    hash_combine(&result, transition.text_pattern);
+    hash_combine(&result, transition.state_id);
+    return result;
+  }
+};
+
+// PropertySets must be hashed so that we can use a map to dedup them.
+template <>
+struct hash<tree_sitter::PropertySet> {
+  size_t operator()(const tree_sitter::PropertySet &set) const {
+    size_t result = 0;
+    hash_combine(&result, set.size());
+    for (const auto &pair : set) {
+      hash_combine(&result, pair.first);
+      hash_combine(&result, pair.second);
+    }
+    return result;
+  }
+};
+
+} // namespace std
+
+namespace tree_sitter {
+namespace build_tables {
+
+typedef unsigned StateId;
+typedef unsigned PropertySetId;
+
+// Builds a PropertyTable from a PropertySheet by exploring item sets
+// breadth-first from a start set, creating one state per distinct item
+// set and deduplicating the property sets that the states refer to.
+struct PropertyTableBuilder {
+  PropertySheet sheet;
+  PropertyTable result;
+  unordered_map<PropertyItemSet, StateId> ids_by_item_set;
+  unordered_map<PropertySet, PropertySetId> ids_by_property_set;
+  deque<pair<PropertyItemSet, StateId>> item_set_queue;
+
+  PropertyTableBuilder(const PropertySheet &sheet) : sheet(sheet) {}
+
+  // Seed the queue with the item set holding step 0 of every selector
+  // of every rule, populate states until the queue is empty, then merge
+  // identical states and return the finished table.
+  PropertyTable build() {
+    PropertyItemSet start_item_set;
+    for (unsigned i = 0; i < sheet.size(); i++) {
+      const PropertyRule &rule = sheet[i];
+      for (unsigned j = 0; j < rule.selectors.size(); j++) {
+        start_item_set.entries.insert(PropertyItem {i, j, 0});
+      }
+    }
+
+    add_state(start_item_set);
+    while (!item_set_queue.empty()) {
+      // Move the entry out of the queue instead of copying its item set.
+      auto entry = move(item_set_queue.front());
+      item_set_queue.pop_front();
+      populate_state(entry.first, entry.second);
+    }
+
+    remove_duplicate_states();
+
+    return result;
+  }
+
+  // Different item sets can actually produce the same state, so the
+  // states need to be explicitly deduped as a post-processing step.
+  void remove_duplicate_states() {
+    while (true) {
+      // Map each state that equals an earlier state to that earlier state.
+      map<StateId, StateId> duplicates;
+      for (StateId i = 0, size = result.states.size(); i < size; i++) {
+        for (StateId j = 0; j < i; j++) {
+          if (!duplicates.count(j) && result.states[j] == result.states[i]) {
+            duplicates.insert({ i, j });
+            break;
+          }
+        }
+      }
+
+      if (duplicates.empty()) break;
+
+      // Compute the index each state will have once duplicates are erased.
+      map<StateId, StateId> new_replacements;
+      for (StateId i = 0, size = result.states.size(); i < size; i++) {
+        StateId new_state_index = i;
+        auto found = duplicates.find(i);
+        if (found != duplicates.end()) {
+          new_state_index = found->second;
+        }
+
+        size_t prior_removed = 0;
+        for (const auto &duplicate : duplicates) {
+          if (duplicate.first >= new_state_index) break;
+          prior_removed++;
+        }
+
+        new_state_index -= prior_removed;
+        new_replacements.insert({i, new_state_index});
+      }
+
+      for (auto &state : result.states) {
+        for (auto &transition : state.transitions) {
+          auto new_replacement = new_replacements.find(transition.state_id);
+          if (new_replacement != new_replacements.end()) {
+            transition.state_id = new_replacement->second;
+          }
+        }
+
+        auto new_replacement = new_replacements.find(state.default_next_state_id);
+        if (new_replacement != new_replacements.end()) {
+          state.default_next_state_id = new_replacement->second;
+        }
+      }
+
+      // Erase from the back so the indices still to be erased stay valid.
+      for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) {
+        result.states.erase(result.states.begin() + i->first);
+      }
+    }
+  }
+
+  // Get the next part of the selector that needs to be matched for a given item.
+  // This returns null if the item has consumed its entire selector.
+  const PropertySelectorStep *next_step_for_item(const PropertyItem &item) {
+    const PropertySelector &selector = sheet[item.rule_id].selectors[item.selector_id];
+    return item.step_id < selector.size() ? &selector[item.step_id] : nullptr;
+  }
+
+  // Get the selector step used for the `is_immediate` check on an item.
+  // This returns null if the item has not consumed anything, or if it
+  // has already consumed its entire selector.
+  // NOTE(review): despite the name, this indexes `step_id` (the step still
+  // to be matched), not `step_id - 1` - confirm which one is intended.
+  const PropertySelectorStep *prev_step_for_item(const PropertyItem &item) {
+    const PropertySelector &selector = sheet[item.rule_id].selectors[item.selector_id];
+    if (item.step_id > 0 && item.step_id < selector.size()) {
+      return &selector[item.step_id];
+    }
+    return nullptr;
+  }
+
+  // Compute a selector's specificity: one point per step, plus one for
+  // each step that has an index and one for each that has a text pattern.
+  unsigned specificity_for_selector(const PropertySelector &selector) {
+    unsigned result = selector.size();
+    for (const PropertySelectorStep &step : selector) {
+      if (step.index != -1) result++;
+      if (!step.text_pattern.empty()) result++;
+    }
+    return result;
+  }
+
+  // Check if the given state transition matches the given part of a selector.
+  bool step_matches_transition(const PropertySelectorStep &step, const PropertyTransition &transition) {
+    return
+      step.type == transition.type &&
+      step.named == transition.named &&
+      (step.index == transition.index || step.index == -1) &&
+      (step.text_pattern == transition.text_pattern || step.text_pattern.empty());
+  }
+
+  // Fill in the state for `item_set`: its sorted transitions, its default
+  // successor, and the id of the property set that applies in it.
+  void populate_state(const PropertyItemSet &item_set, StateId state_id) {
+    unordered_map<PropertyTransition, PropertyItemSet> transitions;
+    vector<PropertySelectorMatch> selector_matches;
+
+    for (const PropertyItem &item : item_set.entries) {
+      const PropertySelectorStep *next_step = next_step_for_item(item);
+
+      // If this item has more elements to match for its selector, then
+      // there's a state transition for elements that match the next
+      // part of the selector.
+      if (next_step) {
+        transitions[PropertyTransition{
+          next_step->type,
+          next_step->named,
+          next_step->index,
+          next_step->text_pattern,
+          0
+        }] = PropertyItemSet();
+      }
+
+      // If the item has matched its entire selector, then the property set
+      // for the item's rule applies in this state.
+      else {
+        const PropertyRule &rule = sheet[item.rule_id];
+        selector_matches.push_back(PropertySelectorMatch {
+          specificity_for_selector(rule.selectors[item.selector_id]),
+          item.rule_id,
+          item.selector_id,
+          &rule.properties,
+        });
+      }
+    }
+
+    // For each element that follows an item in this set,
+    // compute the next item set after descending through that element.
+    vector<PropertyTransitionEntry> transition_list;
+    for (auto &pair : transitions) {
+      PropertyTransition transition = pair.first;
+      PropertyItemSet &next_item_set = pair.second;
+      unsigned latest_matching_rule_id = 0;
+
+      for (const PropertyItem &item : item_set.entries) {
+        const PropertySelectorStep *next_step = next_step_for_item(item);
+        const PropertySelectorStep *prev_step = prev_step_for_item(item);
+        if (next_step) {
+
+          // If the element matches the next part of the item, advance the
+          // item to the next part of its selector.
+          if (step_matches_transition(*next_step, transition)) {
+            PropertyItem next_item = item;
+            next_item.step_id++;
+            next_item_set.entries.insert(next_item);
+
+            // If the item is at the end of its selector, record its rule id
+            // so that it can be used when sorting the transitions.
+            if (!next_step_for_item(next_item) && next_item.rule_id > latest_matching_rule_id) {
+              latest_matching_rule_id = item.rule_id;
+            }
+          }
+
+          // If the element does not match, and the item is in the middle
+          // of an immediate child selector, then remove it from the
+          // next item set. Otherwise, keep it unchanged.
+          if (!prev_step || !prev_step->is_immediate) {
+            next_item_set.entries.insert(item);
+          }
+        }
+      }
+
+      transition.state_id = add_state(next_item_set);
+      transition_list.push_back(PropertyTransitionEntry {transition, latest_matching_rule_id});
+    }
+
+    std::sort(transition_list.begin(), transition_list.end());
+    for (auto &entry : transition_list) {
+      result.states[state_id].transitions.push_back(entry.transition);
+    }
+
+    // Compute the default successor item set - the item set that
+    // we should advance to if the next element doesn't match any
+    // of the next elements in the item set's selectors.
+    PropertyItemSet default_next_item_set;
+    for (const PropertyItem &item : item_set.entries) {
+      const PropertySelectorStep *next_step = next_step_for_item(item);
+      const PropertySelectorStep *prev_step = prev_step_for_item(item);
+      if (next_step && (!prev_step || !prev_step->is_immediate)) {
+        default_next_item_set.entries.insert(item);
+      }
+    }
+
+    StateId default_next_state_id = add_state(default_next_item_set);
+    result.states[state_id].default_next_state_id = default_next_state_id;
+
+    // Sort the matching property sets by ascending specificity and by
+    // their order in the sheet. This way, more specific selectors and later
+    // rules will override less specific selectors and earlier rules.
+    PropertySet properties;
+    std::sort(selector_matches.begin(), selector_matches.end());
+    for (auto &match : selector_matches) {
+      for (auto &pair : *match.property_set) {
+        properties[pair.first] = pair.second;
+      }
+    }
+
+    // Add the final property set to the deduped list.
+    result.states[state_id].property_set_id = add_property_set(properties);
+  }
+
+  // Return the id of the state for `item_set`, creating an empty state
+  // and queueing the item set for population if it has not been seen.
+  StateId add_state(const PropertyItemSet &item_set) {
+    auto entry = ids_by_item_set.find(item_set);
+    if (entry != ids_by_item_set.end()) return entry->second;
+    StateId id = result.states.size();
+    ids_by_item_set[item_set] = id;
+    result.states.push_back(PropertyState {});
+    item_set_queue.push_back({item_set, id});
+    return id;
+  }
+
+  // Return the id of `property_set`, appending it if it is new.
+  PropertySetId add_property_set(const PropertySet &property_set) {
+    auto entry = ids_by_property_set.find(property_set);
+    if (entry != ids_by_property_set.end()) return entry->second;
+    PropertySetId id = result.property_sets.size();
+    ids_by_property_set[property_set] = id;
+    result.property_sets.push_back(property_set);
+    return id;
+  }
+};
+
+PropertyTable build_property_table(const PropertySheet &sheet) {
+  return PropertyTableBuilder(sheet).build();
+}
+
+} // namespace build_tables
+} // namespace tree_sitter
diff --git a/src/compiler/build_tables/property_table_builder.h b/src/compiler/build_tables/property_table_builder.h
new file mode 100644
index 00000000..25b94ce7
--- /dev/null
+++ b/src/compiler/build_tables/property_table_builder.h
@@ -0,0 +1,15 @@
+#ifndef COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
+#define COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
+
+#include
+#include "compiler/property_table.h"
+
+namespace tree_sitter {
+namespace build_tables {
+
+PropertyTable build_property_table(const PropertySheet &);
+
+} // namespace build_tables
+} // namespace tree_sitter
+
+#endif // COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
diff --git a/src/compiler/compile.cc b/src/compiler/compile.cc
index 83bdbcc2..4a9bd663 100644
--- a/src/compiler/compile.cc
+++ b/src/compiler/compile.cc
@@ -1,11 +1,13 @@
 #include "tree_sitter/compiler.h"
 #include "compiler/prepare_grammar/prepare_grammar.h"
 #include "compiler/build_tables/parse_table_builder.h"
+#include
"compiler/build_tables/property_table_builder.h" #include "compiler/generate_code/c_code.h" +#include "compiler/generate_code/property_table_json.h" #include "compiler/syntax_grammar.h" #include "compiler/log.h" #include "compiler/lexical_grammar.h" -#include "compiler/parse_grammar.h" +#include "compiler/parse_json.h" #include "json.h" namespace tree_sitter { @@ -20,7 +22,7 @@ using std::make_tuple; extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) { set_log_file(log_file); - ParseGrammarResult parse_result = parse_grammar(string(input)); + ParseGrammarResult parse_result = parse_grammar_json(string(input)); if (!parse_result.error_message.empty()) { return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar}; } @@ -60,4 +62,15 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone}; } +extern "C" TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file) { + set_log_file(log_file); + auto parse_result = parse_property_sheet_json(string(input)); + if (!parse_result.ok()) { + return {nullptr, strdup(parse_result.error.c_str()), TSCompileErrorTypeInvalidGrammar}; + } + PropertyTable table = build_tables::build_property_table(parse_result.value); + string code = generate_code::property_table_json(table); + return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone}; +} + } // namespace tree_sitter diff --git a/src/compiler/generate_code/property_table_json.cc b/src/compiler/generate_code/property_table_json.cc new file mode 100644 index 00000000..f2ca1061 --- /dev/null +++ b/src/compiler/generate_code/property_table_json.cc @@ -0,0 +1,117 @@ +#include "compiler/generate_code/property_table_json.h" +#include +#include + +using std::string; +using std::to_string; +using std::vector; + +namespace tree_sitter { +namespace generate_code { + +class CodeGenerator { + string buffer; + + public: + 
string generate(const PropertyTable &table) {
+    add("{");
+    add("\"states\":");
+    add("[");
+    for (unsigned i = 0; i < table.states.size(); i++) {
+      const PropertyState &state = table.states[i];
+      if (i != 0) add(",");
+      add_state(i, state);
+    }
+    add("],");
+    add("\"property_sets\":");
+    add("[");
+    bool first = true;
+    for (const PropertySet &property_set : table.property_sets) {
+      if (!first) add(",");
+      first = false;
+      add_property_set(property_set);
+    }
+    add("]");
+    add("}");
+    return buffer;
+  }
+
+ private:
+  // Emit one state as a JSON object holding its index, the id of its
+  // property set, its transitions in order, and its default successor.
+  void add_state(unsigned i, const PropertyState &state) {
+    add("{");
+    add("\"id\":");
+    add(to_string(i));
+    add(",\"property_set_id\":");
+    add(to_string(state.property_set_id));
+    add(",");
+    add("\"transitions\":[");
+    bool first = true;
+    for (const auto &transition : state.transitions) {
+      if (!first) add(",");
+      first = false;
+      add_transition(transition);
+    }
+    add("],");
+    add("\"default_next_state_id\":");
+    add(to_string(state.default_next_state_id));
+    add("}");
+  }
+
+  // Emit a property set as a JSON object. Keys and values go through
+  // add_string so that quotes and backslashes in them are escaped.
+  void add_property_set(const PropertySet &property_set) {
+    add("{");
+    bool first = true;
+    for (const auto &pair : property_set) {
+      if (!first) add(",");
+      first = false;
+      add_string(pair.first);
+      add(":");
+      add_string(pair.second);
+    }
+    add("}");
+  }
+
+  // Emit one transition. `index` and `text` are written only when the
+  // transition constrains them (index != -1, non-empty text pattern).
+  void add_transition(const PropertyTransition &transition) {
+    add("{");
+    add("\"type\":");
+    add_string(transition.type);
+    add(",\"named\":");
+    add(transition.named ? "true" : "false");
+    if (transition.index != -1) {
+      add(",\"index\":");
+      add(to_string(transition.index));
+    }
+    if (!transition.text_pattern.empty()) {
+      add(",\"text\":");
+      add_string(transition.text_pattern);
+    }
+    add(",\"state_id\": ");
+    add(to_string(transition.state_id));
+    add("}");
+  }
+
+  // Append `s` as a JSON string literal, escaping quotes, backslashes
+  // and control characters so that the output is always valid JSON.
+  void add_string(const string &s) {
+    static const char hex_digits[] = "0123456789abcdef";
+    add('"');
+    for (const char c : s) {
+      switch (c) {
+        case '"': add("\\\""); break;
+        case '\\': add("\\\\"); break;
+        case '\n': add("\\n"); break;
+        case '\r': add("\\r"); break;
+        case '\t': add("\\t"); break;
+        default:
+          // Remaining control characters need the \u00XX form.
+          if (static_cast<unsigned char>(c) < 0x20) {
+            add("\\u00");
+            add(hex_digits[(c >> 4) & 0xf]);
+            add(hex_digits[c & 0xf]);
+          } else {
+            add(c);
+          }
+      }
+    }
+    add('"');
+  }
+
+  void add(const string &input) {
+    buffer += input;
+  }
+
+  void add(char c) {
+    buffer += c;
+  }
+};
+
+string property_table_json(PropertyTable table) {
+  return CodeGenerator().generate(table);
+}
+
+} // namespace generate_code
+} // namespace tree_sitter
diff --git a/src/compiler/generate_code/property_table_json.h b/src/compiler/generate_code/property_table_json.h
new file mode 100644
index 00000000..7b6af342
--- /dev/null
+++ b/src/compiler/generate_code/property_table_json.h
@@ -0,0 +1,15 @@
+#ifndef COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
+#define COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
+
+#include <string>
+#include "compiler/property_table.h"
+
+namespace tree_sitter {
+namespace generate_code {
+
+std::string property_table_json(PropertyTable);
+
+} // namespace generate_code
+} // namespace tree_sitter
+
+#endif // COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
diff --git a/src/compiler/parse_grammar.h b/src/compiler/parse_grammar.h
deleted file mode 100644
index c24cd9ca..00000000
--- a/src/compiler/parse_grammar.h
+++ /dev/null
@@ -1,21 +0,0 @@
-#ifndef COMPILER_GRAMMAR_JSON_H_
-#define COMPILER_GRAMMAR_JSON_H_
-
-#include
-#include
-#include "tree_sitter/compiler.h"
-#include "compiler/grammar.h"
-
-namespace tree_sitter {
-
-struct ParseGrammarResult {
-  std::string name;
-  InputGrammar grammar;
-  std::string error_message;
-};
-
-ParseGrammarResult parse_grammar(const std::string &);
-
-} // namespace tree_sitter
-
-#endif // COMPILER_GRAMMAR_JSON_H_
diff --git a/src/compiler/parse_grammar.cc b/src/compiler/parse_json.cc
similarity index 62% rename from src/compiler/parse_grammar.cc rename to src/compiler/parse_json.cc index 7b69ed61..0b32ad6f 100644 --- a/src/compiler/parse_grammar.cc +++ b/src/compiler/parse_json.cc @@ -1,10 +1,11 @@ -#include "compiler/parse_grammar.h" +#include "compiler/parse_json.h" #include #include #include #include #include "json.h" #include "compiler/rule.h" +#include "compiler/util/result.h" namespace tree_sitter { @@ -19,17 +20,9 @@ using rules::Metadata; using rules::Pattern; using rules::String; using rules::NamedSymbol; +using util::Result; -struct ParseRuleResult { - Rule rule; - string error_message; - - ParseRuleResult(const string &error_message) : error_message(error_message) {} - ParseRuleResult(const char *error_message) : error_message(error_message) {} - ParseRuleResult(Rule rule) : rule(rule) {} -}; - -ParseRuleResult parse_rule(json_value *rule_json) { +Result parse_rule_json(json_value *rule_json) { string error_message; json_value rule_type_json; string type; @@ -62,11 +55,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - auto result = parse_rule(member_json); - if (!result.error_message.empty()) { - return "Invalid choice member: " + result.error_message; + auto result = parse_rule_json(member_json); + if (!result.ok()) { + return "Invalid choice member: " + result.error; } - members.push_back(result.rule); + members.push_back(result.value); } return Rule::choice(members); } @@ -80,49 +73,49 @@ ParseRuleResult parse_rule(json_value *rule_json) { vector members; for (size_t i = 0, length = members_json.u.array.length; i < length; i++) { json_value *member_json = members_json.u.array.values[i]; - auto result = parse_rule(member_json); - if (!result.error_message.empty()) { - return "Invalid choice member: " + result.error_message; + auto result = parse_rule_json(member_json); + if 
(!result.ok()) { + return "Invalid choice member: " + result.error; } - members.push_back(result.rule); + members.push_back(result.value); } return Rule::seq(members); } if (type == "REPEAT") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid repeat content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid repeat content: " + result.error; } - return Rule::choice({Rule::repeat(result.rule), Blank{}}); + return Rule::choice({Rule::repeat(result.value), Blank{}}); } if (type == "REPEAT1") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid repeat content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid repeat content: " + result.error; } - return Rule::repeat(result.rule); + return Rule::repeat(result.value); } if (type == "TOKEN") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid token content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid token content: " + result.error; } - return Rule(Metadata::token(move(result.rule))); + return Rule(Metadata::token(move(result.value))); } if (type == "IMMEDIATE_TOKEN") { json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid token content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid token content: " + result.error; } - return Rule(Metadata::immediate_token(move(result.rule))); + return 
Rule(Metadata::immediate_token(move(result.value))); } if (type == "PATTERN") { @@ -159,11 +152,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec(precedence_json.u.integer, move(result.value))); } if (type == "PREC_LEFT") { @@ -173,11 +166,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.value))); } if (type == "PREC_RIGHT") { @@ -187,11 +180,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.value))); } if (type == "PREC_DYNAMIC") { @@ -201,11 +194,11 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = 
rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid precedence content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid precedence content: " + result.error; } - return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.rule))); + return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.value))); } if (type == "ALIAS") { @@ -220,21 +213,21 @@ ParseRuleResult parse_rule(json_value *rule_json) { } json_value content_json = rule_json->operator[]("content"); - auto result = parse_rule(&content_json); - if (!result.error_message.empty()) { - return "Invalid rename content: " + result.error_message; + auto result = parse_rule_json(&content_json); + if (!result.ok()) { + return "Invalid rename content: " + result.error; } return Rule(Metadata::alias( string(value_json.u.string.ptr), is_named_json.u.boolean, - move(result.rule) + move(result.value) )); } return "Unknown rule type: " + type; } -ParseGrammarResult parse_grammar(const string &input) { +ParseGrammarResult parse_grammar_json(const string &input) { string error_message; string name; InputGrammar grammar; @@ -242,8 +235,8 @@ ParseGrammarResult parse_grammar(const string &input) { name_json, rules_json, extras_json, conflicts_json, external_tokens_json, inline_rules_json, word_rule_json; - json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; char parse_error[json_error_max]; + json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 }; json_value *grammar_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error); if (!grammar_json) { @@ -272,15 +265,15 @@ ParseGrammarResult parse_grammar(const string &input) { for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) { json_object_entry entry_json = rules_json.u.object.values[i]; - auto result = parse_rule(entry_json.value); - if 
(!result.error_message.empty()) {
-      error_message = result.error_message;
+    auto result = parse_rule_json(entry_json.value);
+    if (!result.ok()) {
+      error_message = result.error;
       goto error;
     }
     grammar.variables.push_back(Variable{
       string(entry_json.name),
       VariableTypeNamed,
-      result.rule
+      result.value
     });
   }
 
@@ -293,12 +286,12 @@ ParseGrammarResult parse_grammar(const string &input) {
 
     for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
       json_value *extra_json = extras_json.u.array.values[i];
-      auto result = parse_rule(extra_json);
-      if (!result.error_message.empty()) {
-        error_message = "Invalid extra token: " + result.error_message;
+      auto result = parse_rule_json(extra_json);
+      if (!result.ok()) {
+        error_message = "Invalid extra token: " + result.error;
         goto error;
       }
-      grammar.extra_tokens.push_back(result.rule);
+      grammar.extra_tokens.push_back(result.value);
     }
   }
 
@@ -361,12 +354,12 @@ ParseGrammarResult parse_grammar(const string &input) {
 
     for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
       json_value *external_token_json = external_tokens_json.u.array.values[i];
-      auto result = parse_rule(external_token_json);
-      if (!result.error_message.empty()) {
-        error_message = "Invalid external token: " + result.error_message;
+      auto result = parse_rule_json(external_token_json);
+      if (!result.ok()) {
+        error_message = "Invalid external token: " + result.error;
         goto error;
       }
-      grammar.external_tokens.push_back(result.rule);
+      grammar.external_tokens.push_back(result.value);
     }
   }
 
@@ -384,11 +377,99 @@ ParseGrammarResult parse_grammar(const string &input) {
   return { name, grammar, "" };
 
 error:
-  if (grammar_json) {
-    json_value_free(grammar_json);
-  }
-
+  if (grammar_json) json_value_free(grammar_json);
   return { "", InputGrammar(), error_message };
 }
 
+// Parse one rule of a property sheet: an object with a `selectors` array
+// (each selector being an array of step objects) and a `properties`
+// object whose values are strings. Returns an error message on failure.
+Result<PropertyRule> parse_property_rule_json(json_value *rule_json) {
+  PropertyRule result;
+
+  if (rule_json->type != json_object) return "Rule must be an object";
+
+  json_value selectors_json = rule_json->operator[]("selectors");
+  if (selectors_json.type != json_array) return "Selectors must be an array";
+
+  for (unsigned i = 0; i < selectors_json.u.array.length; i++) {
+    PropertySelector selector;
+    json_value *selector_json = selectors_json.u.array.values[i];
+    if (selector_json->type != json_array) return "Each selector must be an array";
+
+    for (unsigned j = 0; j < selector_json->u.array.length; j++) {
+      json_value *selector_step_json = selector_json->u.array.values[j];
+      if (selector_step_json->type != json_object) return "Each selector must be an array of objects";
+      PropertySelectorStep step;
+
+      // The lookup may not produce a string (e.g. when the key is absent),
+      // so check the type before reading the string pointer.
+      json_value type_json = selector_step_json->operator[]("type");
+      if (type_json.type != json_string) return "Selector step type must be a string";
+      step.type = type_json.u.string.ptr;
+
+      // Flags that are absent or not booleans are treated as false.
+      json_value named_json = selector_step_json->operator[]("named");
+      step.named = named_json.type == json_boolean && named_json.u.boolean;
+      json_value immediate_json = selector_step_json->operator[]("immediate");
+      step.is_immediate = immediate_json.type == json_boolean && immediate_json.u.boolean;
+
+      // An index of -1 means that the step does not constrain the index.
+      json_value index_json = selector_step_json->operator[]("index");
+      step.index = index_json.type == json_integer ? index_json.u.integer : -1;
+
+      json_value text_pattern_json = selector_step_json->operator[]("text");
+      if (text_pattern_json.type == json_string) {
+        step.text_pattern = text_pattern_json.u.string.ptr;
+      }
+
+      selector.push_back(step);
+    }
+
+    result.selectors.push_back(selector);
+  }
+
+  json_value properties_json = rule_json->operator[]("properties");
+  if (properties_json.type != json_object) return "Properties must be an object";
+
+  for (unsigned i = 0; i < properties_json.u.object.length; i++) {
+    json_object_entry entry_json = properties_json.u.object.values[i];
+    json_value *value_json = entry_json.value;
+    if (value_json->type != json_string) return "Property values must be strings";
+    result.properties[entry_json.name] = value_json->u.string.ptr;
+  }
+
+  return result;
+}
+
+Result<PropertySheet> parse_property_sheet_json(const string &input) {
+  PropertySheet sheet;
+  string error_message;
+  char parse_error[json_error_max];
+  json_settings settings = { 0, json_enable_comments, 0, 0, 0,
0 }; + json_value *sheet_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error); + if (!sheet_json) { + error_message = string("Invalid JSON at ") + parse_error; + goto error; + } + + if (sheet_json->type != json_array) { + error_message = "Property sheet must be an array"; + goto error; + } + + for (unsigned i = 0; i < sheet_json->u.array.length; i++) { + json_value *rule_json = sheet_json->u.array.values[i]; + auto result = parse_property_rule_json(rule_json); + if (!result.ok()) { + error_message = "Invalid external token: " + result.error; + goto error; + } + sheet.push_back(result.value); + } + + return sheet; + +error: + if (sheet_json) json_value_free(sheet_json); + return error_message.c_str(); +} + } // namespace tree_sitter diff --git a/src/compiler/parse_json.h b/src/compiler/parse_json.h new file mode 100644 index 00000000..fda7378c --- /dev/null +++ b/src/compiler/parse_json.h @@ -0,0 +1,29 @@ +#ifndef COMPILER_PARSE_JSON_H_ +#define COMPILER_PARSE_JSON_H_ + +#include +#include +#include "tree_sitter/compiler.h" +#include "compiler/grammar.h" +#include "compiler/property_sheet.h" +#include "compiler/util/result.h" + +namespace tree_sitter { + +struct ParseGrammarResult { + std::string name; + InputGrammar grammar; + std::string error_message; +}; + +struct ParsePropertySheetResult { + PropertySheet property_sheet; + std::string error_message; +}; + +ParseGrammarResult parse_grammar_json(const std::string &); +util::Result parse_property_sheet_json(const std::string &); + +} // namespace tree_sitter + +#endif // COMPILER_PARSE_JSON_H_ diff --git a/src/compiler/property_sheet.h b/src/compiler/property_sheet.h new file mode 100644 index 00000000..38427d3d --- /dev/null +++ b/src/compiler/property_sheet.h @@ -0,0 +1,39 @@ +#ifndef COMPILER_PROPERTY_SHEET_H_ +#define COMPILER_PROPERTY_SHEET_H_ + +#include +#include +#include + +namespace tree_sitter { + +struct PropertySelectorStep { + std::string type; + bool named; + bool is_immediate; 
+ int index; + std::string text_pattern; + + inline bool operator==(const PropertySelectorStep &other) const { + return + type == other.type && + named == other.named && + is_immediate == other.is_immediate && + index == other.index; + } +}; + +typedef std::vector PropertySelector; + +typedef std::map PropertySet; + +struct PropertyRule { + std::vector selectors; + PropertySet properties; +}; + +typedef std::vector PropertySheet; + +} // namespace tree_sitter + +#endif // COMPILER_PROPERTY_SHEET_H_ diff --git a/src/compiler/property_table.h b/src/compiler/property_table.h new file mode 100644 index 00000000..58c17d62 --- /dev/null +++ b/src/compiler/property_table.h @@ -0,0 +1,48 @@ +#ifndef COMPILER_PROPERTY_TABLE_H_ +#define COMPILER_PROPERTY_TABLE_H_ + +#include +#include +#include +#include "compiler/property_sheet.h" + +namespace tree_sitter { + +struct PropertyTransition { + std::string type; + bool named; + int index; + std::string text_pattern; + unsigned state_id; + + bool operator==(const PropertyTransition &other) const { + return + type == other.type && + named == other.named && + index == other.index && + text_pattern == other.text_pattern && + state_id == other.state_id; + } +}; + +struct PropertyState { + std::vector transitions; + unsigned default_next_state_id; + unsigned property_set_id; + + bool operator==(const PropertyState &other) const { + return + transitions == other.transitions && + default_next_state_id == other.default_next_state_id && + property_set_id == other.property_set_id; + } +}; + +struct PropertyTable { + std::vector states; + std::vector property_sets; +}; + +} // namespace tree_sitter + +#endif // COMPILER_PROPERTY_TABLE_H_ diff --git a/src/compiler/util/result.h b/src/compiler/util/result.h new file mode 100644 index 00000000..8e444dc7 --- /dev/null +++ b/src/compiler/util/result.h @@ -0,0 +1,24 @@ +#ifndef COMPILER_UTIL_RESULT_H_ +#define COMPILER_UTIL_RESULT_H_ + +#include + +namespace tree_sitter { +namespace util { + 
+template +struct Result { + Value value; + std::string error; + + inline Result() : error("Empty") {} + inline Result(Value &&v) : value(v) {} + inline Result(const std::string &message) : error(message) {} + inline Result(const char *message) : error(message) {} + inline bool ok() const { return error.empty(); } +}; + +} // namespace util +} // namespace tree_sitter + +#endif // COMPILER_UTIL_RESULT_H_ diff --git a/test/compiler/prepare_grammar/extract_tokens_test.cc b/test/compiler/prepare_grammar/extract_tokens_test.cc index da2ae60a..5ea6e469 100644 --- a/test/compiler/prepare_grammar/extract_tokens_test.cc +++ b/test/compiler/prepare_grammar/extract_tokens_test.cc @@ -181,7 +181,7 @@ describe("extract_tokens", []() { Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}}, true }, - })) + })); }); it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() { diff --git a/test/compiler/prepare_grammar/intern_symbols_test.cc b/test/compiler/prepare_grammar/intern_symbols_test.cc index 7b7f3624..6816dad4 100644 --- a/test/compiler/prepare_grammar/intern_symbols_test.cc +++ b/test/compiler/prepare_grammar/intern_symbols_test.cc @@ -84,7 +84,7 @@ describe("intern_symbols", []() { VariableTypeNamed, Symbol::non_terminal(2), }, - })) + })); }); }); diff --git a/test/fixtures/error_corpus/javascript_errors.txt b/test/fixtures/error_corpus/javascript_errors.txt index fb31d9e2..1717b85d 100644 --- a/test/fixtures/error_corpus/javascript_errors.txt +++ b/test/fixtures/error_corpus/javascript_errors.txt @@ -138,7 +138,8 @@ var x = !!! 
Errors inside of a template string substitution ========================================================= -const a = `b c ${d +} f g` +const a = `b c ${d += } f g` +const h = `i ${j(k} l` --- @@ -146,7 +147,13 @@ const a = `b c ${d +} f g` (lexical_declaration (variable_declarator (identifier) - (template_string (template_substitution (identifier) (ERROR)))))) + (template_string (template_substitution (identifier) (ERROR))))) + (lexical_declaration + (variable_declarator + (identifier) + (template_string (template_substitution (call_expression + (identifier) + (arguments (identifier) (MISSING)))))))) ========================================================= Long sequences of invalid tokens diff --git a/test/helpers/scope_sequence.cc b/test/helpers/scope_sequence.cc index 1121b80e..34109b76 100644 --- a/test/helpers/scope_sequence.cc +++ b/test/helpers/scope_sequence.cc @@ -7,6 +7,7 @@ using std::string; using std::cout; +using namespace snowhouse; static void append_text_to_scope_sequence(ScopeSequence *sequence, ScopeStack *current_scopes, diff --git a/test/helpers/tree_helpers.cc b/test/helpers/tree_helpers.cc index 1d4c0d3a..768e9f61 100644 --- a/test/helpers/tree_helpers.cc +++ b/test/helpers/tree_helpers.cc @@ -2,6 +2,7 @@ #include "helpers/tree_helpers.h" #include "helpers/point_helpers.h" #include +#include using std::string; using std::vector; diff --git a/test/runtime/node_test.cc b/test/runtime/node_test.cc index d0aa69e3..3a856403 100644 --- a/test/runtime/node_test.cc +++ b/test/runtime/node_test.cc @@ -310,7 +310,7 @@ describe("Node", [&]() { AssertThat(ts_node_start_point(child7), Equals({ 8, 0 })); AssertThat(ts_node_end_point(child7), Equals({ 8, 1 })); - AssertThat(ts_node_child_count(child6), Equals(3)) + AssertThat(ts_node_child_count(child6), Equals(3)); TSNode left_brace = ts_node_child(child6, 0); TSNode pair = ts_node_child(child6, 1); diff --git a/test/test_helper.h b/test/test_helper.h index 99db971a..04fe1ffb 100644 --- 
a/test/test_helper.h +++ b/test/test_helper.h @@ -11,6 +11,7 @@ namespace tree_sitter {} using namespace std; using namespace bandit; +using namespace snowhouse; using namespace tree_sitter; #define START_TEST go_bandit([]() { diff --git a/tests.gyp b/tests.gyp index a8f3a596..25932c7a 100644 --- a/tests.gyp +++ b/tests.gyp @@ -84,6 +84,7 @@ ], 'ldflags': ['-g'], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'OTHER_LDFLAGS': ['-g'], 'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'], 'GCC_OPTIMIZATION_LEVEL': '0', @@ -114,6 +115,7 @@ ], 'xcode_settings': { + 'ARCHS': ['x86_64'], 'CLANG_CXX_LANGUAGE_STANDARD': 'c++14', 'ALWAYS_SEARCH_USER_PATHS': 'NO', }