Merge pull request #204 from tree-sitter/property-sheets

Add a system for assigning properties to syntax nodes
This commit is contained in:
Max Brunsfeld 2018-10-15 23:18:33 -07:00 committed by GitHub
commit fa1f4aa86a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 924 additions and 103 deletions

2
externals/bandit vendored

@ -1 +1 @@
Subproject commit f14ade4fbba72f2e8ba3a7f47318eb07614a39d4
Subproject commit bfdb8a3322a2e54b11aea64d84f9788d83477e83

View file

@ -29,6 +29,7 @@ typedef struct {
} TSCompileResult;
TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file);
#ifdef __cplusplus
}

View file

@ -18,12 +18,14 @@
'src/compiler/build_tables/parse_item.cc',
'src/compiler/build_tables/parse_item_set_builder.cc',
'src/compiler/build_tables/parse_table_builder.cc',
'src/compiler/build_tables/property_table_builder.cc',
'src/compiler/build_tables/rule_can_be_blank.cc',
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',
'src/compiler/generate_code/property_table_json.cc',
'src/compiler/lex_table.cc',
'src/compiler/log.cc',
'src/compiler/parse_grammar.cc',
'src/compiler/parse_json.cc',
'src/compiler/parse_table.cc',
'src/compiler/precedence_range.cc',
'src/compiler/prepare_grammar/expand_repeats.cc',
@ -122,6 +124,7 @@
'cflags': [ '-g' ],
'ldflags': [ '-g' ],
'xcode_settings': {
'ARCHS': ['x86_64'],
'OTHER_LDFLAGS': ['-g'],
'GCC_OPTIMIZATION_LEVEL': '0',
},
@ -131,6 +134,7 @@
'cflags': [ '-g' ],
'ldflags': [ '-g' ],
'xcode_settings': {
'ARCHS': ['x86_64'],
'OTHER_LDFLAGS': ['-g'],
'GCC_OPTIMIZATION_LEVEL': '0',
'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'],
@ -143,6 +147,9 @@
'Release': {
'cflags': [ '-O2', '-fno-strict-aliasing' ],
'cflags!': [ '-O3', '-fstrict-aliasing' ],
'xcode_settings': {
'ARCHS': ['x86_64'],
},
},
},

View file

@ -0,0 +1,447 @@
#include <vector>
#include <deque>
#include <algorithm>
#include <map>
#include <unordered_map>
#include <set>
#include "compiler/property_sheet.h"
#include "compiler/property_table.h"
#include "compiler/build_tables/property_table_builder.h"
#include "compiler/util/hash_combine.h"
using std::deque;
using std::vector;
using std::pair;
using std::unordered_map;
using std::set;
using std::move;
using std::map;
namespace tree_sitter {
namespace build_tables {
// Identifies a position inside one selector of one rule: `step_id` is
// the number of steps of selector `selector_id` (within rule `rule_id`)
// that have been matched so far. For a selector like `a > b`, a step_id
// of 1 describes having descended into an `a` but not yet into a `b`.
struct PropertyItem {
  unsigned rule_id;
  unsigned selector_id;
  unsigned step_id;

  // Two items are equal when all three ids agree.
  bool operator==(const PropertyItem &other) const {
    if (rule_id != other.rule_id) return false;
    if (selector_id != other.selector_id) return false;
    return step_id == other.step_id;
  }

  // Lexicographic ordering on (rule_id, selector_id, step_id).
  bool operator<(const PropertyItem &other) const {
    if (rule_id != other.rule_id) return rule_id < other.rule_id;
    if (selector_id != other.selector_id) return selector_id < other.selector_id;
    return step_id < other.step_id;
  }
};
// A collection of positions within (possibly different) selectors.
// Each distinct item set corresponds to one state of the
// property-matching state machine.
struct PropertyItemSet {
  set<PropertyItem> entries;

  // Item sets are equal when they hold the same items.
  bool operator==(const PropertyItemSet &other) const {
    if (entries.size() != other.entries.size()) return false;
    return std::equal(entries.begin(), entries.end(), other.entries.begin());
  }
};
// Records that a rule's properties apply because one of its selectors
// matched. Matches sort the way CSS resolves conflicts: first by
// specificity, then by position in the original sheet (rule index,
// then selector index within the rule).
struct PropertySelectorMatch {
  unsigned specificity;
  unsigned rule_id;
  unsigned selector_id;
  const PropertySet *property_set;

  // Ascending order on (specificity, rule_id, selector_id).
  bool operator<(const PropertySelectorMatch &other) const {
    if (specificity != other.specificity) return specificity < other.specificity;
    if (rule_id != other.rule_id) return rule_id < other.rule_id;
    return selector_id < other.selector_id;
  }
};
struct PropertyTransitionEntry {
PropertyTransition transition;
unsigned latest_matching_rule_id;
unsigned specificity() const {
return
(transition.index == -1 ? 0 : 1) +
(transition.text_pattern.empty() ? 0 : 1);
}
// When using the final state machine, the runtime library computes
// a node's property by descending from the root of the syntax
// tree to that node. For each ancestor node on the way, it should
// update its state using the *first* matching entry of the
// `transitions` list. Therefore, the order of the transitions
// must match the normal tie-breaking rules of CSS.
bool operator<(const PropertyTransitionEntry &other) const {
// If two transitions match different node types, they can't
// both match a given node, so their order is arbitrary.
if (transition.type < other.transition.type) return true;
if (transition.type > other.transition.type) return false;
if (transition.named && !other.transition.named) return true;
if (!transition.named && other.transition.named) return false;
// More specific transitions should be considered before less
// specific ones.
if (specificity() > other.specificity()) return true;
if (specificity() < other.specificity()) return false;
// If there are two transitions with a specificity tie (e.g. one
// with an `:nth-child` pseudo-class and a one with a `:text`
// pseudo-class), then the one whose matching properties appeared
// later in the cascade should be considered first.
return latest_matching_rule_id > other.latest_matching_rule_id;
}
};
} // namespace build_tables
} // namespace tree_sitter
namespace std {
using tree_sitter::util::hash_combine;
// While the table is being built, a map from existing property item
// sets to state ids is maintained, so PropertyItemSet needs a hash.
// The hash folds in the entry count followed by every item's three ids,
// in the set's iteration order.
template <>
struct hash<tree_sitter::build_tables::PropertyItemSet> {
  size_t operator()(const tree_sitter::build_tables::PropertyItemSet &item_set) const {
    const auto &entries = item_set.entries;
    size_t seed = 0;
    hash_combine(&seed, entries.size());
    for (auto it = entries.begin(); it != entries.end(); ++it) {
      hash_combine(&seed, it->rule_id);
      hash_combine(&seed, it->selector_id);
      hash_combine(&seed, it->step_id);
    }
    return seed;
  }
};
// State transitions are represented as a map from PropertyTransition to
// the successor PropertyItemSet, so PropertyTransition needs a hash.
// Every field takes part, including `state_id`.
template <>
struct hash<tree_sitter::PropertyTransition> {
  size_t operator()(const tree_sitter::PropertyTransition &transition) const {
    size_t seed = 0;
    // What kind of node the transition applies to.
    hash_combine(&seed, transition.type);
    hash_combine(&seed, transition.named);
    // Optional extra constraints on the node.
    hash_combine(&seed, transition.index);
    hash_combine(&seed, transition.text_pattern);
    // Destination state.
    hash_combine(&seed, transition.state_id);
    return seed;
  }
};
// Property sets are deduplicated through a hash map, so PropertySet
// needs a hash. The hash folds in the number of entries followed by each
// key and value in iteration order.
template <>
struct hash<tree_sitter::PropertySet> {
  size_t operator()(const tree_sitter::PropertySet &set) const {
    size_t seed = 0;
    hash_combine(&seed, set.size());
    for (auto it = set.begin(); it != set.end(); ++it) {
      hash_combine(&seed, it->first);
      hash_combine(&seed, it->second);
    }
    return seed;
  }
};
} // namespace std
namespace tree_sitter {
namespace build_tables {
typedef unsigned StateId;
typedef unsigned PropertySetId;
struct PropertyTableBuilder {
PropertySheet sheet;
PropertyTable result;
unordered_map<PropertyItemSet, StateId> ids_by_item_set;
unordered_map<PropertySet, PropertySetId> ids_by_property_set;
deque<pair<PropertyItemSet, StateId>> item_set_queue;
PropertyTableBuilder(const PropertySheet &sheet) : sheet(sheet) {}
// Builds the whole property table for `sheet`.
//
// The start state contains one item per selector of every rule, each
// positioned before its first step. States are then populated in FIFO
// order; populating a state may enqueue further states. Finally,
// states that turned out identical are merged.
PropertyTable build() {
  PropertyItemSet initial_item_set;
  for (unsigned rule_id = 0; rule_id < sheet.size(); rule_id++) {
    for (unsigned selector_id = 0; selector_id < sheet[rule_id].selectors.size(); selector_id++) {
      initial_item_set.entries.insert(PropertyItem {rule_id, selector_id, 0});
    }
  }
  add_state(initial_item_set);

  while (!item_set_queue.empty()) {
    // Copy the front entry out before popping it, since populating
    // the state can push more entries onto the queue.
    pair<PropertyItemSet, StateId> pending = item_set_queue.front();
    item_set_queue.pop_front();
    populate_state(pending.first, pending.second);
  }

  remove_duplicate_states();
  return result;
}
// Different item sets can actually produce the same state, so the
// states need to be explicitly deduped as a post-processing step.
//
// Each pass finds states equal to an earlier state, rewrites every
// `state_id` / `default_next_state_id` in the table to the surviving
// index (adjusted for the entries about to be removed), and then erases
// the duplicates. Passes repeat until one finds no duplicates --
// presumably because rewriting ids can make further states compare equal.
void remove_duplicate_states() {
// Accumulated mapping from state indices to their current indices.
// NOTE(review): this map is updated below but never read in this function.
map<StateId, StateId> replacements;
while (true) {
// Maps the index of each duplicate state to the index of the earlier,
// equal state that will be kept in its place.
map<StateId, StateId> duplicates;
for (StateId i = 0, size = result.states.size(); i < size; i++) {
for (StateId j = 0; j < i; j++) {
// Only redirect to a state that is not itself being removed in this pass.
if (!duplicates.count(j) && result.states[j] == result.states[i]) {
duplicates.insert({ i, j });
break;
}
}
}
if (duplicates.empty()) break;
// For every state index in the current table, the index it will have
// once this pass's duplicates have been erased.
map<StateId, StateId> new_replacements;
for (StateId i = 0, size = result.states.size(); i < size; i++) {
StateId new_state_index = i;
auto duplicate = duplicates.find(i);
if (duplicate != duplicates.end()) {
new_state_index = duplicate->second;
}
// Count the removed states that precede the target index. std::map
// iterates in ascending key order, so the scan can stop at the first
// key that is not below the target. (The loop variable shadows the
// iterator named `duplicate` above.)
size_t prior_removed = 0;
for (const auto &duplicate : duplicates) {
if (duplicate.first >= new_state_index) break;
prior_removed++;
}
new_state_index -= prior_removed;
new_replacements.insert({i, new_state_index});
// map::insert leaves an existing entry for `i` untouched; entries whose
// value is `i` are then re-pointed at the new index.
replacements.insert({ i, new_state_index });
for (auto &replacement : replacements) {
if (replacement.second == i) {
replacement.second = new_state_index;
}
}
}
// Rewrite every reference to a state index in the table.
for (auto &state : result.states) {
for (auto &transition : state.transitions) {
auto new_replacement = new_replacements.find(transition.state_id);
if (new_replacement != new_replacements.end()) {
transition.state_id = new_replacement->second;
}
}
auto new_replacement = new_replacements.find(state.default_next_state_id);
if (new_replacement != new_replacements.end()) {
state.default_next_state_id = new_replacement->second;
}
}
// Erase from the highest index down so that the remaining indices in
// `duplicates` stay valid while erasing.
for (auto i = duplicates.rbegin(); i != duplicates.rend(); ++i) {
result.states.erase(result.states.begin() + i->first);
}
}
}
// Returns the selector step that the given item still has to match,
// or null once the item has consumed every step of its selector.
const PropertySelectorStep *next_step_for_item(const PropertyItem &item) {
  const PropertySelector &steps = sheet[item.rule_id].selectors[item.selector_id];
  return item.step_id < steps.size() ? &steps[item.step_id] : nullptr;
}
// Returns the step whose `is_immediate` flag callers inspect for an
// item that has already consumed at least one step, or null when the
// item has not consumed anything.
//
// NOTE(review): despite the name, this reads the selector at `step_id`
// (the same slot next_step_for_item reads), not at `step_id - 1`.
// Confirm which step carries the `is_immediate` flag before changing
// the index. The index is not bounds-checked here; the callers in this
// file only dereference the result when next_step_for_item(item) is
// non-null.
const PropertySelectorStep *prev_step_for_item(const PropertyItem &item) {
  if (item.step_id == 0) return nullptr;
  return &sheet[item.rule_id].selectors[item.selector_id][item.step_id];
}
// Scores a selector: one point per step, plus one extra point for each
// step that constrains the child index and one for each step that
// constrains the node text.
unsigned specificity_for_selector(const PropertySelector &selector) {
  unsigned score = selector.size();
  for (const PropertySelectorStep &step : selector) {
    const bool has_index = step.index != -1;
    const bool has_text = !step.text_pattern.empty();
    if (has_index) score++;
    if (has_text) score++;
  }
  return score;
}
// Decides whether a node accepted by `transition` also satisfies the
// selector step `step`. The node type and named-ness must agree exactly;
// the step's index and text pattern only have to agree when the step
// specifies them (index != -1, non-empty pattern).
bool step_matches_transition(const PropertySelectorStep &step, const PropertyTransition &transition) {
  if (!(step.type == transition.type)) return false;
  if (!(step.named == transition.named)) return false;
  if (step.index != -1 && !(step.index == transition.index)) return false;
  if (!step.text_pattern.empty() && !(step.text_pattern == transition.text_pattern)) return false;
  return true;
}
// Fills in `result.states[state_id]` for the given item set: its sorted
// transition list, its default next state, and the id of the property set
// that applies in this state. Successor item sets are registered through
// add_state, which may enqueue new states for later population.
void populate_state(const PropertyItemSet &item_set, StateId state_id) {
unordered_map<PropertyTransition, PropertyItemSet> transitions;
vector<PropertySelectorMatch> selector_matches;
for (const PropertyItem &item : item_set.entries) {
const PropertySelectorStep *next_step = next_step_for_item(item);
// If this item has more elements to match for its selector, then
// there's a state transition for elements that match the next
// part of the selector. The key is built with a state_id of 0 as a
// placeholder (state_id takes part in the hash); the real target is
// assigned further down on a copy of the key.
if (next_step) {
transitions[PropertyTransition{
next_step->type,
next_step->named,
next_step->index,
next_step->text_pattern,
0
}] = PropertyItemSet();
}
// If the item has matched its entire selector, then the property set
// for the item's rule applies in this state. The stored pointer refers
// to the rule held in the `sheet` member.
else {
const PropertyRule &rule = sheet[item.rule_id];
selector_matches.push_back(PropertySelectorMatch {
specificity_for_selector(rule.selectors[item.selector_id]),
item.rule_id,
item.selector_id,
&rule.properties,
});
}
}
// For each element that follows an item in this set,
// compute the next item set after descending through that element.
vector<PropertyTransitionEntry> transition_list;
for (auto &pair : transitions) {
PropertyTransition transition = pair.first;
PropertyItemSet &next_item_set = pair.second;
// Highest rule id among the items that complete their selector on
// this transition; stays 0 when none does.
unsigned latest_matching_rule_id = 0;
for (const PropertyItem &item : item_set.entries) {
const PropertySelectorStep *next_step = next_step_for_item(item);
const PropertySelectorStep *prev_step = prev_step_for_item(item);
if (next_step) {
// If the element matches the next part of the item, advance the
// item to the next part of its selector.
if (step_matches_transition(*next_step, transition)) {
PropertyItem next_item = item;
next_item.step_id++;
next_item_set.entries.insert(next_item);
// If the item is at the end of its selector, record its rule id
// so that it can be used when sorting the transitions.
if (!next_step_for_item(next_item) && next_item.rule_id > latest_matching_rule_id) {
latest_matching_rule_id = item.rule_id;
}
}
// Whether or not the element matched, the unadvanced item is also
// carried into the next item set, unless prev_step_for_item reports
// a step flagged `is_immediate` (an immediate child selector), in
// which case it is left out.
if (!prev_step || !prev_step->is_immediate) {
next_item_set.entries.insert(item);
}
}
}
transition.state_id = add_state(next_item_set);
transition_list.push_back(PropertyTransitionEntry {transition, latest_matching_rule_id});
}
// Order the transitions with PropertyTransitionEntry::operator< before
// storing them on the state.
std::sort(transition_list.begin(), transition_list.end());
for (auto &entry : transition_list) {
result.states[state_id].transitions.push_back(entry.transition);
}
// Compute the default successor item set - the item set that
// we should advance to if the next element doesn't match any
// of the next elements in the item set's selectors.
PropertyItemSet default_next_item_set;
for (const PropertyItem &item : item_set.entries) {
const PropertySelectorStep *next_step = next_step_for_item(item);
const PropertySelectorStep *prev_step = prev_step_for_item(item);
if (next_step && (!prev_step || !prev_step->is_immediate)) {
default_next_item_set.entries.insert(item);
}
}
StateId default_next_state_id = add_state(default_next_item_set);
result.states[state_id].default_next_state_id = default_next_state_id;
// Sort the matching property sets by ascending specificity and by
// their order in the sheet. This way, more specific selectors and later
// rules will override less specific selectors and earlier rules.
PropertySet properties;
std::sort(selector_matches.begin(), selector_matches.end());
for (auto &match : selector_matches) {
for (auto &pair : *match.property_set) {
properties[pair.first] = pair.second;
}
}
// Add the final property set to the deduped list.
result.states[state_id].property_set_id = add_property_set(properties);
}
// Returns the state id for `item_set`. An item set seen before maps to
// its existing id; a new one gets the next free id, an empty
// PropertyState appended to the table, and a queue entry so that it is
// populated later.
StateId add_state(const PropertyItemSet &item_set) {
  auto existing = ids_by_item_set.find(item_set);
  if (existing != ids_by_item_set.end()) {
    return existing->second;
  }

  StateId new_id = result.states.size();
  ids_by_item_set[item_set] = new_id;
  result.states.push_back(PropertyState {});
  item_set_queue.push_back({item_set, new_id});
  return new_id;
}
// Returns the id of `property_set` in the table's deduplicated list of
// property sets, appending it first if an equal set is not already
// present.
PropertySetId add_property_set(const PropertySet &property_set) {
  auto existing = ids_by_property_set.find(property_set);
  if (existing != ids_by_property_set.end()) {
    return existing->second;
  }

  PropertySetId new_id = result.property_sets.size();
  ids_by_property_set[property_set] = new_id;
  result.property_sets.push_back(property_set);
  return new_id;
}
};
// Public entry point: builds the property-matching state machine for
// the given property sheet.
PropertyTable build_property_table(const PropertySheet &sheet) {
  PropertyTableBuilder builder(sheet);
  return builder.build();
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -0,0 +1,15 @@
#ifndef COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
#define COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_
#include <memory>
#include "compiler/property_table.h"
namespace tree_sitter {
namespace build_tables {
PropertyTable build_property_table(const PropertySheet &);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_PROPERTY_TABLE_BUILDER_H_

View file

@ -1,11 +1,13 @@
#include "tree_sitter/compiler.h"
#include "compiler/prepare_grammar/prepare_grammar.h"
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/build_tables/property_table_builder.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/generate_code/property_table_json.h"
#include "compiler/syntax_grammar.h"
#include "compiler/log.h"
#include "compiler/lexical_grammar.h"
#include "compiler/parse_grammar.h"
#include "compiler/parse_json.h"
#include "json.h"
namespace tree_sitter {
@ -20,7 +22,7 @@ using std::make_tuple;
extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) {
set_log_file(log_file);
ParseGrammarResult parse_result = parse_grammar(string(input));
ParseGrammarResult parse_result = parse_grammar_json(string(input));
if (!parse_result.error_message.empty()) {
return {nullptr, strdup(parse_result.error_message.c_str()), TSCompileErrorTypeInvalidGrammar};
}
@ -60,4 +62,15 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file)
return {strdup(code.c_str()), nullptr, TSCompileErrorTypeNone};
}
// Compiles a property sheet given as JSON text into the JSON
// description of its property table.
//
// On success the first field of the result is a strdup'd string holding
// the generated JSON; on a parse failure the second field is a strdup'd
// error message and the error type is TSCompileErrorTypeInvalidGrammar.
extern "C" TSCompileResult ts_compile_property_sheet(const char *input, FILE *log_file) {
  set_log_file(log_file);

  auto sheet_result = parse_property_sheet_json(string(input));
  if (!sheet_result.ok()) {
    const char *message = sheet_result.error.c_str();
    return {nullptr, strdup(message), TSCompileErrorTypeInvalidGrammar};
  }

  PropertyTable property_table = build_tables::build_property_table(sheet_result.value);
  string json = generate_code::property_table_json(property_table);
  return {strdup(json.c_str()), nullptr, TSCompileErrorTypeNone};
}
} // namespace tree_sitter

View file

@ -0,0 +1,117 @@
#include "compiler/generate_code/property_table_json.h"
#include <vector>
#include <string>
using std::string;
using std::to_string;
using std::vector;
namespace tree_sitter {
namespace generate_code {
class CodeGenerator {
string buffer;
public:
string generate(const PropertyTable &table) {
add("{");
add("\"states\":");
add("[");
for (unsigned i = 0; i < table.states.size(); i++) {
const PropertyState &state = table.states[i];
if (i != 0) add(",");
add_state(i, state);
}
add("],");
add("\"property_sets\":");
add("[");
bool first = true;
for (const PropertySet &property_set : table.property_sets) {
if (!first) add(",");
first = false;
add_property_set(property_set);
}
add("]");
add("}");
return buffer;
}
private:
void add_state(unsigned i, const PropertyState &state) {
add("{");
add("\"id\":");
add(to_string(i));
add(",\"property_set_id\":");
add(to_string(state.property_set_id));
add(",");
add("\"transitions\":[");
bool first = true;
for (const auto &transition : state.transitions) {
if (!first) add(",");
first = false;
add_transition(transition);
}
add("],");
add("\"default_next_state_id\":");
add(to_string(state.default_next_state_id));
add("}");
}
void add_property_set(const PropertySet &property_set) {
add("{");
bool first = true;
for (const auto &pair : property_set) {
if (!first) add(",");
first = false;
add("\"");
add(pair.first);
add("\":\"");
add(pair.second);
add("\"");
}
add("}");
}
void add_transition(const PropertyTransition &transition) {
add("{");
add("\"type\":");
add_string(transition.type);
add(",\"named\":");
add(transition.named ? "true" : "false");
if (transition.index != -1) {
add(",\"index\":");
add(to_string(transition.index));
}
if (!transition.text_pattern.empty()) {
add(",\"text\":");
add_string(transition.text_pattern);
}
add(",\"state_id\": ");
add(to_string(transition.state_id));
add("}");
}
void add_string(const string &s) {
add("\"");
for (const char c : s) {
if (c == '"') add("\\");
add(c);
}
add("\"");
}
void add(string input) {
buffer += input;
}
void add(char c) {
buffer += c;
}
};
// Renders the given property table as a JSON string.
string property_table_json(PropertyTable table) {
  CodeGenerator generator;
  return generator.generate(table);
}
} // namespace generate_code
} // namespace tree_sitter

View file

@ -0,0 +1,15 @@
#ifndef COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
#define COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_
#include <string>
#include "compiler/property_table.h"
namespace tree_sitter {
namespace generate_code {
std::string property_table_json(PropertyTable);
} // namespace generate_code
} // namespace tree_sitter
#endif // COMPILER_GENERATE_CODE_PROPERTY_TABLE_JSON_H_

View file

@ -1,21 +0,0 @@
#ifndef COMPILER_GRAMMAR_JSON_H_
#define COMPILER_GRAMMAR_JSON_H_
#include <string>
#include <unordered_set>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
namespace tree_sitter {
struct ParseGrammarResult {
std::string name;
InputGrammar grammar;
std::string error_message;
};
ParseGrammarResult parse_grammar(const std::string &);
} // namespace tree_sitter
#endif // COMPILER_GRAMMAR_JSON_H_

View file

@ -1,10 +1,11 @@
#include "compiler/parse_grammar.h"
#include "compiler/parse_json.h"
#include <string>
#include <vector>
#include <unordered_set>
#include <utility>
#include "json.h"
#include "compiler/rule.h"
#include "compiler/util/result.h"
namespace tree_sitter {
@ -19,17 +20,9 @@ using rules::Metadata;
using rules::Pattern;
using rules::String;
using rules::NamedSymbol;
using util::Result;
struct ParseRuleResult {
Rule rule;
string error_message;
ParseRuleResult(const string &error_message) : error_message(error_message) {}
ParseRuleResult(const char *error_message) : error_message(error_message) {}
ParseRuleResult(Rule rule) : rule(rule) {}
};
ParseRuleResult parse_rule(json_value *rule_json) {
Result<Rule> parse_rule_json(json_value *rule_json) {
string error_message;
json_value rule_type_json;
string type;
@ -62,11 +55,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
vector<Rule> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
auto result = parse_rule(member_json);
if (!result.error_message.empty()) {
return "Invalid choice member: " + result.error_message;
auto result = parse_rule_json(member_json);
if (!result.ok()) {
return "Invalid choice member: " + result.error;
}
members.push_back(result.rule);
members.push_back(result.value);
}
return Rule::choice(members);
}
@ -80,49 +73,49 @@ ParseRuleResult parse_rule(json_value *rule_json) {
vector<Rule> members;
for (size_t i = 0, length = members_json.u.array.length; i < length; i++) {
json_value *member_json = members_json.u.array.values[i];
auto result = parse_rule(member_json);
if (!result.error_message.empty()) {
return "Invalid choice member: " + result.error_message;
auto result = parse_rule_json(member_json);
if (!result.ok()) {
return "Invalid choice member: " + result.error;
}
members.push_back(result.rule);
members.push_back(result.value);
}
return Rule::seq(members);
}
if (type == "REPEAT") {
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid repeat content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid repeat content: " + result.error;
}
return Rule::choice({Rule::repeat(result.rule), Blank{}});
return Rule::choice({Rule::repeat(result.value), Blank{}});
}
if (type == "REPEAT1") {
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid repeat content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid repeat content: " + result.error;
}
return Rule::repeat(result.rule);
return Rule::repeat(result.value);
}
if (type == "TOKEN") {
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid token content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid token content: " + result.error;
}
return Rule(Metadata::token(move(result.rule)));
return Rule(Metadata::token(move(result.value)));
}
if (type == "IMMEDIATE_TOKEN") {
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid token content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid token content: " + result.error;
}
return Rule(Metadata::immediate_token(move(result.rule)));
return Rule(Metadata::immediate_token(move(result.value)));
}
if (type == "PATTERN") {
@ -159,11 +152,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid precedence content: " + result.error;
}
return Rule(Metadata::prec(precedence_json.u.integer, move(result.rule)));
return Rule(Metadata::prec(precedence_json.u.integer, move(result.value)));
}
if (type == "PREC_LEFT") {
@ -173,11 +166,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid precedence content: " + result.error;
}
return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.rule)));
return Rule(Metadata::prec_left(precedence_json.u.integer, move(result.value)));
}
if (type == "PREC_RIGHT") {
@ -187,11 +180,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid precedence content: " + result.error;
}
return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.rule)));
return Rule(Metadata::prec_right(precedence_json.u.integer, move(result.value)));
}
if (type == "PREC_DYNAMIC") {
@ -201,11 +194,11 @@ ParseRuleResult parse_rule(json_value *rule_json) {
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid precedence content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid precedence content: " + result.error;
}
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.rule)));
return Rule(Metadata::prec_dynamic(precedence_json.u.integer, move(result.value)));
}
if (type == "ALIAS") {
@ -220,21 +213,21 @@ ParseRuleResult parse_rule(json_value *rule_json) {
}
json_value content_json = rule_json->operator[]("content");
auto result = parse_rule(&content_json);
if (!result.error_message.empty()) {
return "Invalid rename content: " + result.error_message;
auto result = parse_rule_json(&content_json);
if (!result.ok()) {
return "Invalid rename content: " + result.error;
}
return Rule(Metadata::alias(
string(value_json.u.string.ptr),
is_named_json.u.boolean,
move(result.rule)
move(result.value)
));
}
return "Unknown rule type: " + type;
}
ParseGrammarResult parse_grammar(const string &input) {
ParseGrammarResult parse_grammar_json(const string &input) {
string error_message;
string name;
InputGrammar grammar;
@ -242,8 +235,8 @@ ParseGrammarResult parse_grammar(const string &input) {
name_json, rules_json, extras_json, conflicts_json, external_tokens_json,
inline_rules_json, word_rule_json;
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
char parse_error[json_error_max];
json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
json_value *grammar_json =
json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
if (!grammar_json) {
@ -272,15 +265,15 @@ ParseGrammarResult parse_grammar(const string &input) {
for (size_t i = 0, length = rules_json.u.object.length; i < length; i++) {
json_object_entry entry_json = rules_json.u.object.values[i];
auto result = parse_rule(entry_json.value);
if (!result.error_message.empty()) {
error_message = result.error_message;
auto result = parse_rule_json(entry_json.value);
if (!result.ok()) {
error_message = result.error;
goto error;
}
grammar.variables.push_back(Variable{
string(entry_json.name),
VariableTypeNamed,
result.rule
result.value
});
}
@ -293,12 +286,12 @@ ParseGrammarResult parse_grammar(const string &input) {
for (size_t i = 0, length = extras_json.u.array.length; i < length; i++) {
json_value *extra_json = extras_json.u.array.values[i];
auto result = parse_rule(extra_json);
if (!result.error_message.empty()) {
error_message = "Invalid extra token: " + result.error_message;
auto result = parse_rule_json(extra_json);
if (!result.ok()) {
error_message = "Invalid extra token: " + result.error;
goto error;
}
grammar.extra_tokens.push_back(result.rule);
grammar.extra_tokens.push_back(result.value);
}
}
@ -361,12 +354,12 @@ ParseGrammarResult parse_grammar(const string &input) {
for (size_t i = 0, length = external_tokens_json.u.array.length; i < length; i++) {
json_value *external_token_json = external_tokens_json.u.array.values[i];
auto result = parse_rule(external_token_json);
if (!result.error_message.empty()) {
error_message = "Invalid external token: " + result.error_message;
auto result = parse_rule_json(external_token_json);
if (!result.ok()) {
error_message = "Invalid external token: " + result.error;
goto error;
}
grammar.external_tokens.push_back(result.rule);
grammar.external_tokens.push_back(result.value);
}
}
@ -384,11 +377,93 @@ ParseGrammarResult parse_grammar(const string &input) {
return { name, grammar, "" };
error:
if (grammar_json) {
json_value_free(grammar_json);
}
if (grammar_json) json_value_free(grammar_json);
return { "", InputGrammar(), error_message };
}
// Parses one rule of a property sheet.
//
// `rule_json` must be an object with a "selectors" array and a
// "properties" object. Each selector is an array of step objects with a
// string "type", optional boolean "named" and "immediate" flags
// (treated as false when absent or not booleans), an optional integer
// "index" (-1 when absent) and an optional string "text". Every
// property value must be a string.
//
// Returns the parsed rule, or an error message describing the first
// problem found.
Result<PropertyRule> parse_property_rule_json(json_value *rule_json) {
  PropertyRule result;

  if (rule_json->type != json_object) return "Rule must be an object";

  json_value selectors_json = rule_json->operator[]("selectors");
  if (selectors_json.type != json_array) return "Selectors must be an array";

  for (unsigned i = 0; i < selectors_json.u.array.length; i++) {
    PropertySelector selector;
    json_value *selector_json = selectors_json.u.array.values[i];
    if (selector_json->type != json_array) return "Each selector must be an array";

    for (unsigned j = 0; j < selector_json->u.array.length; j++) {
      json_value *selector_step_json = selector_json->u.array.values[j];
      if (selector_step_json->type != json_object) return "Each selector must be an array of objects";

      // A missing key yields a value of type json_none whose string
      // pointer is null; constructing a std::string from it would be
      // undefined behavior, so the type is checked before it is read.
      json_value type_json = selector_step_json->operator[]("type");
      if (type_json.type != json_string) return "Each selector step must have a string type";

      json_value named_json = selector_step_json->operator[]("named");
      json_value immediate_json = selector_step_json->operator[]("immediate");

      PropertySelectorStep step;
      step.type = type_json.u.string.ptr;
      step.named = named_json.type == json_boolean && named_json.u.boolean;
      step.is_immediate = immediate_json.type == json_boolean && immediate_json.u.boolean;

      json_value index_json = selector_step_json->operator[]("index");
      if (index_json.type == json_integer) {
        step.index = static_cast<int>(index_json.u.integer);
      } else {
        step.index = -1;
      }

      json_value text_pattern_json = selector_step_json->operator[]("text");
      if (text_pattern_json.type == json_string) {
        step.text_pattern = text_pattern_json.u.string.ptr;
      }

      selector.push_back(step);
    }

    result.selectors.push_back(selector);
  }

  json_value properties_json = rule_json->operator[]("properties");
  if (properties_json.type != json_object) return "Properties must be an object";

  for (unsigned i = 0; i < properties_json.u.object.length; i++) {
    json_object_entry entry_json = properties_json.u.object.values[i];
    json_value *value_json = entry_json.value;
    if (value_json->type != json_string) return "Property values must be strings";
    result.properties[entry_json.name] = value_json->u.string.ptr;
  }

  return result;
}
// Parses a property sheet from JSON text (comments are allowed).
//
// The document must be an array of rule objects. Returns the parsed
// sheet, or an error message if the text is not valid JSON, is not an
// array, or contains an invalid rule. The parsed JSON tree is released
// on every path, including success.
Result<PropertySheet> parse_property_sheet_json(const string &input) {
  char parse_error[json_error_max];
  json_settings settings = { 0, json_enable_comments, 0, 0, 0, 0 };
  json_value *sheet_json = json_parse_ex(&settings, input.c_str(), input.size(), parse_error);
  if (!sheet_json) {
    // Nothing was allocated, so there is nothing to free.
    return string("Invalid JSON at ") + parse_error;
  }

  PropertySheet sheet;
  string error_message;

  if (sheet_json->type != json_array) {
    error_message = "Property sheet must be an array";
  } else {
    for (unsigned i = 0; i < sheet_json->u.array.length; i++) {
      json_value *rule_json = sheet_json->u.array.values[i];
      auto result = parse_property_rule_json(rule_json);
      if (!result.ok()) {
        error_message = "Invalid property rule: " + result.error;
        break;
      }
      sheet.push_back(result.value);
    }
  }

  // The sheet holds its own copies of every string, so the JSON tree
  // can be released before returning.
  json_value_free(sheet_json);

  if (!error_message.empty()) return error_message.c_str();
  return sheet;
}
} // namespace tree_sitter

29
src/compiler/parse_json.h Normal file
View file

@ -0,0 +1,29 @@
#ifndef COMPILER_PARSE_JSON_H_
#define COMPILER_PARSE_JSON_H_
#include <string>
#include <unordered_set>
#include "tree_sitter/compiler.h"
#include "compiler/grammar.h"
#include "compiler/property_sheet.h"
#include "compiler/util/result.h"
namespace tree_sitter {
// Return type of parse_grammar_json: a name, the parsed grammar, and an error
// message.
// NOTE(review): field order matters wherever this struct is brace-initialized
// positionally; presumably an empty error_message signals success - confirm
// against the callers.
struct ParseGrammarResult {
// Name associated with the grammar.
std::string name;
// The parsed grammar.
InputGrammar grammar;
// Description of what went wrong, if anything.
std::string error_message;
};
// Pairs a property sheet with an error message.
// NOTE(review): parse_property_sheet_json, declared in this header, returns
// util::Result<PropertySheet> rather than this struct - check whether this
// type is still used anywhere.
struct ParsePropertySheetResult {
// The parsed property sheet.
PropertySheet property_sheet;
// Description of what went wrong, if anything.
std::string error_message;
};
// Parses a grammar from the given JSON text.
ParseGrammarResult parse_grammar_json(const std::string &);
// Parses a property sheet from the given JSON text. The result carries either
// the sheet or an error message (see util::Result::ok()).
util::Result<PropertySheet> parse_property_sheet_json(const std::string &);
} // namespace tree_sitter
#endif // COMPILER_PARSE_JSON_H_

View file

@ -0,0 +1,39 @@
#ifndef COMPILER_PROPERTY_SHEET_H_
#define COMPILER_PROPERTY_SHEET_H_
#include <vector>
#include <map>
#include <string>
namespace tree_sitter {
// One step of a property selector.
//
// Fields mirror the keys read from a selector step in the property sheet JSON:
// the node type, whether the node is named, whether the step is "immediate",
// an index (-1 when none was given), and a text pattern (empty when none was
// given).
struct PropertySelectorStep {
  std::string type;
  bool named;
  bool is_immediate;
  int index;
  std::string text_pattern;

  // Two steps are equal only when every field matches. The text pattern is
  // part of the comparison so that steps differing only by pattern are not
  // treated as the same step.
  inline bool operator==(const PropertySelectorStep &other) const {
    return
      type == other.type &&
      named == other.named &&
      is_immediate == other.is_immediate &&
      index == other.index &&
      text_pattern == other.text_pattern;
  }
};
typedef std::vector<PropertySelectorStep> PropertySelector;
typedef std::map<std::string, std::string> PropertySet;
struct PropertyRule {
std::vector<PropertySelector> selectors;
PropertySet properties;
};
typedef std::vector<PropertyRule> PropertySheet;
} // namespace tree_sitter
#endif // COMPILER_PROPERTY_SHEET_H_

View file

@ -0,0 +1,48 @@
#ifndef COMPILER_PROPERTY_TABLE_H_
#define COMPILER_PROPERTY_TABLE_H_
#include <vector>
#include <map>
#include <string>
#include "compiler/property_sheet.h"
namespace tree_sitter {
// A transition stored in a PropertyState.
//
// Carries a node type, a named flag, an index, a text pattern, and the id of a
// state.
// NOTE(review): presumably state_id is the state entered when the transition
// matches, and the other fields correspond to those of PropertySelectorStep -
// confirm against the table builder.
struct PropertyTransition {
  std::string type;
  bool named;
  int index;
  std::string text_pattern;
  unsigned state_id;

  // Field-by-field equality; any mismatch makes the transitions unequal.
  bool operator==(const PropertyTransition &other) const {
    if (state_id != other.state_id) return false;
    if (index != other.index) return false;
    if (named != other.named) return false;
    if (type != other.type) return false;
    return text_pattern == other.text_pattern;
  }
};
// A state of the property table: its transitions, a default next state id,
// and a property set id.
// NOTE(review): presumably the ids index PropertyTable::states and
// PropertyTable::property_sets respectively - confirm against the builder.
struct PropertyState {
  std::vector<PropertyTransition> transitions;
  unsigned default_next_state_id;
  unsigned property_set_id;

  // States are equal when both ids and the whole transition list match.
  bool operator==(const PropertyState &other) const {
    if (default_next_state_id != other.default_next_state_id) return false;
    if (property_set_id != other.property_set_id) return false;
    return transitions == other.transitions;
  }
};
// A property table: a list of states plus a list of property sets.
// NOTE(review): presumably PropertyState::property_set_id refers to an entry
// of property_sets and the state ids refer to entries of states - confirm.
struct PropertyTable {
// All states of the table.
std::vector<PropertyState> states;
// All property sets of the table.
std::vector<PropertySet> property_sets;
};
} // namespace tree_sitter
#endif // COMPILER_PROPERTY_TABLE_H_

View file

@ -0,0 +1,24 @@
#ifndef COMPILER_UTIL_RESULT_H_
#define COMPILER_UTIL_RESULT_H_
#include <string>
#include <utility>
namespace tree_sitter {
namespace util {
// Holds either a value or an error message.
//
// A Result is successful exactly when `error` is empty (see ok()). The
// converting constructors are intentionally implicit so that a function
// returning Result<T> can `return some_value;` or `return "message";`.
template <typename Value>
struct Result {
  Value value;
  std::string error;

  // A default-constructed Result is a failure with the message "Empty".
  inline Result() : value(), error("Empty") {}

  // Success: takes ownership of the value by moving it, so move-only payloads
  // work and no copy is made.
  inline Result(Value &&v) : value(std::move(v)) {}

  // Failure: `value` is value-initialized and `error` holds the message.
  inline Result(const std::string &message) : value(), error(message) {}
  inline Result(const char *message) : value(), error(message) {}

  // True when no error message is set.
  inline bool ok() const { return error.empty(); }
};
} // namespace util
} // namespace tree_sitter
#endif // COMPILER_UTIL_RESULT_H_

View file

@ -181,7 +181,7 @@ describe("extract_tokens", []() {
Seq{CharacterSet{{'a'}}, CharacterSet{{'b'}}},
true
},
}))
}));
});
it("does not move entire rules into the lexical grammar if their content is used elsewhere in the grammar", [&]() {

View file

@ -84,7 +84,7 @@ describe("intern_symbols", []() {
VariableTypeNamed,
Symbol::non_terminal(2),
},
}))
}));
});
});

View file

@ -138,7 +138,8 @@ var x = !!!
Errors inside of a template string substitution
=========================================================
const a = `b c ${d +} f g`
const a = `b c ${d += } f g`
const h = `i ${j(k} l`
---
@ -146,7 +147,13 @@ const a = `b c ${d +} f g`
(lexical_declaration
(variable_declarator
(identifier)
(template_string (template_substitution (identifier) (ERROR))))))
(template_string (template_substitution (identifier) (ERROR)))))
(lexical_declaration
(variable_declarator
(identifier)
(template_string (template_substitution (call_expression
(identifier)
(arguments (identifier) (MISSING))))))))
=========================================================
Long sequences of invalid tokens

View file

@ -7,6 +7,7 @@
using std::string;
using std::cout;
using namespace snowhouse;
static void append_text_to_scope_sequence(ScopeSequence *sequence,
ScopeStack *current_scopes,

View file

@ -2,6 +2,7 @@
#include "helpers/tree_helpers.h"
#include "helpers/point_helpers.h"
#include <ostream>
#include <cstring>
using std::string;
using std::vector;

View file

@ -310,7 +310,7 @@ describe("Node", [&]() {
AssertThat(ts_node_start_point(child7), Equals<TSPoint>({ 8, 0 }));
AssertThat(ts_node_end_point(child7), Equals<TSPoint>({ 8, 1 }));
AssertThat(ts_node_child_count(child6), Equals<size_t>(3))
AssertThat(ts_node_child_count(child6), Equals<size_t>(3));
TSNode left_brace = ts_node_child(child6, 0);
TSNode pair = ts_node_child(child6, 1);

View file

@ -11,6 +11,7 @@ namespace tree_sitter {}
using namespace std;
using namespace bandit;
using namespace snowhouse;
using namespace tree_sitter;
#define START_TEST go_bandit([]() {

View file

@ -84,6 +84,7 @@
],
'ldflags': ['-g'],
'xcode_settings': {
'ARCHS': ['x86_64'],
'OTHER_LDFLAGS': ['-g'],
'OTHER_CPLUSPLUSFLAGS': ['-fsanitize=address'],
'GCC_OPTIMIZATION_LEVEL': '0',
@ -114,6 +115,7 @@
],
'xcode_settings': {
'ARCHS': ['x86_64'],
'CLANG_CXX_LANGUAGE_STANDARD': 'c++14',
'ALWAYS_SEARCH_USER_PATHS': 'NO',
}