Make ts_compile_grammar take an optional log file, start logging to it

This commit is contained in:
Max Brunsfeld 2018-05-24 16:01:14 -07:00
parent 69d8c6f5e6
commit 6fca8f2f4d
13 changed files with 164 additions and 25 deletions

View file

@ -5,6 +5,8 @@
extern "C" {
#endif
#include <stdio.h>
typedef enum {
TSCompileErrorTypeNone,
TSCompileErrorTypeInvalidGrammar,
@ -25,7 +27,7 @@ typedef struct {
TSCompileErrorType error_type;
} TSCompileResult;
TSCompileResult ts_compile_grammar(const char *input);
TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
#ifdef __cplusplus
}

View file

@ -22,6 +22,7 @@
'src/compiler/compile.cc',
'src/compiler/generate_code/c_code.cc',
'src/compiler/lex_table.cc',
'src/compiler/log.cc',
'src/compiler/parse_grammar.cc',
'src/compiler/parse_table.cc',
'src/compiler/precedence_range.cc',

View file

@ -9,9 +9,11 @@
#include <vector>
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/lookahead_set.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/log.h"
#include "compiler/parse_table.h"
#include "compiler/rule.h"
#include "utf8proc.h"
namespace tree_sitter {
namespace build_tables {
@ -78,6 +80,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
bool conflict_detection_mode;
LookaheadSet keyword_symbols;
Symbol keyword_capture_token;
char encoding_buffer[8];
public:
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
@ -151,6 +154,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
// For each pair of tokens, generate a lex table for just those two tokens and record what
// conflicts arise.
LOG_START("detecting conflicts between tokens");
conflict_detection_mode = true;
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
for (Symbol::Index j = 0; j < i; j++) {
@ -165,6 +169,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
}
}
}
LOG_END();
// Find a 'keyword capture token' that matches all of the indentified keywords.
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
@ -304,9 +309,33 @@ class LexTableBuilderImpl : public LexTableBuilder {
if (prefer_advancing && !next_item_set_can_yield_this_token) {
auto advance_symbol = transition.destination.entries.begin()->lhs;
if (characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
characters.intersects(separator_start_characters)) {
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerStringWithValidNextChar);
auto &following_chars = following_characters_by_token[accept_action.symbol.index];
CharacterSet conflicting_following_chars = characters.intersection(following_chars);
CharacterSet conflicting_sep_chars = characters.intersection(separator_start_characters);
if (!conflicting_following_chars.is_empty()) {
LOG(
"%s shadows %s followed by '%s'",
token_name(advance_symbol).c_str(),
token_name(accept_action.symbol).c_str(),
log_char(*conflicting_following_chars.included_chars.begin())
);
record_conflict(
accept_action.symbol,
advance_symbol,
MatchesLongerStringWithValidNextChar
);
} else if (!conflicting_sep_chars.is_empty()) {
LOG(
"%s shadows %s followed by '%s'",
token_name(advance_symbol).c_str(),
token_name(accept_action.symbol).c_str(),
log_char(*conflicting_sep_chars.included_chars.begin())
);
record_conflict(
accept_action.symbol,
advance_symbol,
MatchesLongerStringWithValidNextChar
);
} else {
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerString);
}
@ -508,8 +537,22 @@ class LexTableBuilderImpl : public LexTableBuilder {
main_lex_state_ids.clear();
}
const string &token_name(rules::Symbol &symbol) {
return grammar.variables[symbol.index].name;
// Human-readable name for a token, used in log messages: named tokens are
// shown as-is, while anonymous (literal) tokens are wrapped in single
// quotes — which is why this now returns a string by value rather than a
// reference into the grammar.
string token_name(rules::Symbol &symbol) {
  const LexicalVariable &variable = grammar.variables[symbol.index];
  if (variable.type == VariableTypeNamed) {
    return variable.name;
  } else {
    return "'" + variable.name + "'";
  }
}
// Encode a single code point as a NUL-terminated UTF-8 string for logging.
// Returns a pointer into the shared member `encoding_buffer`, so the result
// is only valid until the next call (not thread-safe across builders).
// NOTE(review): utf8proc_encode_char is assumed to write at most 4 bytes,
// which fits the 8-byte buffer plus terminator — confirm against utf8proc docs.
const char *log_char(int32_t character) {
  uint32_t count = utf8proc_encode_char(
    character,
    reinterpret_cast<utf8proc_uint8_t *>(encoding_buffer)
  );
  encoding_buffer[count] = 0;
  return encoding_buffer;
}
};

View file

@ -6,6 +6,7 @@
#include <string>
#include <unordered_map>
#include <utility>
#include "compiler/log.h"
#include "compiler/parse_table.h"
#include "compiler/build_tables/parse_item.h"
#include "compiler/build_tables/parse_item_set_builder.h"
@ -152,8 +153,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
parse_table.states[state_id].terminal_entries.clear();
// Add all the tokens that have no conflict with other tokens.
LookaheadSet non_conflicting_tokens;
// First, identify the conflict-free tokens.
LookaheadSet conflict_free_tokens;
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol token = Symbol::terminal(i);
bool conflicts_with_other_tokens = false;
@ -166,27 +167,41 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
break;
}
}
if (!conflicts_with_other_tokens) non_conflicting_tokens.insert(token);
if (!conflicts_with_other_tokens) conflict_free_tokens.insert(token);
}
// Include in the error recover state all of the tokens that are either
// conflict-free themselves, or have no conflicts with any conflict-free
// tokens.
LOG_START("finding non-conflicting tokens for error recovery");
LookaheadSet tokens;
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
Symbol token = Symbol::terminal(i);
bool conflicts_with_other_tokens = false;
if (!non_conflicting_tokens.contains(token)) {
non_conflicting_tokens.for_each([&](Symbol other_token) {
if (conflict_free_tokens.contains(token)) {
LOG("include %s", symbol_name(token).c_str());
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
} else {
bool conflicts_with_other_tokens = false;
conflict_free_tokens.for_each([&](Symbol other_token) {
if (!coincident_tokens_by_token[token.index].contains(other_token) &&
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
LOG(
"exclude %s: conflicts with %s",
symbol_name(token).c_str(),
symbol_name(other_token).c_str()
);
conflicts_with_other_tokens = true;
return false;
}
return true;
});
}
if (!conflicts_with_other_tokens) {
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
if (!conflicts_with_other_tokens) {
LOG("include %s", symbol_name(token).c_str());
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
}
}
}
LOG_END();
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
if (grammar.external_tokens[i].corresponding_internal_token == rules::NONE()) {

View file

@ -3,6 +3,7 @@
#include "compiler/build_tables/parse_table_builder.h"
#include "compiler/generate_code/c_code.h"
#include "compiler/syntax_grammar.h"
#include "compiler/log.h"
#include "compiler/lexical_grammar.h"
#include "compiler/parse_grammar.h"
#include "json.h"
@ -16,7 +17,9 @@ using std::vector;
using std::get;
using std::make_tuple;
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) {
set_log_file(log_file);
ParseGrammarResult parse_result = parse_grammar(string(input));
if (!parse_result.error_message.empty()) {
return { nullptr, strdup(parse_result.error_message.c_str()),
@ -48,8 +51,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
move(lexical_grammar)
);
return {
strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
set_log_file(nullptr);
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
}
} // namespace tree_sitter

31
src/compiler/log.cc Normal file
View file

@ -0,0 +1,31 @@
#include "compiler/log.h"

namespace tree_sitter {

// Per-thread logging state: current nesting depth and destination stream.
thread_local unsigned _indent_level = 0;
thread_local FILE *_log_file = nullptr;

// Set (or clear, with nullptr) the stream that LOG output is written to.
// Resets the indentation depth so each compilation starts unindented.
void set_log_file(FILE *file) {
  _log_file = file;
  _indent_level = 0;
}

FILE *get_log_file() {
  return _log_file;
}

// Increase the nesting depth (called by LOG_START).
void _indent_logs() {
  _indent_level++;
}

// Decrease the nesting depth (called by LOG_END). Guarded so an unmatched
// LOG_END cannot wrap the unsigned counter and produce an enormous indent.
void _outdent_logs() {
  if (_indent_level > 0) _indent_level--;
}

// Write two spaces per nesting level. Emitting the padding directly, rather
// than fwriting a slice of a fixed-size spaces literal, avoids reading past
// the end of that literal when the nesting is deep.
void _print_indent() {
  for (unsigned i = 0; i < _indent_level; i++) {
    fputs("  ", _log_file);
  }
}

}  // namespace tree_sitter

38
src/compiler/log.h Normal file
View file

@ -0,0 +1,38 @@
#ifndef COMPILER_LOG_H_
#define COMPILER_LOG_H_

// Lightweight, thread-local logging used while compiling a grammar.
// Output goes to the FILE* registered via set_log_file; when no file is
// set, the LOG macros are no-ops.

#include <stdio.h>

namespace tree_sitter {

// Set the stream that LOG output is written to; nullptr disables logging.
void set_log_file(FILE *);
FILE *get_log_file();

// Internal helpers used by the macros below; not intended for direct use.
void _indent_logs();
void _outdent_logs();
void _print_indent();

// Log a message, then indent all subsequent messages one level deeper
// until a matching LOG_END.
#define LOG_START(...) \
  do { \
    LOG(__VA_ARGS__); \
    _indent_logs(); \
  } while (0)

// Close a LOG_START section: outdent and emit a blank separator line.
// Arguments are accepted for symmetry with LOG_START but are ignored.
#define LOG_END(...) \
  do { \
    _outdent_logs(); \
    LOG(""); \
  } while (0)

// printf-style logging; does nothing unless a log file has been set.
// Wrapped in do/while so it behaves as a single statement after `if`.
#define LOG(...) \
  do { \
    FILE *f = get_log_file(); \
    if (f) { \
      _print_indent(); \
      fprintf(f, __VA_ARGS__); \
      fputs("\n", f); \
    } \
  } while (0)

}  // namespace tree_sitter

#endif  // COMPILER_LOG_H_

View file

@ -159,6 +159,11 @@ bool CharacterSet::intersects(const CharacterSet &other) const {
return !copy.remove_set(other).is_empty();
}
// Return the characters present in both sets. This relies on remove_set
// returning the set of characters that were actually removed (i.e. the
// overlap with `other`), the same property the intersects() implementation
// above depends on.
CharacterSet CharacterSet::intersection(const CharacterSet &other) const {
  CharacterSet copy(*this);
  return copy.remove_set(other);
}
vector<CharacterRange> CharacterSet::included_ranges() const {
return consolidate_ranges(included_chars);
}

View file

@ -35,6 +35,7 @@ struct CharacterSet {
void add_set(const CharacterSet &other);
CharacterSet remove_set(const CharacterSet &other);
CharacterSet intersection(const CharacterSet &other) const;
bool intersects(const CharacterSet &other) const;
bool is_empty() const;
@ -49,4 +50,4 @@ struct CharacterSet {
} // namespace rules
} // namespace tree_sitter
#endif // COMPILER_RULES_CHARACTER_SET_H_
#endif // COMPILER_RULES_CHARACTER_SET_H_

View file

@ -223,7 +223,7 @@ const TSLanguage *load_real_language(const string &language_name) {
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
string grammar_json = read_file(grammar_filename);
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult result = ts_compile_grammar(grammar_json.c_str(), nullptr);
if (result.error_type != TSCompileErrorTypeNone) {
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
return nullptr;

View file

@ -27,7 +27,7 @@ for (auto &language_name : test_languages) {
if (file_exists(expected_error_path)) {
it("fails with the correct error message", [&]() {
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
string expected_error = read_file(expected_error_path);
AssertThat((void *)compile_result.error_message, !Equals<void *>(nullptr));
AssertThat(compile_result.error_message, Equals(expected_error));
@ -43,7 +43,7 @@ for (auto &language_name : test_languages) {
string external_scanner_path = join_path({directory_path, "scanner.c"});
if (!file_exists(external_scanner_path)) external_scanner_path = "";
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
language = load_test_language(
language_name,

View file

@ -26,7 +26,7 @@ describe("Language", []() {
"value": "b"
}
}
})JSON");
})JSON", nullptr);
TSParser *parser = ts_parser_new();
const TSLanguage *language = load_test_language("aliased_rules", compile_result);

View file

@ -71,7 +71,7 @@ string grammar_with_aliases_and_extras = R"JSON({
const TSLanguage *language_with_aliases_and_extras = load_test_language(
"aliases_and_extras",
ts_compile_grammar(grammar_with_aliases_and_extras.c_str())
ts_compile_grammar(grammar_with_aliases_and_extras.c_str(), nullptr)
);
describe("Node", [&]() {