Make ts_compile_grammar take an optional log file, start logging to it
This commit is contained in:
parent
69d8c6f5e6
commit
6fca8f2f4d
13 changed files with 164 additions and 25 deletions
|
|
@ -5,6 +5,8 @@
|
|||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
typedef enum {
|
||||
TSCompileErrorTypeNone,
|
||||
TSCompileErrorTypeInvalidGrammar,
|
||||
|
|
@ -25,7 +27,7 @@ typedef struct {
|
|||
TSCompileErrorType error_type;
|
||||
} TSCompileResult;
|
||||
|
||||
TSCompileResult ts_compile_grammar(const char *input);
|
||||
/*
 * Compile the grammar described by `input` and return the outcome as a
 * TSCompileResult.
 *
 * `log_file` is optional: when it is non-NULL, the compiler writes
 * diagnostic log messages to it while compiling; pass NULL to disable
 * logging.
 */
TSCompileResult ts_compile_grammar(const char *input, FILE *log_file);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@
|
|||
'src/compiler/compile.cc',
|
||||
'src/compiler/generate_code/c_code.cc',
|
||||
'src/compiler/lex_table.cc',
|
||||
'src/compiler/log.cc',
|
||||
'src/compiler/parse_grammar.cc',
|
||||
'src/compiler/parse_table.cc',
|
||||
'src/compiler/precedence_range.cc',
|
||||
|
|
|
|||
|
|
@ -9,9 +9,11 @@
|
|||
#include <vector>
|
||||
#include "compiler/build_tables/lex_item.h"
|
||||
#include "compiler/build_tables/lookahead_set.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/log.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/rule.h"
|
||||
#include "utf8proc.h"
|
||||
|
||||
namespace tree_sitter {
|
||||
namespace build_tables {
|
||||
|
|
@ -78,6 +80,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
bool conflict_detection_mode;
|
||||
LookaheadSet keyword_symbols;
|
||||
Symbol keyword_capture_token;
|
||||
char encoding_buffer[8];
|
||||
|
||||
public:
|
||||
LexTableBuilderImpl(const SyntaxGrammar &syntax_grammar,
|
||||
|
|
@ -151,6 +154,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
|
||||
// For each pair of tokens, generate a lex table for just those two tokens and record what
|
||||
// conflicts arise.
|
||||
LOG_START("detecting conflicts between tokens");
|
||||
conflict_detection_mode = true;
|
||||
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
for (Symbol::Index j = 0; j < i; j++) {
|
||||
|
|
@ -165,6 +169,7 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
}
|
||||
}
|
||||
}
|
||||
LOG_END();
|
||||
|
||||
// Find a 'keyword capture token' that matches all of the identified keywords.
|
||||
for (Symbol::Index i = 0, n = grammar.variables.size(); i < n; i++) {
|
||||
|
|
@ -304,9 +309,33 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
|
||||
if (prefer_advancing && !next_item_set_can_yield_this_token) {
|
||||
auto advance_symbol = transition.destination.entries.begin()->lhs;
|
||||
if (characters.intersects(following_characters_by_token[accept_action.symbol.index]) ||
|
||||
characters.intersects(separator_start_characters)) {
|
||||
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerStringWithValidNextChar);
|
||||
auto &following_chars = following_characters_by_token[accept_action.symbol.index];
|
||||
CharacterSet conflicting_following_chars = characters.intersection(following_chars);
|
||||
CharacterSet conflicting_sep_chars = characters.intersection(separator_start_characters);
|
||||
if (!conflicting_following_chars.is_empty()) {
|
||||
LOG(
|
||||
"%s shadows %s followed by '%s'",
|
||||
token_name(advance_symbol).c_str(),
|
||||
token_name(accept_action.symbol).c_str(),
|
||||
log_char(*conflicting_following_chars.included_chars.begin())
|
||||
);
|
||||
record_conflict(
|
||||
accept_action.symbol,
|
||||
advance_symbol,
|
||||
MatchesLongerStringWithValidNextChar
|
||||
);
|
||||
} else if (!conflicting_sep_chars.is_empty()) {
|
||||
LOG(
|
||||
"%s shadows %s followed by '%s'",
|
||||
token_name(advance_symbol).c_str(),
|
||||
token_name(accept_action.symbol).c_str(),
|
||||
log_char(*conflicting_sep_chars.included_chars.begin())
|
||||
);
|
||||
record_conflict(
|
||||
accept_action.symbol,
|
||||
advance_symbol,
|
||||
MatchesLongerStringWithValidNextChar
|
||||
);
|
||||
} else {
|
||||
record_conflict(accept_action.symbol, advance_symbol, MatchesLongerString);
|
||||
}
|
||||
|
|
@ -508,8 +537,22 @@ class LexTableBuilderImpl : public LexTableBuilder {
|
|||
main_lex_state_ids.clear();
|
||||
}
|
||||
|
||||
const string &token_name(rules::Symbol &symbol) {
|
||||
return grammar.variables[symbol.index].name;
|
||||
// Returns the name used for `symbol` in log messages: the variable's name
// as-is when the variable is named, otherwise the name wrapped in single
// quotes.
string token_name(rules::Symbol &symbol) {
  const LexicalVariable &variable = grammar.variables[symbol.index];
  if (variable.type != VariableTypeNamed) {
    return "'" + variable.name + "'";
  }
  return variable.name;
}
|
||||
|
||||
const char *log_char(int32_t character) {
|
||||
uint32_t count = utf8proc_encode_char(
|
||||
character,
|
||||
reinterpret_cast<utf8proc_uint8_t *>(encoding_buffer)
|
||||
);
|
||||
encoding_buffer[count] = 0;
|
||||
return encoding_buffer;
|
||||
}
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@
|
|||
#include <string>
|
||||
#include <unordered_map>
|
||||
#include <utility>
|
||||
#include "compiler/log.h"
|
||||
#include "compiler/parse_table.h"
|
||||
#include "compiler/build_tables/parse_item.h"
|
||||
#include "compiler/build_tables/parse_item_set_builder.h"
|
||||
|
|
@ -152,8 +153,8 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
|
||||
parse_table.states[state_id].terminal_entries.clear();
|
||||
|
||||
// Add all the tokens that have no conflict with other tokens.
|
||||
LookaheadSet non_conflicting_tokens;
|
||||
// First, identify the conflict-free tokens.
|
||||
LookaheadSet conflict_free_tokens;
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
bool conflicts_with_other_tokens = false;
|
||||
|
|
@ -166,27 +167,41 @@ class ParseTableBuilderImpl : public ParseTableBuilder {
|
|||
break;
|
||||
}
|
||||
}
|
||||
if (!conflicts_with_other_tokens) non_conflicting_tokens.insert(token);
|
||||
if (!conflicts_with_other_tokens) conflict_free_tokens.insert(token);
|
||||
}
|
||||
|
||||
// Include in the error recovery state all of the tokens that are either
|
||||
// conflict-free themselves, or have no conflicts with any conflict-free
|
||||
// tokens.
|
||||
LOG_START("finding non-conflicting tokens for error recovery");
|
||||
LookaheadSet tokens;
|
||||
for (unsigned i = 0; i < lexical_grammar.variables.size(); i++) {
|
||||
Symbol token = Symbol::terminal(i);
|
||||
bool conflicts_with_other_tokens = false;
|
||||
if (!non_conflicting_tokens.contains(token)) {
|
||||
non_conflicting_tokens.for_each([&](Symbol other_token) {
|
||||
if (conflict_free_tokens.contains(token)) {
|
||||
LOG("include %s", symbol_name(token).c_str());
|
||||
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
|
||||
} else {
|
||||
bool conflicts_with_other_tokens = false;
|
||||
conflict_free_tokens.for_each([&](Symbol other_token) {
|
||||
if (!coincident_tokens_by_token[token.index].contains(other_token) &&
|
||||
(lex_table_builder->get_conflict_status(other_token, token) & CannotMerge)) {
|
||||
LOG(
|
||||
"exclude %s: conflicts with %s",
|
||||
symbol_name(token).c_str(),
|
||||
symbol_name(other_token).c_str()
|
||||
);
|
||||
conflicts_with_other_tokens = true;
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
});
|
||||
}
|
||||
if (!conflicts_with_other_tokens) {
|
||||
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
|
||||
if (!conflicts_with_other_tokens) {
|
||||
LOG("include %s", symbol_name(token).c_str());
|
||||
parse_table.add_terminal_action(state_id, token, ParseAction::Recover());
|
||||
}
|
||||
}
|
||||
}
|
||||
LOG_END();
|
||||
|
||||
for (size_t i = 0; i < grammar.external_tokens.size(); i++) {
|
||||
if (grammar.external_tokens[i].corresponding_internal_token == rules::NONE()) {
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
#include "compiler/build_tables/parse_table_builder.h"
|
||||
#include "compiler/generate_code/c_code.h"
|
||||
#include "compiler/syntax_grammar.h"
|
||||
#include "compiler/log.h"
|
||||
#include "compiler/lexical_grammar.h"
|
||||
#include "compiler/parse_grammar.h"
|
||||
#include "json.h"
|
||||
|
|
@ -16,7 +17,9 @@ using std::vector;
|
|||
using std::get;
|
||||
using std::make_tuple;
|
||||
|
||||
extern "C" TSCompileResult ts_compile_grammar(const char *input) {
|
||||
extern "C" TSCompileResult ts_compile_grammar(const char *input, FILE *log_file) {
|
||||
set_log_file(log_file);
|
||||
|
||||
ParseGrammarResult parse_result = parse_grammar(string(input));
|
||||
if (!parse_result.error_message.empty()) {
|
||||
return { nullptr, strdup(parse_result.error_message.c_str()),
|
||||
|
|
@ -48,8 +51,8 @@ extern "C" TSCompileResult ts_compile_grammar(const char *input) {
|
|||
move(lexical_grammar)
|
||||
);
|
||||
|
||||
return {
|
||||
strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
|
||||
set_log_file(nullptr);
|
||||
return { strdup(code.c_str()), nullptr, TSCompileErrorTypeNone };
|
||||
}
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
|
|
|||
31
src/compiler/log.cc
Normal file
31
src/compiler/log.cc
Normal file
|
|
@ -0,0 +1,31 @@
|
|||
#include "compiler/log.h"
|
||||
|
||||
static const char *SPACES = " ";
|
||||
|
||||
namespace tree_sitter {

// Per-thread logging state: the current nesting depth of log sections and
// the stream that log output is written to (nullptr means logging is off).
thread_local unsigned _indent_level = 0;
thread_local FILE *_log_file = nullptr;

// Installs `file` as the calling thread's log destination and resets the
// indentation depth. Passing nullptr disables logging.
void set_log_file(FILE *file) {
  _log_file = file;
  _indent_level = 0;
}

// Returns the calling thread's current log destination, or nullptr.
FILE *get_log_file() {
  return _log_file;
}

// Increases the nesting depth used for subsequent log lines.
void _indent_logs() {
  _indent_level++;
}

// Decreases the nesting depth. An unbalanced call at depth zero is ignored
// so the unsigned counter can never wrap around to a huge value.
void _outdent_logs() {
  if (_indent_level > 0) _indent_level--;
}

// Writes two spaces per nesting level to the log file. Does nothing when no
// log file is installed. The padding is produced by fprintf's field width
// rather than by slicing a fixed string of spaces, so any depth is safe.
void _print_indent() {
  if (!_log_file || _indent_level == 0) return;
  fprintf(_log_file, "%*s", static_cast<int>(_indent_level * 2), "");
}

}  // namespace tree_sitter
|
||||
38
src/compiler/log.h
Normal file
38
src/compiler/log.h
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
#ifndef COMPILER_LOG_H_
|
||||
#define COMPILER_LOG_H_
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
namespace tree_sitter {
|
||||
|
||||
void set_log_file(FILE *);
|
||||
FILE *get_log_file();
|
||||
void _indent_logs();
|
||||
void _outdent_logs();
|
||||
void _print_indent();
|
||||
|
||||
// Logs a message, then increases the indentation for the log lines that
// follow. Pair every LOG_START with a LOG_END.
#define LOG_START(...) \
  do {                 \
    LOG(__VA_ARGS__);  \
    _indent_logs();    \
  } while (0)

// Decreases the indentation and then logs an empty message, which ends the
// section with a blank line. Any arguments are ignored.
#define LOG_END(...) \
  do {               \
    _outdent_logs(); \
    LOG("");         \
  } while (0)

// Writes one printf-style formatted line to the current log file at the
// current indentation. Does nothing when no log file is set. The local
// variable has a deliberately unusual name so that it cannot shadow a
// caller's variable that appears in the format arguments.
#define LOG(...)                           \
  do {                                     \
    FILE *ts_log_file_ = get_log_file();   \
    if (ts_log_file_) {                    \
      _print_indent();                     \
      fprintf(ts_log_file_, __VA_ARGS__);  \
      fputs("\n", ts_log_file_);           \
    }                                      \
  } while (0)
|
||||
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_LOG_H_
|
||||
|
|
@ -159,6 +159,11 @@ bool CharacterSet::intersects(const CharacterSet &other) const {
|
|||
return !copy.remove_set(other).is_empty();
|
||||
}
|
||||
|
||||
// Returns the result of calling remove_set(other) on a copy of this set,
// leaving this set itself unmodified.
// NOTE(review): remove_set presumably returns the characters it removed,
// i.e. those common to both sets; intersects() relies on the same return
// value. Confirm against remove_set's definition.
CharacterSet CharacterSet::intersection(const CharacterSet &other) const {
  CharacterSet scratch = *this;
  CharacterSet shared = scratch.remove_set(other);
  return shared;
}
|
||||
|
||||
vector<CharacterRange> CharacterSet::included_ranges() const {
|
||||
return consolidate_ranges(included_chars);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,7 @@ struct CharacterSet {
|
|||
|
||||
void add_set(const CharacterSet &other);
|
||||
CharacterSet remove_set(const CharacterSet &other);
|
||||
CharacterSet intersection(const CharacterSet &other) const;
|
||||
bool intersects(const CharacterSet &other) const;
|
||||
bool is_empty() const;
|
||||
|
||||
|
|
@ -49,4 +50,4 @@ struct CharacterSet {
|
|||
} // namespace rules
|
||||
} // namespace tree_sitter
|
||||
|
||||
#endif // COMPILER_RULES_CHARACTER_SET_H_
|
||||
#endif // COMPILER_RULES_CHARACTER_SET_H_
|
||||
|
|
|
|||
|
|
@ -223,7 +223,7 @@ const TSLanguage *load_real_language(const string &language_name) {
|
|||
printf("\n" "Regenerating the %s parser...\n", language_name.c_str());
|
||||
|
||||
string grammar_json = read_file(grammar_filename);
|
||||
TSCompileResult result = ts_compile_grammar(grammar_json.c_str());
|
||||
TSCompileResult result = ts_compile_grammar(grammar_json.c_str(), nullptr);
|
||||
if (result.error_type != TSCompileErrorTypeNone) {
|
||||
fprintf(stderr, "Failed to compile %s grammar: %s\n", language_name.c_str(), result.error_message);
|
||||
return nullptr;
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ for (auto &language_name : test_languages) {
|
|||
|
||||
if (file_exists(expected_error_path)) {
|
||||
it("fails with the correct error message", [&]() {
|
||||
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
|
||||
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
|
||||
string expected_error = read_file(expected_error_path);
|
||||
AssertThat((void *)compile_result.error_message, !Equals<void *>(nullptr));
|
||||
AssertThat(compile_result.error_message, Equals(expected_error));
|
||||
|
|
@ -43,7 +43,7 @@ for (auto &language_name : test_languages) {
|
|||
string external_scanner_path = join_path({directory_path, "scanner.c"});
|
||||
if (!file_exists(external_scanner_path)) external_scanner_path = "";
|
||||
|
||||
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str());
|
||||
TSCompileResult compile_result = ts_compile_grammar(grammar_json.c_str(), nullptr);
|
||||
|
||||
language = load_test_language(
|
||||
language_name,
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ describe("Language", []() {
|
|||
"value": "b"
|
||||
}
|
||||
}
|
||||
})JSON");
|
||||
})JSON", nullptr);
|
||||
|
||||
TSParser *parser = ts_parser_new();
|
||||
const TSLanguage *language = load_test_language("aliased_rules", compile_result);
|
||||
|
|
|
|||
|
|
@ -71,7 +71,7 @@ string grammar_with_aliases_and_extras = R"JSON({
|
|||
|
||||
const TSLanguage *language_with_aliases_and_extras = load_test_language(
|
||||
"aliases_and_extras",
|
||||
ts_compile_grammar(grammar_with_aliases_and_extras.c_str())
|
||||
ts_compile_grammar(grammar_with_aliases_and_extras.c_str(), nullptr)
|
||||
);
|
||||
|
||||
describe("Node", [&]() {
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue