In lex error state, don't look for tokens that would match *any* line

This commit is contained in:
Max Brunsfeld 2015-10-28 17:45:17 -07:00
parent dba0726eef
commit a8ead10d6f
9 changed files with 3279 additions and 2626 deletions

View file

@ -13,6 +13,7 @@
'src/compiler/build_tables/build_lex_table.cc',
'src/compiler/build_tables/build_parse_table.cc',
'src/compiler/build_tables/build_tables.cc',
'src/compiler/build_tables/does_match_any_line.cc',
'src/compiler/build_tables/get_completion_status.cc',
'src/compiler/build_tables/get_metadata.cc',
'src/compiler/build_tables/item_set_closure.cc',

View file

@ -0,0 +1,41 @@
#include "compiler/compiler_spec_helper.h"
#include "compiler/rules/character_set.h"
#include "compiler/build_tables/does_match_any_line.h"
using namespace rules;
using namespace build_tables;
START_TEST
describe("does_match_any_line(rule)", []() {
  it("returns true for rules that match any sequence of characters on a line", [&]() {
    // A lone character set (presumably "any character", since nothing is
    // excluded) is not repeated, so it is not reported as matching a line.
    rule_ptr rule = character({}, false);
    AssertThat(does_match_any_line(rule), IsFalse());

    // Repeating that same character set is reported as matching any line.
    rule = repeat(character({}, false));
    AssertThat(does_match_any_line(rule), IsTrue());

    // A choice is reported as matching when one of its alternatives does.
    rule = choice({ repeat(character({}, false)), str("x") });
    AssertThat(does_match_any_line(rule), IsTrue());

    // The same holds when the choice sits inside the repetition.
    rule = repeat(choice({ character({}, false), str("x") }));
    AssertThat(does_match_any_line(rule), IsTrue());

    // A choice between plain strings is not reported as matching.
    rule = choice({ str("y"), str("x") });
    AssertThat(does_match_any_line(rule), IsFalse());

    // A sequence is reported as matching only when both halves match.
    rule = seq({ repeat(character({}, false)), repeat(character({}, false)) });
    AssertThat(does_match_any_line(rule), IsTrue());

    rule = seq({ repeat(character({}, false)), str("x") });
    AssertThat(does_match_any_line(rule), IsFalse());

    // Listing NUL and newline in the character set does not prevent a match.
    rule = repeat(character({0, '\n'}, false));
    AssertThat(does_match_any_line(rule), IsTrue());
  });
});
END_TEST

13
spec/fixtures/corpus/c/errors.txt vendored Normal file
View file

@ -0,0 +1,13 @@
==========================================
errors in compound statements
==========================================
int main() { %%% }
---
(program (function_definition
(type_name (identifier))
(declarator (identifier))
(compound_statement
(ERROR (UNEXPECTED '%')))))

13
spec/fixtures/corpus/c/preprocesser.txt vendored Normal file
View file

@ -0,0 +1,13 @@
=============================================
#defines
=============================================
#define THING abc def \
ghi jkl
#define OTHER_THING mno
---
(program
(preproc_define (identifier))
(preproc_define (identifier)))

View file

@ -6,9 +6,15 @@ namespace tree_sitter_examples {
// http://slps.github.io/zoo/c/iso-9899-tc3.html
extern const Grammar c = Grammar({
{ "program", choice({
{ "program", repeat(choice({
sym("preproc_define"),
sym("function_definition"),
sym("declaration") }) },
sym("declaration") })) },
{ "preproc_define", seq({
str("#define"),
sym("identifier"),
token(repeat(choice({ str("\\\n"), pattern(".") }))) }) },
{ "function_definition", seq({
optional(sym("declaration_specifiers")),
@ -126,7 +132,7 @@ extern const Grammar c = Grammar({
{ "compound_statement", seq({
str("{"),
repeat(choice({ sym("declaration"), sym("statement") })),
err(repeat(choice({ sym("declaration"), sym("statement") }))),
str("}") }) },
{ "expression", choice({

File diff suppressed because it is too large Load diff

View file

@ -9,6 +9,7 @@
#include "compiler/build_tables/get_completion_status.h"
#include "compiler/build_tables/get_metadata.h"
#include "compiler/build_tables/lex_item.h"
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/parse_table.h"
#include "compiler/lexical_grammar.h"
#include "compiler/rules/built_in_symbols.h"
@ -50,18 +51,18 @@ class LexTableBuilder {
LexTable build() {
for (ParseState &parse_state : parse_table->states) {
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs());
LexItemSet item_set = build_lex_item_set(parse_state.expected_inputs(), false);
parse_state.lex_state_id = add_lex_state(item_set);
}
LexItemSet error_item_set = build_lex_item_set(parse_table->symbols);
LexItemSet error_item_set = build_lex_item_set(parse_table->symbols, true);
populate_lex_state(error_item_set, LexTable::ERROR_STATE_ID);
return lex_table;
}
private:
LexItemSet build_lex_item_set(const set<Symbol> &symbols) {
LexItemSet build_lex_item_set(const set<Symbol> &symbols, bool error) {
LexItemSet result;
for (const Symbol &symbol : symbols) {
vector<rule_ptr> rules;
@ -71,6 +72,9 @@ class LexTableBuilder {
rules.push_back(CharacterSet().include(0).copy());
} else if (symbol.is_token) {
rule_ptr rule = lex_grammar.variables[symbol.index].rule;
if (error && does_match_any_line(rule))
continue;
auto choice = rule->as<rules::Choice>();
if (choice)
for (const rule_ptr &element : choice->elements)

View file

@ -0,0 +1,62 @@
#include "compiler/build_tables/does_match_any_line.h"
#include "compiler/rules/choice.h"
#include "compiler/rules/character_set.h"
#include "compiler/rules/repeat.h"
#include "compiler/rules/visitor.h"
#include "compiler/rules/seq.h"
#include "compiler/rules/metadata.h"
namespace tree_sitter {
namespace build_tables {
// RuleFn<bool> subclass answering whether a rule can stand for "any
// character". Only character sets, metadata wrappers and choices are handled
// here; every other rule kind is left to RuleFn's own handling.
class DoesTokenCatchAnyCharacter : public rules::RuleFn<bool> {
  // A character set qualifies when it includes everything and the only
  // characters it excludes are NUL (0) and newline.
  bool apply_to(const rules::CharacterSet *rule) {
    if (!rule->includes_all)
      return false;

    for (uint32_t excluded : rule->excluded_chars) {
      const bool is_allowed_exclusion = (excluded == 0 || excluded == '\n');
      if (!is_allowed_exclusion)
        return false;
    }
    return true;
  }

  // Metadata wrappers are judged by the rule they wrap.
  bool apply_to(const rules::Metadata *rule) {
    return apply(rule->rule);
  }

  // A choice qualifies as soon as one alternative does; later alternatives
  // are not visited once a match has been found.
  bool apply_to(const rules::Choice *rule) {
    bool found = false;
    for (const rule_ptr &alternative : rule->elements) {
      if (apply(alternative)) {
        found = true;
        break;
      }
    }
    return found;
  }
};
// RuleFn<bool> subclass answering whether a whole token rule matches any run
// of characters. Repeats, metadata wrappers, choices and sequences are
// handled here; every other rule kind is left to RuleFn's own handling.
class DoesTokenCatchAll : public rules::RuleFn<bool> {
  // A sequence qualifies only when both halves do. The right half is not
  // visited when the left half already fails.
  bool apply_to(const rules::Seq *rule) {
    if (!apply(rule->left))
      return false;
    return apply(rule->right);
  }

  // A choice qualifies as soon as one alternative does.
  bool apply_to(const rules::Choice *rule) {
    bool found = false;
    for (const rule_ptr &alternative : rule->elements) {
      if (apply(alternative)) {
        found = true;
        break;
      }
    }
    return found;
  }

  // Metadata wrappers are judged by the rule they wrap.
  bool apply_to(const rules::Metadata *rule) {
    return apply(rule->rule);
  }

  // A repetition qualifies when its content can stand for any character,
  // which is decided by a separate DoesTokenCatchAnyCharacter pass.
  bool apply_to(const rules::Repeat *rule) {
    DoesTokenCatchAnyCharacter any_character;
    return any_character.apply(rule->content);
  }
};
bool does_match_any_line(const rule_ptr &rule) {
return DoesTokenCatchAll().apply(rule);
}
} // namespace build_tables
} // namespace tree_sitter

View file

@ -0,0 +1,14 @@
#ifndef COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#define COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_
#include "tree_sitter/compiler.h"
namespace tree_sitter {
namespace build_tables {
// Returns true when the given rule would match any line of input, for
// example a repetition of a character set that excludes nothing other than
// NUL and newline. The lex table builder uses this to leave such tokens out
// of the lex error state.
bool does_match_any_line(const rule_ptr &);
} // namespace build_tables
} // namespace tree_sitter
#endif // COMPILER_BUILD_TABLES_DOES_MATCH_ANY_LINE_H_