Max Brunsfeld 2019-01-17 17:16:04 -08:00
parent cbcc61a8cf
commit ed195de8b6
15 changed files with 170 additions and 143 deletions

View file

@@ -1,4 +1,4 @@
use std::{io, env, fs};
use std::{env, fs, io};
fn main() {
let git_sha = read_git_sha().unwrap();

View file

@@ -456,60 +456,68 @@ impl<'a> ParseTableBuilder<'a> {
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
let interpretions = conflicting_items.iter().enumerate().map(|(i, item)| {
let mut line = String::new();
write!(&mut line, " {}:", i + 1).unwrap();
let interpretions = conflicting_items
.iter()
.enumerate()
.map(|(i, item)| {
let mut line = String::new();
write!(&mut line, " {}:", i + 1).unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(
&mut line,
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut line, "").unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
{
write!(&mut line, " {}", self.symbol_name(preceding_symbol)).unwrap();
}
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
write!(&mut line, ")").unwrap();
if item.is_done() {
write!(
&mut line,
" • {} …",
self.symbol_name(&conflicting_lookahead)
" ({}",
&self.syntax_grammar.variables[item.variable_index as usize].name
)
.unwrap();
}
let precedence = item.precedence();
let associativity = item.associativity();
for (j, step) in item.production.steps.iter().enumerate() {
if j as u32 == item.step_index {
write!(&mut line, "").unwrap();
}
write!(&mut line, " {}", self.symbol_name(&step.symbol)).unwrap();
}
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
))
} else if precedence > 0 {
Some(format!("(precedence: {})", precedence))
} else {
None
};
write!(&mut line, ")").unwrap();
(line, prec_line)
}).collect::<Vec<_>>();
if item.is_done() {
write!(
&mut line,
" • {} …",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
}
let max_interpretation_length = interpretions.iter().map(|i| i.0.chars().count()).max().unwrap();
let precedence = item.precedence();
let associativity = item.associativity();
let prec_line = if let Some(associativity) = associativity {
Some(format!(
"(precedence: {}, associativity: {:?})",
precedence, associativity
))
} else if precedence > 0 {
Some(format!("(precedence: {})", precedence))
} else {
None
};
(line, prec_line)
})
.collect::<Vec<_>>();
let max_interpretation_length = interpretions
.iter()
.map(|i| i.0.chars().count())
.max()
.unwrap();
for (line, prec_suffix) in interpretions {
msg += &line;

View file

@@ -3,14 +3,15 @@ use crate::error::{Error, Result};
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::Rule;
use regex::Regex;
use regex_syntax::ast::{
parse, Ast, Class, ClassPerlKind, ClassSet, ClassSetItem, RepetitionKind, RepetitionRange,
};
use regex::Regex;
use std::i32;
lazy_static! {
static ref CURLY_BRACE_REGEX: Regex = Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap();
static ref CURLY_BRACE_REGEX: Regex =
Regex::new(r#"(^|[^\\])\{([^}]*[^0-9,}][^}]*)\}"#).unwrap();
}
const ALLOWED_REDUNDANT_ESCAPED_CHARS: [char; 4] = ['!', '\'', '"', '/'];
@@ -621,14 +622,9 @@ mod tests {
},
// nested groups
Row {
rules: vec![Rule::seq(vec![
Rule::pattern(r#"([^x\\]|\\(.|\n))+"#),
])],
rules: vec![Rule::seq(vec![Rule::pattern(r#"([^x\\]|\\(.|\n))+"#)])],
separators: vec![],
examples: vec![
("abcx", Some((0, "abc"))),
("abc\\0x", Some((0, "abc\\0"))),
],
examples: vec![("abcx", Some((0, "abc"))), ("abc\\0x", Some((0, "abc\\0")))],
},
// allowing unrecognized escape sequences
Row {
@@ -660,7 +656,7 @@ mod tests {
("u{1234} ok", Some((0, "u{1234}"))),
("{aba}}", Some((1, "{aba}"))),
],
}
},
];
for Row {

View file

@@ -1,5 +1,5 @@
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
#[derive(Clone, Default)]
struct SymbolStatus {
@@ -9,20 +9,34 @@ struct SymbolStatus {
pub(super) fn extract_simple_aliases(
syntax_grammar: &mut SyntaxGrammar,
lexical_grammar: &LexicalGrammar
lexical_grammar: &LexicalGrammar,
) -> AliasMap {
// Determine which symbols in the grammars are *always* aliased to a single name.
let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
let mut non_terminal_status_list = vec![SymbolStatus::default(); syntax_grammar.variables.len()];
let mut external_status_list = vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
let mut non_terminal_status_list =
vec![SymbolStatus::default(); syntax_grammar.variables.len()];
let mut external_status_list =
vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
for variable in syntax_grammar.variables.iter() {
for production in variable.productions.iter() {
for step in production.steps.iter() {
let mut status = match step.symbol {
Symbol { kind: SymbolType::External, index} => &mut external_status_list[index],
Symbol { kind: SymbolType::NonTerminal, index} => &mut non_terminal_status_list[index],
Symbol { kind: SymbolType::Terminal, index} => &mut terminal_status_list[index],
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
Symbol {
kind: SymbolType::External,
index,
} => &mut external_status_list[index],
Symbol {
kind: SymbolType::NonTerminal,
index,
} => &mut non_terminal_status_list[index],
Symbol {
kind: SymbolType::Terminal,
index,
} => &mut terminal_status_list[index],
Symbol {
kind: SymbolType::End,
..
} => panic!("Unexpected end token"),
};
if step.alias.is_none() {
@@ -47,10 +61,22 @@ pub(super) fn extract_simple_aliases(
for production in variable.productions.iter_mut() {
for step in production.steps.iter_mut() {
let status = match step.symbol {
Symbol { kind: SymbolType::External, index} => &external_status_list[index],
Symbol { kind: SymbolType::NonTerminal, index} => &non_terminal_status_list[index],
Symbol { kind: SymbolType::Terminal, index} => &terminal_status_list[index],
Symbol { kind: SymbolType::End, .. } => panic!("Unexpected end token"),
Symbol {
kind: SymbolType::External,
index,
} => &external_status_list[index],
Symbol {
kind: SymbolType::NonTerminal,
index,
} => &non_terminal_status_list[index],
Symbol {
kind: SymbolType::Terminal,
index,
} => &terminal_status_list[index],
Symbol {
kind: SymbolType::End,
..
} => panic!("Unexpected end token"),
};
if status.alias.is_some() {
@@ -83,7 +109,9 @@ pub(super) fn extract_simple_aliases(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType, Production, ProductionStep};
use crate::generate::grammars::{
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
};
use crate::generate::nfa::Nfa;
#[test]
@@ -93,35 +121,29 @@ mod tests {
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
],
},
],
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
],
}],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
dynamic_precedence: 0,
steps: vec![
// Token 0 is always aliased as "a1".
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
// Token 1 is aliased above, but not here.
ProductionStep::new(Symbol::terminal(1)),
// Token 2 is aliased differently than above.
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
],
},
],
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// Token 0 is always aliased as "a1".
ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
// Token 1 is aliased above, but not here.
ProductionStep::new(Symbol::terminal(1)),
// Token 2 is aliased differently than above.
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
],
}],
},
],
extra_tokens: Vec::new(),
@@ -151,49 +173,50 @@ mod tests {
kind: VariableType::Anonymous,
implicit_precedence: 0,
start_state: 0,
}
},
],
};
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
assert_eq!(simple_aliases.len(), 1);
assert_eq!(simple_aliases[&Symbol::terminal(0)], Alias {
value: "a1".to_string(),
is_named: true,
});
assert_eq!(
simple_aliases[&Symbol::terminal(0)],
Alias {
value: "a1".to_string(),
is_named: true,
}
);
assert_eq!(syntax_grammar.variables, vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
assert_eq!(
syntax_grammar.variables,
vec![
SyntaxVariable {
name: "v1".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
// 'Simple' alias removed
ProductionStep::new(Symbol::terminal(0)),
// Other aliases unchanged
ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
],
},
],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![
Production {
},],
},
SyntaxVariable {
name: "v2".to_owned(),
kind: VariableType::Named,
productions: vec![Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(0)),
ProductionStep::new(Symbol::terminal(1)),
ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
],
},
],
},
]);
},],
},
]
);
}
}

View file

@@ -243,7 +243,7 @@ impl TokenExtractor {
Variable {
name: string_value.clone(),
kind: VariableType::Anonymous,
rule: rule.clone()
rule: rule.clone(),
}
} else {
self.current_variable_token_count += 1;

View file

@@ -1,9 +1,9 @@
use super::ExtractedSyntaxGrammar;
use crate::error::{Error, Result};
use crate::generate::rules::Symbol;
use crate::generate::grammars::{
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
};
use crate::generate::rules::Symbol;
use crate::generate::rules::{Alias, Associativity, Rule};
struct RuleFlattener {

View file

@@ -7,9 +7,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
let interner = Interner { grammar };
if variable_type_for_name(&grammar.variables[0].name) == VariableType::Hidden {
return Err(Error(
"A grammar's start rule must be visible.".to_string(),
));
return Err(Error("A grammar's start rule must be visible.".to_string()));
}
let mut variables = Vec::with_capacity(grammar.variables.len());

View file

@@ -178,7 +178,6 @@ impl Builder {
text: step.text_pattern.clone(),
state_id: 0,
},
// Include the rule id so that it can be used when sorting transitions.
item.rule_id,
));
@@ -432,9 +431,8 @@ pub fn generate_property_sheets(repo_path: &Path) -> Result<()> {
let property_sheet_json_path = src_dir_path
.join(css_path.file_name().unwrap())
.with_extension("json");
let property_sheet_json_file = File::create(&property_sheet_json_path).map_err(|e|
format!("Failed to create {:?}: {}", property_sheet_json_path, e)
)?;
let property_sheet_json_file = File::create(&property_sheet_json_path)
.map_err(|e| format!("Failed to create {:?}: {}", property_sheet_json_path, e))?;
let mut writer = BufWriter::new(property_sheet_json_file);
serde_json::to_writer_pretty(&mut writer, &sheet)?;
}

View file

@@ -563,7 +563,9 @@ impl Generator {
indent!(self);
for i in 0..self.syntax_grammar.external_tokens.len() {
let token = &self.syntax_grammar.external_tokens[i];
let id_token = token.corresponding_internal_token.unwrap_or(Symbol::external(i));
let id_token = token
.corresponding_internal_token
.unwrap_or(Symbol::external(i));
add_line!(
self,
"[{}] = {},",

View file

@@ -59,10 +59,7 @@ pub(crate) enum Rule {
impl Rule {
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias {
is_named,
value
});
params.alias = Some(Alias { is_named, value });
})
}

View file

@@ -1,4 +1,4 @@
use super::fixtures::{get_language, get_test_language, fixtures_dir};
use super::fixtures::{fixtures_dir, get_language, get_test_language};
use crate::generate;
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::util;

View file

@@ -1,7 +1,7 @@
use crate::loader::Loader;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::Language;
use std::fs;
lazy_static! {
static ref ROOT_DIR: PathBuf = [env!("CARGO_MANIFEST_DIR"), ".."].iter().collect();
@@ -41,11 +41,6 @@ pub fn get_test_language(name: &str, parser_code: String, path: &Path) -> Langua
None
};
TEST_LOADER
.load_language_from_sources(
name,
&HEADER_DIR,
&parser_c_path,
&scanner_path,
)
.load_language_from_sources(name, &HEADER_DIR, &parser_c_path, &scanner_path)
.unwrap()
}

View file

@@ -1,3 +1,3 @@
mod fixtures;
mod corpuses;
mod fixtures;
mod parser_api;

View file

@@ -1,6 +1,6 @@
use super::fixtures::get_language;
use std::thread;
use tree_sitter::{InputEdit, LogType, Parser, Point, PropertySheet, Language};
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, PropertySheet};
fn rust() -> Language {
get_language("rust")

View file

@@ -35,7 +35,11 @@ pub(crate) fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<Log
.take()
.expect("Failed to open stdin for Dot");
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(PathBuf::from(path), Some(dot_process), Some(dot_stdin)))
Ok(LogSession(
PathBuf::from(path),
Some(dot_process),
Some(dot_stdin),
))
}
#[cfg(unix)]
@@ -46,11 +50,17 @@ impl Drop for LogSession {
drop(self.2.take().unwrap());
let output = self.1.take().unwrap().wait_with_output().unwrap();
if output.status.success() {
if cfg!(target_os = "macos") && fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64 {
if cfg!(target_os = "macos")
&& fs::metadata(&self.0).unwrap().len() > HTML_HEADER.len() as u64
{
Command::new("open").arg("log.html").output().unwrap();
}
} else {
eprintln!("Dot failed: {} {}", String::from_utf8_lossy(&output.stdout), String::from_utf8_lossy(&output.stderr));
eprintln!(
"Dot failed: {} {}",
String::from_utf8_lossy(&output.stdout),
String::from_utf8_lossy(&output.stderr)
);
}
}
}