diff --git a/cli/src/generate/build_tables/build_lex_table.rs b/cli/src/generate/build_tables/build_lex_table.rs index be265b7a..e5f78921 100644 --- a/cli/src/generate/build_tables/build_lex_table.rs +++ b/cli/src/generate/build_tables/build_lex_table.rs @@ -18,7 +18,6 @@ pub(crate) fn build_lex_table( keywords: &TokenSet, coincident_token_index: &CoincidentTokenIndex, token_conflict_map: &TokenConflictMap, - minimize: bool, ) -> (LexTable, LexTable) { let keyword_lex_table; if syntax_grammar.word_token.is_some() { @@ -78,12 +77,8 @@ pub(crate) fn build_lex_table( } let mut table = builder.table; - - if minimize { - minimize_lex_table(&mut table, parse_table); - sort_states(&mut table, parse_table); - } - + minimize_lex_table(&mut table, parse_table); + sort_states(&mut table, parse_table); (table, keyword_lex_table) } diff --git a/cli/src/generate/build_tables/mod.rs b/cli/src/generate/build_tables/mod.rs index 54356d8d..78c26045 100644 --- a/cli/src/generate/build_tables/mod.rs +++ b/cli/src/generate/build_tables/mod.rs @@ -26,7 +26,6 @@ pub(crate) fn build_tables( simple_aliases: &AliasMap, variable_info: &Vec, inlines: &InlinedProductionMap, - minimize: bool, ) -> Result<(ParseTable, LexTable, LexTable, Option)> { let (mut parse_table, following_tokens) = build_parse_table(syntax_grammar, lexical_grammar, inlines, variable_info)?; @@ -48,16 +47,14 @@ pub(crate) fn build_tables( &keywords, ); populate_used_symbols(&mut parse_table, syntax_grammar, lexical_grammar); - if minimize { - minimize_parse_table( - &mut parse_table, - syntax_grammar, - lexical_grammar, - simple_aliases, - &token_conflict_map, - &keywords, - ); - } + minimize_parse_table( + &mut parse_table, + syntax_grammar, + lexical_grammar, + simple_aliases, + &token_conflict_map, + &keywords, + ); let (main_lex_table, keyword_lex_table) = build_lex_table( &mut parse_table, syntax_grammar, @@ -65,7 +62,6 @@ pub(crate) fn build_tables( &keywords, &coincident_token_index, &token_conflict_map, - minimize, ); mark_fragile_tokens(&mut parse_table, lexical_grammar, &token_conflict_map); Ok(( diff --git a/cli/src/generate/mod.rs b/cli/src/generate/mod.rs index e4daa071..07788a02 100644 --- a/cli/src/generate/mod.rs +++ b/cli/src/generate/mod.rs @@ -1,15 +1,3 @@ -use self::build_tables::build_tables; -use self::parse_grammar::parse_grammar; -use self::prepare_grammar::prepare_grammar; -use self::render::render_c_code; -use crate::error::{Error, Result}; -use lazy_static::lazy_static; -use regex::{Regex, RegexBuilder}; -use std::fs; -use std::io::Write; -use std::path::{Path, PathBuf}; -use std::process::{Command, Stdio}; - mod build_tables; mod dedup; mod grammars; @@ -18,10 +6,26 @@ mod node_types; mod npm_files; pub mod parse_grammar; mod prepare_grammar; +pub mod properties; mod render; mod rules; mod tables; +use self::build_tables::build_tables; +use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType}; +use self::parse_grammar::parse_grammar; +use self::prepare_grammar::prepare_grammar; +use self::render::render_c_code; +use self::rules::AliasMap; +use crate::error::{Error, Result}; +use lazy_static::lazy_static; +use regex::{Regex, RegexBuilder}; +use std::collections::HashSet; +use std::fs::{self, File}; +use std::io::{BufWriter, Write}; +use std::path::{Path, PathBuf}; +use std::process::{Command, Stdio}; + lazy_static! { static ref JSON_COMMENT_REGEX: Regex = RegexBuilder::new("^\\s*//.*") .multi_line(true) @@ -30,7 +34,6 @@ lazy_static! { } struct GeneratedParser { - name: String, c_code: String, node_types_json: String, } @@ -38,14 +41,17 @@ struct GeneratedParser { pub fn generate_parser_in_directory( repo_path: &PathBuf, grammar_path: Option<&str>, - minimize: bool, + properties_only: bool, ) -> Result<()> { - let repo_src_path = repo_path.join("src"); - let repo_header_path = repo_src_path.join("tree_sitter"); + let src_path = repo_path.join("src"); + let header_path = src_path.join("tree_sitter"); + let properties_dir_path = repo_path.join("properties"); - fs::create_dir_all(&repo_src_path)?; - fs::create_dir_all(&repo_header_path)?; + // Ensure that the output directories exist. + fs::create_dir_all(&src_path)?; + fs::create_dir_all(&header_path)?; + // Read the grammar.json. let grammar_json; match grammar_path { Some(path) => { @@ -54,48 +60,90 @@ pub fn generate_parser_in_directory( None => { let grammar_js_path = grammar_path.map_or(repo_path.join("grammar.js"), |s| s.into()); grammar_json = load_grammar_file(&grammar_js_path)?; - fs::write(&repo_src_path.join("grammar.json"), &grammar_json)?; + fs::write(&src_path.join("grammar.json"), &grammar_json)?; } } - let GeneratedParser { - name: language_name, - c_code, - node_types_json, - } = generate_parser_for_grammar_with_opts(&grammar_json, minimize)?; + // Parse and preprocess the grammar. + let input_grammar = parse_grammar(&grammar_json)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; + let language_name = input_grammar.name; + + // If run with no arguments, read all of the property sheets and compile them to JSON. + if grammar_path.is_none() { + let token_names = get_token_names(&syntax_grammar, &lexical_grammar); + if let Ok(entries) = fs::read_dir(properties_dir_path) { + for entry in entries { + let css_path = entry?.path(); + let css = fs::read_to_string(&css_path)?; + let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?; + let property_sheet_json_path = src_path + .join(css_path.file_name().unwrap()) + .with_extension("json"); + let property_sheet_json_file = + File::create(&property_sheet_json_path).map_err(Error::wrap(|| { + format!("Failed to create {:?}", property_sheet_json_path) + }))?; + let mut writer = BufWriter::new(property_sheet_json_file); + serde_json::to_writer_pretty(&mut writer, &sheet)?; + } + } + } + + // Generate the parser and related files. + if !properties_only { + let GeneratedParser { + c_code, + node_types_json, + } = generate_parser_for_grammar_with_opts( + &language_name, + syntax_grammar, + lexical_grammar, + inlines, + simple_aliases, + )?; + + write_file(&src_path.join("parser.c"), c_code)?; + write_file(&src_path.join("node-types.json"), node_types_json)?; + write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?; + write_file( + &repo_path.join("index.js"), + npm_files::index_js(&language_name), + )?; + ensure_file(&src_path.join("binding.cc"), || { + npm_files::binding_cc(&language_name) + })?; + ensure_file(&repo_path.join("binding.gyp"), || { + npm_files::binding_gyp(&language_name) + })?; + } - write_file(&repo_src_path.join("parser.c"), c_code)?; - write_file(&repo_src_path.join("node-types.json"), node_types_json)?; - write_file( - &repo_header_path.join("parser.h"), - tree_sitter::PARSER_HEADER, - )?; - write_file( - &repo_path.join("index.js"), - npm_files::index_js(&language_name), - )?; - ensure_file(&repo_src_path.join("binding.cc"), || { - npm_files::binding_cc(&language_name) - })?; - ensure_file(&repo_path.join("binding.gyp"), || { - npm_files::binding_gyp(&language_name) - })?; Ok(()) } pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> { let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n"); - let parser = generate_parser_for_grammar_with_opts(&grammar_json, true)?; - Ok((parser.name, parser.c_code)) + let input_grammar = parse_grammar(&grammar_json)?; + let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = + prepare_grammar(&input_grammar)?; + let parser = generate_parser_for_grammar_with_opts( + &input_grammar.name, + syntax_grammar, + lexical_grammar, + inlines, + simple_aliases, + )?; + Ok((input_grammar.name, parser.c_code)) } fn generate_parser_for_grammar_with_opts( - grammar_json: &str, - minimize: bool, + name: &String, + syntax_grammar: SyntaxGrammar, + lexical_grammar: LexicalGrammar, + inlines: InlinedProductionMap, + simple_aliases: AliasMap, ) -> Result { - let input_grammar = parse_grammar(grammar_json)?; - let (syntax_grammar, lexical_grammar, inlines, simple_aliases) = - prepare_grammar(&input_grammar)?; let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?; let node_types_json = node_types::generate_node_types_json( &syntax_grammar, @@ -109,11 +157,9 @@ fn generate_parser_for_grammar_with_opts( &simple_aliases, &variable_info, &inlines, - minimize, )?; - let name = input_grammar.name; let c_code = render_c_code( - &name, + name, parse_table, main_lex_table, keyword_lex_table, @@ -123,12 +169,40 @@ fn generate_parser_for_grammar_with_opts( simple_aliases, ); Ok(GeneratedParser { - name, c_code, node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(), }) } +fn get_token_names( + syntax_grammar: &SyntaxGrammar, + lexical_grammar: &LexicalGrammar, +) -> HashSet { + let mut result = HashSet::new(); + for variable in &lexical_grammar.variables { + if variable.kind == VariableType::Named { + result.insert(variable.name.clone()); + } + } + for token in &syntax_grammar.external_tokens { + if token.kind == VariableType::Named { + result.insert(token.name.clone()); + } + } + for variable in &syntax_grammar.variables { + for production in &variable.productions { + for step in &production.steps { + if let Some(alias) = &step.alias { + if !step.symbol.is_non_terminal() && alias.is_named { + result.insert(alias.value.clone()); + } + } + } + } + } + result +} + fn load_grammar_file(grammar_path: &Path) -> Result { match grammar_path.extension().and_then(|e| e.to_str()) { Some("js") => Ok(load_js_grammar_file(grammar_path)?), diff --git a/cli/src/properties.rs b/cli/src/generate/properties.rs similarity index 84% rename from cli/src/properties.rs rename to cli/src/generate/properties.rs index ebea8ba0..bd4a5a08 100644 --- a/cli/src/properties.rs +++ b/cli/src/generate/properties.rs @@ -1,20 +1,21 @@ use crate::error::{Error, Result}; -use log::info; +use crate::generate::dedup::split_state_id_groups; use rsass; use rsass::sass::Value; use rsass::selectors::SelectorPart; use serde_derive::Serialize; +use std::cmp::Ordering; use std::collections::hash_map::Entry; -use std::collections::{btree_map, BTreeMap, HashMap, VecDeque}; +use std::collections::{btree_map, BTreeMap, HashMap, HashSet, VecDeque}; use std::fmt::{self, Write}; -use std::fs::{self, File}; -use std::io::BufWriter; +use std::hash::{Hash, Hasher}; +use std::mem; use std::path::{Path, PathBuf}; use tree_sitter::{self, PropertyStateJSON, PropertyTransitionJSON}; #[derive(Clone, Debug, PartialEq, Eq, Serialize)] #[serde(untagged)] -enum PropertyValue { +pub(crate) enum PropertyValue { Number(isize), Boolean(bool), String(String), @@ -27,7 +28,7 @@ type PropertySheetJSON = tree_sitter::PropertySheetJSON; type StateId = usize; type PropertySetId = usize; -#[derive(Clone, PartialEq, Eq)] +#[derive(Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] struct SelectorStep { kind: Option, field: Option, @@ -37,7 +38,7 @@ struct SelectorStep { is_immediate: bool, } -#[derive(PartialEq, Eq)] +#[derive(PartialEq, Eq, PartialOrd, Ord)] struct Selector(Vec); #[derive(Debug, PartialEq, Eq)] @@ -46,15 +47,15 @@ struct Rule { properties: PropertySet, } -#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)] -struct Item { +#[derive(Clone, Copy, Debug)] +struct Item<'a> { rule_id: u32, - selector_id: u32, + selector: &'a Selector, step_id: u32, } -#[derive(Clone, PartialEq, Eq, Hash)] -struct ItemSet(Vec); +#[derive(Clone, PartialEq, Eq)] +struct ItemSet<'a>(Vec>); #[derive(Debug, PartialEq, Eq, PartialOrd, Ord)] struct SelectorMatch { @@ -62,19 +63,104 @@ struct SelectorMatch { rule_id: u32, } -struct Builder { - rules: Vec, +struct Builder<'a> { + rules: &'a Vec, output: PropertySheetJSON, - ids_by_item_set: HashMap, - item_set_queue: VecDeque<(ItemSet, StateId)>, + start_item_set: ItemSet<'a>, + token_names: &'a HashSet, + ids_by_item_set: HashMap, StateId>, + item_set_queue: VecDeque<(ItemSet<'a>, StateId)>, + item_set_list: Vec>, } -impl ItemSet { +impl<'a> Item<'a> { + fn next_step(&self) -> Option<&SelectorStep> { + self.selector.0.get(self.step_id as usize) + } + + fn is_done(&self) -> bool { + self.step_id as usize == self.selector.0.len() + } +} + +impl<'a> Ord for Item<'a> { + fn cmp(&self, other: &Item) -> Ordering { + self.rule_id + .cmp(&other.rule_id) + .then_with(|| self.selector.0.len().cmp(&other.selector.0.len())) + .then_with(|| { + for (i, step) in self + .selector + .0 + .iter() + .enumerate() + .skip(self.step_id as usize) + { + let result = step.cmp(&other.selector.0[i]); + if result != Ordering::Equal { + return result; + } + } + Ordering::Equal + }) + } +} + +impl<'a> PartialOrd for Item<'a> { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl<'a> Hash for Item<'a> { + fn hash(&self, hasher: &mut H) { + hasher.write_u32(self.rule_id); + hasher.write_usize(self.selector.0.len()); + for step in &self.selector.0[self.step_id as usize..] { + step.hash(hasher); + } + } +} + +impl<'a> PartialEq for Item<'a> { + fn eq(&self, other: &Self) -> bool { + if self.rule_id != other.rule_id || self.selector.0.len() != other.selector.0.len() { + return false; + } + + for (i, step) in self + .selector + .0 + .iter() + .enumerate() + .skip(self.step_id as usize) + { + if *step != other.selector.0[i] { + return false; + } + } + + true + } +} + +impl<'a> Eq for Item<'a> {} + +impl<'a> Hash for ItemSet<'a> { + fn hash(&self, hasher: &mut H) { + hasher.write_usize(self.0.len()); + for item in &self.0 { + item.hash(hasher); + } + } +} + +impl<'a> ItemSet<'a> { fn new() -> Self { ItemSet(Vec::new()) } - fn insert(&mut self, item: Item) { + fn insert(&mut self, item: Item<'a>) { match self.0.binary_search(&item) { Err(i) => self.0.insert(i, item), _ => {} @@ -82,40 +168,35 @@ impl ItemSet { } } -impl<'a> IntoIterator for &'a ItemSet { - type Item = &'a Item; - type IntoIter = std::slice::Iter<'a, Item>; - fn into_iter(self) -> Self::IntoIter { - self.0.iter() - } -} - -impl Builder { - fn new(rules: Vec) -> Self { +impl<'a> Builder<'a> { + fn new(rules: &'a Vec, token_names: &'a HashSet) -> Self { Builder { rules, + start_item_set: ItemSet::new(), + item_set_list: Vec::new(), output: PropertySheetJSON { states: Vec::new(), property_sets: Vec::new(), }, + token_names, ids_by_item_set: HashMap::new(), item_set_queue: VecDeque::new(), } } fn build(mut self) -> PropertySheetJSON { - let mut start_item_set = ItemSet::new(); for (i, rule) in self.rules.iter().enumerate() { - for j in 0..rule.selectors.len() { - start_item_set.insert(Item { + for selector in &rule.selectors { + self.start_item_set.insert(Item { rule_id: i as u32, - selector_id: j as u32, + selector, step_id: 0, }); } } - self.add_state(start_item_set); + self.add_state(ItemSet::new()); + self.output.states[0].id = Some(0); while let Some((item_set, state_id)) = self.item_set_queue.pop_front() { self.populate_state(item_set, state_id); } @@ -129,7 +210,7 @@ impl Builder { self.output } - fn add_state(&mut self, item_set: ItemSet) -> StateId { + fn add_state(&mut self, item_set: ItemSet<'a>) -> StateId { match self.ids_by_item_set.entry(item_set) { Entry::Occupied(o) => *o.get(), Entry::Vacant(v) => { @@ -161,21 +242,17 @@ impl Builder { } } - fn populate_state(&mut self, item_set: ItemSet, state_id: StateId) { + fn populate_state(&mut self, item_set: ItemSet<'a>, state_id: StateId) { let is_start_state = state_id == 0; let mut transitions: HashMap = HashMap::new(); let mut selector_matches = Vec::new(); // First, compute all of the possible state transition conditions for // this state, and all of the rules that are currently matching. - for item in &item_set { - let rule = &self.rules[item.rule_id as usize]; - let selector = &rule.selectors[item.selector_id as usize]; - let next_step = selector.0.get(item.step_id as usize); - + for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { // If this item has more elements remaining in its selector, then // add a state transition based on the next step. - if let Some(step) = next_step { + if let Some(step) = item.next_step() { transitions .entry(PropertyTransitionJSON { kind: step.kind.clone(), @@ -197,11 +274,28 @@ impl Builder { else { selector_matches.push(SelectorMatch { rule_id: item.rule_id, - specificity: selector_specificity(selector), + specificity: selector_specificity(item.selector), }); } } + // Compute the merged properties that apply in the current state. + // Sort the matching property sets by ascending specificity and by + // their order in the sheet. This way, more specific selectors and later + // rules will override less specific selectors and earlier rules. + let mut properties = PropertySet::new(); + selector_matches.sort_unstable_by(|a, b| { + (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) + }); + selector_matches.dedup(); + for selector_match in selector_matches { + let rule = &self.rules[selector_match.rule_id as usize]; + for (property, value) in &rule.properties { + properties.insert(property.clone(), value.clone()); + } + } + self.output.states[state_id].property_set_id = self.add_property_set(properties); + // If there are multiple transitions that could *both* match (e.g. one based on a // a node type and one based on a field name), then create an additional transition // for the intersection of the two. @@ -237,80 +331,75 @@ impl Builder { // destination state. i = 0; while i < transition_list.len() { - let mut next_item_set = ItemSet::new(); - for item in &item_set { - let rule = &self.rules[item.rule_id as usize]; - let selector = &rule.selectors[item.selector_id as usize]; - let next_step = selector.0.get(item.step_id as usize); + let transition = &mut transition_list[i].0; + let transition_is_leaf = transition.named == Some(false) + || transition + .kind + .as_ref() + .map_or(false, |kind| self.token_names.contains(kind)); - if let Some(step) = next_step { + let mut next_item_set = ItemSet::new(); + let mut transition_differs_from_start_state = false; + for item in item_set.0.iter().chain(self.start_item_set.0.iter()) { + if let Some(next_step) = item.next_step() { // If the next step of the item's selector satisfies this transition, // advance the item to the next part of its selector and add the // resulting item to this transition's destination state. - if step_matches_transition(step, &transition_list[i].0) { - next_item_set.insert(Item { + if step_matches_transition(next_step, transition) { + let next_item = Item { rule_id: item.rule_id, - selector_id: item.selector_id, + selector: item.selector, step_id: item.step_id + 1, - }); + }; + if !transition_is_leaf || next_item.is_done() { + next_item_set.insert(next_item); + if item.step_id > 0 { + transition_differs_from_start_state = true; + } + } } // If the next step of the item is not an immediate child, then // include this item in this transition's destination state, because // the next step of the item might match a descendant node. - if !step.is_immediate { + if !transition_is_leaf && !next_step.is_immediate && item.step_id > 0 { next_item_set.insert(*item); + transition_differs_from_start_state = true; } } } - if !is_start_state { - if next_item_set.0.iter().all(|item| item.step_id == 1) { - transition_list.remove(i); + if (is_start_state || transition_differs_from_start_state) + && !next_item_set.0.is_empty() + { + transition.state_id = self.add_state(next_item_set); + if is_start_state || !self.output.states[0].transitions.contains(&transition) { + i += 1; continue; } } - - transition_list[i].0.state_id = self.add_state(next_item_set); - i += 1; + transition_list.remove(i); } - // Compute the merged properties that apply in the current state. - // Sort the matching property sets by ascending specificity and by - // their order in the sheet. This way, more specific selectors and later - // rules will override less specific selectors and earlier rules. - let mut properties = PropertySet::new(); - selector_matches.sort_unstable_by(|a, b| { - (a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id)) - }); - selector_matches.dedup(); - for selector_match in selector_matches { - let rule = &self.rules[selector_match.rule_id as usize]; - for (property, value) in &rule.properties { - properties.insert(property.clone(), value.clone()); - } - } + self.output.states[state_id] + .transitions + .extend(transition_list.into_iter().map(|i| i.0)); // Compute the default successor item set - the item set that // we should advance to if the next element doesn't match any // of the next elements in the item set's selectors. let mut default_next_item_set = ItemSet::new(); - for item in &item_set { - let rule = &self.rules[item.rule_id as usize]; - let selector = &rule.selectors[item.selector_id as usize]; - let next_step = selector.0.get(item.step_id as usize); + for item in &item_set.0 { + let next_step = item.selector.0.get(item.step_id as usize); if let Some(step) = next_step { if !step.is_immediate { default_next_item_set.insert(*item); } } } - self.output.states[state_id].default_next_state_id = self.add_state(default_next_item_set); - self.output.states[state_id].property_set_id = self.add_property_set(properties); - self.output.states[state_id] - .transitions - .extend(transition_list.into_iter().map(|i| i.0)); + + self.item_set_list.push(item_set); } fn intersect_transitions( @@ -339,73 +428,80 @@ impl Builder { } fn remove_duplicate_states(&mut self) { - let mut state_replacements = BTreeMap::new(); - let mut done = false; - while !done { - done = true; - for (i, state_i) in self.output.states.iter().enumerate() { - if state_replacements.contains_key(&i) { - continue; - } - for (j, state_j) in self.output.states.iter().enumerate() { - if j == i { - break; - } - if state_replacements.contains_key(&j) { - continue; - } - if state_i == state_j { - info!("replace state {} with state {}", i, j); - state_replacements.insert(i, j); - done = false; - break; - } - } - } - for state in self.output.states.iter_mut() { - for transition in state.transitions.iter_mut() { - if let Some(replacement) = state_replacements.get(&transition.state_id) { - transition.state_id = *replacement; - } - } - if let Some(replacement) = state_replacements.get(&state.default_next_state_id) { - state.default_next_state_id = *replacement; - } - state.transitions.dedup(); - } + let mut state_ids_by_properties = HashMap::new(); + for (i, state) in self.output.states.iter().enumerate() { + state_ids_by_properties + .entry(state.property_set_id) + .or_insert(Vec::new()) + .push(i); } - - let final_state_replacements = (0..self.output.states.len()) + let mut state_ids_by_group_id = state_ids_by_properties .into_iter() - .map(|state_id| { - let replacement = state_replacements - .get(&state_id) - .cloned() - .unwrap_or(state_id); - let prior_removed = state_replacements - .iter() - .take_while(|i| *i.0 < replacement) - .count(); - replacement - prior_removed - }) + .map(|e| e.1) .collect::>(); + state_ids_by_group_id.sort(); + let start_group_index = state_ids_by_group_id + .iter() + .position(|g| g.contains(&0)) + .unwrap(); + state_ids_by_group_id.swap(start_group_index, 0); - for state in self.output.states.iter_mut() { - for transition in state.transitions.iter_mut() { - transition.state_id = final_state_replacements[transition.state_id]; + let mut group_ids_by_state_id = vec![0; self.output.states.len()]; + for (group_id, state_ids) in state_ids_by_group_id.iter().enumerate() { + for state_id in state_ids { + group_ids_by_state_id[*state_id] = group_id; } - state.default_next_state_id = final_state_replacements[state.default_next_state_id] } - let mut i = 0; - self.output.states.retain(|_| { - let result = !state_replacements.contains_key(&i); - i += 1; - result - }); + while split_state_id_groups( + &self.output.states, + &mut state_ids_by_group_id, + &mut group_ids_by_state_id, + 0, + property_states_differ, + ) { + continue; + } + + let mut new_states = Vec::with_capacity(state_ids_by_group_id.len()); + for state_ids in state_ids_by_group_id.iter() { + let mut new_state = PropertyStateJSON::default(); + mem::swap(&mut new_state, &mut self.output.states[state_ids[0]]); + for transition in new_state.transitions.iter_mut() { + transition.state_id = group_ids_by_state_id[transition.state_id]; + } + new_state.default_next_state_id = + group_ids_by_state_id[new_state.default_next_state_id]; + new_states.push(new_state); + } + self.output.states = new_states; } } +fn property_states_differ( + left: &PropertyStateJSON, + right: &PropertyStateJSON, + group_ids_by_state_id: &Vec, +) -> bool { + if group_ids_by_state_id[left.default_next_state_id] + != group_ids_by_state_id[right.default_next_state_id] + { + return true; + } + + left.transitions + .iter() + .zip(right.transitions.iter()) + .any(|(left, right)| { + left.kind != right.kind + || left.named != right.named + || left.index != right.index + || left.field != right.field + || left.text != right.text + || group_ids_by_state_id[left.state_id] != group_ids_by_state_id[right.state_id] + }) +} + fn selector_specificity(selector: &Selector) -> u32 { let mut result = 0; for step in &selector.0 { @@ -464,6 +560,9 @@ fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJ impl fmt::Debug for SelectorStep { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + if self.is_immediate { + write!(f, "> ")?; + } write!(f, "(")?; if let Some(kind) = &self.kind { if self.is_named.unwrap() { @@ -502,38 +601,13 @@ impl fmt::Debug for Selector { } } -pub fn generate_property_sheets_in_directory(repo_path: &Path) -> Result<()> { - let src_dir_path = repo_path.join("src"); - let properties_dir_path = repo_path.join("properties"); - - if let Ok(entries) = fs::read_dir(properties_dir_path) { - for entry in entries { - let css_path = entry?.path(); - let css = fs::read_to_string(&css_path)?; - let sheet = generate_property_sheet(&css_path, &css)?; - let property_sheet_json_path = src_dir_path - .join(css_path.file_name().unwrap()) - .with_extension("json"); - let property_sheet_json_file = - File::create(&property_sheet_json_path).map_err(Error::wrap(|| { - format!("Failed to create {:?}", property_sheet_json_path) - }))?; - let mut writer = BufWriter::new(property_sheet_json_file); - serde_json::to_writer_pretty(&mut writer, &sheet)?; - } - } - - Ok(()) -} - -pub fn generate_property_sheet_string(path: impl AsRef, css: &str) -> Result { - let sheet = generate_property_sheet(path, css)?; - Ok(serde_json::to_string(&sheet)?) -} - -fn generate_property_sheet(path: impl AsRef, css: &str) -> Result { +pub(crate) fn generate_property_sheet( + path: impl AsRef, + css: &str, + token_names: &HashSet, +) -> Result { let rules = parse_property_sheet(path.as_ref(), &css)?; - Ok(Builder::new(rules).build()) + Ok(Builder::new(&rules, token_names).build()) } fn parse_property_sheet(path: &Path, css: &str) -> Result> { @@ -868,6 +942,7 @@ fn interpolation_error() -> Error { mod tests { use super::*; use regex::Regex; + use std::fs; use tempfile::TempDir; #[test] @@ -897,6 +972,7 @@ mod tests { height: 3; } ", + &HashSet::new(), ) .unwrap(); @@ -996,6 +1072,7 @@ mod tests { color: purple; } ", + &HashSet::new(), ) .unwrap(); @@ -1040,6 +1117,7 @@ mod tests { } .x { color: violet; } ", + &HashSet::new(), ) .unwrap(); @@ -1095,6 +1173,7 @@ mod tests { f1 f2[text='a'] { color: blue; } f1 f2[text='b'] { color: violet; } ", + &HashSet::new(), ) .unwrap(); @@ -1150,6 +1229,7 @@ mod tests { c: f(g(h), i, \"j\", 10); } ", + &HashSet::new(), ) .unwrap(); @@ -1190,6 +1270,7 @@ mod tests { ); } ", + &HashSet::new(), ) .unwrap(); @@ -1212,6 +1293,7 @@ mod tests { c: h(); } ", + &HashSet::new(), ) .unwrap(); @@ -1285,6 +1367,7 @@ mod tests { @import "the-dependency/properties/dependency-sheet1.css"; b { y: '4'; } "#, + &HashSet::new(), ) .unwrap(); diff --git a/cli/src/lib.rs b/cli/src/lib.rs index 1e0d021d..33a9904f 100644 --- a/cli/src/lib.rs +++ b/cli/src/lib.rs @@ -5,7 +5,6 @@ pub mod highlight; pub mod loader; pub mod logger; pub mod parse; -pub mod properties; pub mod test; pub mod util; pub mod wasm; diff --git a/cli/src/main.rs b/cli/src/main.rs index b3aa0322..23e7fc1a 100644 --- a/cli/src/main.rs +++ b/cli/src/main.rs @@ -4,7 +4,7 @@ use std::{env, fs, u64}; use std::path::Path; use std::process::exit; use tree_sitter_cli::{ - config, error, generate, highlight, loader, logger, parse, properties, test, wasm, web_ui, + config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui, }; const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION"); @@ -114,19 +114,11 @@ fn run() -> error::Result<()> { config.save(&home_dir)?; } else if let Some(matches) = matches.subcommand_matches("generate") { let grammar_path = matches.value_of("grammar-path"); - let minimize = !matches.is_present("no-minimize"); let properties_only = matches.is_present("properties-only"); - let parser_only = grammar_path.is_some(); if matches.is_present("log") { logger::init(); } - - if !properties_only { - generate::generate_parser_in_directory(¤t_dir, grammar_path, minimize)?; - } - if !parser_only { - properties::generate_property_sheets_in_directory(¤t_dir)?; - } + generate::generate_parser_in_directory(¤t_dir, grammar_path, properties_only)?; } else if let Some(matches) = matches.subcommand_matches("test") { let debug = matches.is_present("debug"); let debug_graph = matches.is_present("debug-graph"); diff --git a/cli/src/tests/properties_test.rs b/cli/src/tests/properties_test.rs index 90000bfb..7c67a4df 100644 --- a/cli/src/tests/properties_test.rs +++ b/cli/src/tests/properties_test.rs @@ -1,7 +1,9 @@ use super::helpers::fixtures::get_language; -use crate::properties; +use crate::generate::properties; use serde_derive::Deserialize; +use serde_json; use tree_sitter::{Parser, PropertySheet}; +use std::collections::HashSet; #[derive(Debug, Default, Deserialize, PartialEq, Eq)] struct Properties { @@ -14,7 +16,7 @@ fn test_walk_with_properties_with_nth_child() { let language = get_language("javascript"); let property_sheet = PropertySheet::::new( language, - &properties::generate_property_sheet_string( + &generate_property_sheet_string( "/some/path.css", " binary_expression > identifier:nth-child(2) { @@ -30,7 +32,6 @@ fn test_walk_with_properties_with_nth_child() { } ", ) - .unwrap(), ) .unwrap(); @@ -90,7 +91,7 @@ fn test_walk_with_properties_with_regexes() { let language = get_language("javascript"); let property_sheet = PropertySheet::::new( language, - &properties::generate_property_sheet_string( + &generate_property_sheet_string( "/some/path.css", " identifier { @@ -106,7 +107,6 @@ fn test_walk_with_properties_with_regexes() { } ", ) - .unwrap(), ) .unwrap(); @@ -174,7 +174,7 @@ fn test_walk_with_properties_based_on_fields() { let language = get_language("javascript"); let property_sheet = PropertySheet::::new( language, - &properties::generate_property_sheet_string( + &generate_property_sheet_string( "/some/path.css", " arrow_function > .parameter { @@ -196,7 +196,6 @@ fn test_walk_with_properties_based_on_fields() { } ", ) - .unwrap(), ) .unwrap(); @@ -259,3 +258,7 @@ fn test_walk_with_properties_based_on_fields() { } ); } + +fn generate_property_sheet_string(path: &str, css: &str) -> String { + serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap()).unwrap() +} diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs index cbb16140..9db71d8e 100644 --- a/lib/binding_rust/lib.rs +++ b/lib/binding_rust/lib.rs @@ -104,7 +104,7 @@ pub struct PropertyTransitionJSON { pub state_id: usize, } -#[derive(Debug, Deserialize, Serialize, PartialEq, Eq)] +#[derive(Debug, Default, Deserialize, Serialize, PartialEq, Eq)] pub struct PropertyStateJSON { pub id: Option, pub property_set_id: usize,