Merge pull request #1329 from narpfel/improve-performance

Improve performance of `tree-sitter generate`
This commit is contained in:
Max Brunsfeld 2021-08-11 16:08:23 -07:00 committed by GitHub
commit c6dd5da5e6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 47 additions and 29 deletions

12
Cargo.lock generated
View file

@@ -495,6 +495,12 @@ dependencies = [
"crossbeam-utils",
]
[[package]]
name = "rustc-hash"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "08d43f7aa6b08d49f382cde6a7982047c3426db949b1424bc4b7ec9ae12c6ce2"
[[package]]
name = "ryu"
version = "1.0.5"
@@ -541,9 +547,9 @@ dependencies = [
[[package]]
name = "smallbitvec"
version = "2.5.0"
version = "2.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "797a4eaffb90d896f29698d45676f9f940a71936d7574996a7df54593ba209fa"
checksum = "75ce4f9dc4a41b4c3476cc925f1efb11b66df373a8fde5d4b8915fa91b5d995e"
[[package]]
name = "spin"
@@ -689,11 +695,13 @@ dependencies = [
"dirs",
"glob",
"html-escape",
"indexmap",
"lazy_static",
"log",
"rand",
"regex",
"regex-syntax",
"rustc-hash",
"serde",
"serde_derive",
"serde_json",

View file

@@ -27,12 +27,14 @@ difference = "2.0"
dirs = "3.0"
glob = "0.3.0"
html-escape = "0.2.6"
indexmap = "1"
lazy_static = "1.2.0"
regex = "1"
regex-syntax = "0.6.4"
rustc-hash = "1"
serde = "1.0"
serde_derive = "1.0"
smallbitvec = "2.3.0"
smallbitvec = "2.5.1"
tiny_http = "0.8"
walkdir = "2.3"
webbrowser = "0.5.1"

View file

@@ -11,10 +11,14 @@ use crate::generate::tables::{
ProductionInfo, ProductionInfoId,
};
use anyhow::{anyhow, Result};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::hash::BuildHasherDefault;
use std::u32;
use std::{cmp::Ordering, collections::hash_map::Entry};
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
@@ -49,7 +53,7 @@ struct ParseTableBuilder<'a> {
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
core_ids_by_core: HashMap<ParseItemSetCore<'a>, usize>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
state_ids_by_item_set: IndexMap<ParseItemSet<'a>, ParseStateId, BuildHasherDefault<FxHasher>>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
@@ -147,13 +151,7 @@ impl<'a> ParseTableBuilder<'a> {
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
let core_id = match self.core_ids_by_core.entry(core) {
Entry::Occupied(e) => *e.get(),
Entry::Vacant(e) => {
e.insert(core_count);
core_count
}
};
let core_id = *self.core_ids_by_core.entry(core).or_insert(core_count);
let state_id = self.parse_table.states.len();
self.parse_state_info_by_id
@@ -163,8 +161,8 @@ impl<'a> ParseTableBuilder<'a> {
id: state_id,
lex_state_id: 0,
external_lex_state_id: 0,
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
terminal_entries: IndexMap::default(),
nonterminal_entries: IndexMap::default(),
core_id,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
@@ -981,7 +979,7 @@ pub(crate) fn build_parse_table<'a>(
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: HashMap::new(),
state_ids_by_item_set: IndexMap::default(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),
parse_state_queue: VecDeque::new(),

View file

@@ -1057,7 +1057,7 @@ impl Generator {
}
fn add_parse_table(&mut self) {
let mut parse_table_entries = Vec::new();
let mut parse_table_entries = HashMap::new();
let mut next_parse_action_list_index = 0;
self.get_parse_action_list_id(
@@ -1224,6 +1224,11 @@ impl Generator {
add_line!(self, "");
}
let mut parse_table_entries: Vec<_> = parse_table_entries
.into_iter()
.map(|(entry, i)| (i, entry))
.collect();
parse_table_entries.sort_by_key(|(index, _)| *index);
self.add_parse_action_list(parse_table_entries);
}
@@ -1404,17 +1409,17 @@ impl Generator {
fn get_parse_action_list_id(
&self,
entry: &ParseTableEntry,
parse_table_entries: &mut Vec<(usize, ParseTableEntry)>,
parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
next_parse_action_list_index: &mut usize,
) -> usize {
if let Some((index, _)) = parse_table_entries.iter().find(|(_, e)| *e == *entry) {
return *index;
if let Some(&index) = parse_table_entries.get(entry) {
index
} else {
let result = *next_parse_action_list_index;
parse_table_entries.insert(entry.clone(), result);
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
let result = *next_parse_action_list_index;
parse_table_entries.push((result, entry.clone()));
*next_parse_action_list_index += 1 + entry.actions.len();
result
}
fn get_field_map_id(

View file

@@ -1,11 +1,16 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::{BTreeMap, HashMap};
use std::collections::BTreeMap;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
use std::hash::BuildHasherDefault;
use indexmap::IndexMap;
use rustc_hash::FxHasher;
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ParseAction {
Accept,
Shift {
@@ -28,7 +33,7 @@ pub(crate) enum GotoAction {
ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq)]
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
pub reusable: bool,
@@ -37,8 +42,8 @@ pub(crate) struct ParseTableEntry {
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct ParseState {
pub id: ParseStateId,
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
pub terminal_entries: IndexMap<Symbol, ParseTableEntry, BuildHasherDefault<FxHasher>>,
pub nonterminal_entries: IndexMap<Symbol, GotoAction, BuildHasherDefault<FxHasher>>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,