Merge branch 'master' into simpler-large-char-set-code

This commit is contained in:
Max Brunsfeld 2024-04-12 10:03:46 -07:00
commit 3498498449
81 changed files with 918 additions and 734 deletions

View file

@ -1,14 +1,18 @@
use std::{
collections::{hash_map::Entry, HashMap, VecDeque},
mem,
};
use log::info;
use super::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap};
use crate::generate::{
build_tables::{coincident_tokens::CoincidentTokenIndex, token_conflicts::TokenConflictMap},
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor},
rules::{Symbol, TokenSet},
tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable},
};
use log::info;
use std::collections::{hash_map::Entry, HashMap, VecDeque};
use std::mem;
pub const LARGE_CHARACTER_RANGE_COUNT: usize = 8;

View file

@ -1,25 +1,30 @@
use super::item::{ParseItem, ParseItemSet, ParseItemSetCore};
use super::item_set_builder::ParseItemSetBuilder;
use crate::generate::grammars::PrecedenceEntry;
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet, VecDeque},
fmt::Write,
hash::BuildHasherDefault,
};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use anyhow::{anyhow, Result};
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::hash::BuildHasherDefault;
use std::u32;
use anyhow::{anyhow, Result};
use indexmap::{map::Entry, IndexMap};
use rustc_hash::FxHasher;
use super::{
item::{ParseItem, ParseItemSet, ParseItemSetCore},
item_set_builder::ParseItemSetBuilder,
};
use crate::generate::{
grammars::{
InlinedProductionMap, LexicalGrammar, PrecedenceEntry, SyntaxGrammar, VariableType,
},
node_types::VariableInfo,
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
tables::{
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable,
ParseTableEntry, ProductionInfo, ProductionInfoId,
},
};
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
type SymbolSequence = Vec<Symbol>;
@ -293,7 +298,7 @@ impl<'a> ParseTableBuilder<'a> {
}
}
reduction_info.precedence = precedence.clone();
reduction_info.precedence.clone_from(precedence);
if let Err(i) = reduction_info.symbols.binary_search(&symbol) {
reduction_info.symbols.insert(i, symbol);
}
@ -599,13 +604,13 @@ impl<'a> ParseTableBuilder<'a> {
write!(&mut msg, " {}", self.symbol_name(symbol)).unwrap();
}
write!(
writeln!(
&mut msg,
" • {} …\n\n",
" • {} …\n",
self.symbol_name(&conflicting_lookahead)
)
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
writeln!(&mut msg, "Possible interpretations:\n").unwrap();
let mut interpretations = conflicting_items
.iter()
@ -680,7 +685,7 @@ impl<'a> ParseTableBuilder<'a> {
}
let mut resolution_count = 0;
write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
writeln!(&mut msg, "\nPossible resolutions:\n").unwrap();
let mut shift_items = Vec::new();
let mut reduce_items = Vec::new();
for item in conflicting_items {
@ -956,7 +961,7 @@ fn populate_following_tokens(
for entry in result.iter_mut() {
entry.insert(*extra);
}
result[extra.index] = all_tokens.clone();
result[extra.index].clone_from(&all_tokens);
}
}
}

View file

@ -1,8 +1,11 @@
use crate::generate::grammars::LexicalGrammar;
use crate::generate::rules::Symbol;
use crate::generate::tables::{ParseStateId, ParseTable};
use std::fmt;
use crate::generate::{
grammars::LexicalGrammar,
rules::Symbol,
tables::{ParseStateId, ParseTable},
};
pub struct CoincidentTokenIndex<'a> {
entries: Vec<Vec<ParseStateId>>,
grammar: &'a LexicalGrammar,

View file

@ -1,10 +1,15 @@
use crate::generate::grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar};
use crate::generate::rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet};
use std::{
cmp::Ordering,
fmt,
hash::{Hash, Hasher},
};
use lazy_static::lazy_static;
use std::cmp::Ordering;
use std::fmt;
use std::hash::{Hash, Hasher};
use std::u32;
use crate::generate::{
grammars::{LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::{Associativity, Precedence, Symbol, SymbolType, TokenSet},
};
lazy_static! {
static ref START_PRODUCTION: Production = Production {
@ -128,7 +133,7 @@ impl<'a> ParseItem<'a> {
/// Create an item like this one, but advanced by one step.
#[must_use]
pub const fn successor(&self) -> ParseItem<'a> {
pub const fn successor(&self) -> Self {
ParseItem {
variable_index: self.variable_index,
production: self.production,

View file

@ -1,8 +1,13 @@
use std::{
collections::{HashMap, HashSet},
fmt,
};
use super::item::{ParseItem, ParseItemDisplay, ParseItemSet, TokenSetDisplay};
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Symbol, SymbolType, TokenSet};
use std::collections::{HashMap, HashSet};
use std::fmt;
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar},
rules::{Symbol, SymbolType, TokenSet},
};
#[derive(Clone, Debug, PartialEq, Eq)]
struct TransitiveClosureAddition<'a> {
@ -70,8 +75,8 @@ impl<'a> ParseItemSetBuilder<'a> {
// The FIRST set of a non-terminal `i` is the union of the following sets:
// * the set of all terminals that appear at the beginnings of i's productions
// * the FIRST sets of all the non-terminals that appear at the beginnings
// of i's productions
// * the FIRST sets of all the non-terminals that appear at the beginnings of i's
// productions
//
// Rather than computing these sets using recursion, we use an explicit stack
// called `symbols_to_process`.
@ -130,11 +135,11 @@ impl<'a> ParseItemSetBuilder<'a> {
// item set when `i` occurs as the next symbol in one of its core items. The
// structure of an *addition* is as follows:
// * `item` - the new item that must be added as part of the expansion of `i`
// * `lookaheads` - lookahead tokens that can always come after that item in
// the expansion of `i`
// * `propagates_lookaheads` - a boolean indicating whether or not `item` can
// occur at the *end* of the expansion of `i`, so that i's own current
// lookahead tokens can occur after `item`.
// * `lookaheads` - lookahead tokens that can always come after that item in the expansion
// of `i`
// * `propagates_lookaheads` - a boolean indicating whether or not `item` can occur at the
// *end* of the expansion of `i`, so that i's own current lookahead tokens can occur
// after `item`.
//
// Again, rather than computing these additions recursively, we use an explicit
// stack called `entries_to_process`.

View file

@ -1,13 +1,17 @@
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
use crate::generate::tables::{
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
use std::{
collections::{HashMap, HashSet},
mem,
};
use log::info;
use std::collections::{HashMap, HashSet};
use std::mem;
use super::token_conflicts::TokenConflictMap;
use crate::generate::{
dedup::split_state_id_groups,
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
rules::{AliasMap, Symbol, TokenSet},
tables::{GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry},
};
pub fn minimize_parse_table(
parse_table: &mut ParseTable,

View file

@ -6,6 +6,11 @@ mod item_set_builder;
mod minimize_parse_table;
mod token_conflicts;
use std::collections::{BTreeSet, HashMap};
use anyhow::Result;
use log::info;
use self::{
build_lex_table::build_lex_table,
build_parse_table::{build_parse_table, ParseStateInfo},
@ -20,9 +25,6 @@ use crate::generate::{
rules::{AliasMap, Symbol, SymbolType, TokenSet},
tables::{LexTable, ParseAction, ParseTable, ParseTableEntry},
};
use anyhow::Result;
use log::info;
use std::collections::{BTreeSet, HashMap};
pub use build_lex_table::LARGE_CHARACTER_RANGE_COUNT;

View file

@ -1,10 +1,11 @@
use crate::generate::build_tables::item::TokenSetDisplay;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::rules::TokenSet;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::fmt;
use std::{cmp::Ordering, collections::HashSet, fmt};
use crate::generate::{
build_tables::item::TokenSetDisplay,
grammars::{LexicalGrammar, SyntaxGrammar},
nfa::{CharacterSet, NfaCursor, NfaTransition},
rules::TokenSet,
};
#[derive(Clone, Debug, Default, PartialEq, Eq)]
struct TokenConflictStatus {
@ -372,9 +373,11 @@ fn compute_conflict_status(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::prepare_grammar::{expand_tokens, ExtractedLexicalGrammar};
use crate::generate::rules::{Precedence, Rule, Symbol};
use crate::generate::{
grammars::{Variable, VariableType},
prepare_grammar::{expand_tokens, ExtractedLexicalGrammar},
rules::{Precedence, Rule, Symbol},
};
#[test]
fn test_starting_characters() {

View file

@ -23,7 +23,7 @@ function alias(rule, value) {
}
}
throw new Error('Invalid alias value ' + value);
throw new Error(`Invalid alias value ${value}`);
}
function blank() {
@ -35,7 +35,7 @@ function blank() {
function field(name, rule) {
return {
type: "FIELD",
name: name,
name,
content: normalize(rule)
}
}
@ -156,7 +156,7 @@ function seq(...elements) {
function sym(name) {
return {
type: "SYMBOL",
name: name
name
};
}
@ -201,17 +201,17 @@ function normalize(value) {
if (typeof value.type === 'string') {
return value;
} else {
throw new TypeError("Invalid rule: " + value.toString());
throw new TypeError(`Invalid rule: ${value}`);
}
}
}
function RuleBuilder(ruleMap) {
return new Proxy({}, {
get(target, propertyName) {
get(_, propertyName) {
const symbol = sym(propertyName);
if (!ruleMap || ruleMap.hasOwnProperty(propertyName)) {
if (!ruleMap || Object.prototype.hasOwnProperty.call(ruleMap, propertyName)) {
return symbol;
} else {
const error = new ReferenceError(`Undefined symbol '${propertyName}'`);
@ -256,10 +256,10 @@ function grammar(baseGrammar, options) {
}
const ruleMap = {};
for (const key in options.rules) {
for (const key of Object.keys(options.rules)) {
ruleMap[key] = true;
}
for (const key in baseGrammar.rules) {
for (const key of Object.keys(baseGrammar.rules)) {
ruleMap[key] = true;
}
for (const external of externals) {
@ -279,16 +279,16 @@ function grammar(baseGrammar, options) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
const rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
for (const ruleName of Object.keys(options.rules)) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
throw new Error(`Grammar rules must all be functions. '${ruleName}' rule is not.`);
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
@ -403,7 +403,7 @@ function grammar(baseGrammar, options) {
});
}
if (Object.keys(rules).length == 0) {
if (Object.keys(rules).length === 0) {
throw new Error("Grammar must have at least one rule.");
}

View file

@ -1,13 +1,18 @@
use super::write_file;
use std::{
fs,
fs::File,
io::BufReader,
path::{Path, PathBuf},
str,
};
use anyhow::{anyhow, Context, Result};
use heck::{ToKebabCase, ToShoutySnakeCase, ToSnakeCase, ToUpperCamelCase};
use indoc::indoc;
use serde::Deserialize;
use serde_json::{json, Map, Value};
use std::fs::File;
use std::io::BufReader;
use std::path::{Path, PathBuf};
use std::{fs, str};
use super::write_file;
const CLI_VERSION: &str = env!("CARGO_PKG_VERSION");
const CLI_VERSION_PLACEHOLDER: &str = "CLI_VERSION";
@ -281,7 +286,7 @@ pub fn generate_grammar_files(
|path| {
let build_rs =
fs::read_to_string(path).with_context(|| "Failed to read build.rs")?;
if !build_rs.contains("/utf-8") {
if !build_rs.contains("-utf-8") {
let index = build_rs
.find(" let parser_path = src_dir.join(\"parser.c\")")
.ok_or_else(|| anyhow!(indoc!{

View file

@ -1,7 +1,9 @@
use super::nfa::Nfa;
use super::rules::{Alias, Associativity, Precedence, Rule, Symbol};
use std::collections::HashMap;
use std::fmt;
use std::{collections::HashMap, fmt};
use super::{
nfa::Nfa,
rules::{Alias, Associativity, Precedence, Rule, Symbol},
};
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub enum VariableType {

View file

@ -1,17 +1,20 @@
use self::grammars::InputGrammar;
use std::{
env, fs,
io::Write,
path::{Path, PathBuf},
process::{Command, Stdio},
};
use anyhow::{anyhow, Context, Result};
use build_tables::build_tables;
use grammar_files::path_in_ignore;
use grammars::InputGrammar;
use lazy_static::lazy_static;
use parse_grammar::parse_grammar;
use prepare_grammar::prepare_grammar;
use regex::{Regex, RegexBuilder};
use render::render_c_code;
use semver::Version;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
use std::{env, fs};
mod build_tables;
mod dedup;

View file

@ -1,9 +1,15 @@
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use std::{
cmp::Ordering,
collections::{BTreeMap, HashMap, HashSet},
};
use anyhow::{anyhow, Result};
use serde::Serialize;
use std::cmp::Ordering;
use std::collections::{BTreeMap, HashMap, HashSet};
use super::{
grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
rules::{Alias, AliasMap, Symbol, SymbolType},
};
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ChildType {
@ -134,18 +140,17 @@ impl ChildQuantity {
/// * `types` - The types of visible children the field can contain.
/// * `optional` - Do `N` nodes always have this field?
/// * `multiple` - Can `N` nodes have multiple children for this field?
/// 3. `children_without_fields` - The *other* named children of `N` that are
/// not associated with fields. Data regarding these children:
/// 3. `children_without_fields` - The *other* named children of `N` that are not associated with
/// fields. Data regarding these children:
/// * `types` - The types of named children with no field.
/// * `optional` - Do `N` nodes always have at least one named child with no field?
/// * `multiple` - Can `N` nodes have multiple named children with no field?
///
/// Each summary must account for some indirect factors:
/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible
/// children of `C` *appear* to be direct children of `N`.
/// 2. aliases. If a parent node type `M` is aliased as some other type `N`,
/// then nodes which *appear* to have type `N` may have internal structure based
/// on `M`.
/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible children of `C`
/// *appear* to be direct children of `N`.
/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, then nodes which
/// *appear* to have type `N` may have internal structure based on `M`.
pub fn get_variable_info(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -218,7 +223,8 @@ pub fn get_variable_info(
.entry(field_name)
.or_insert_with(ChildQuantity::zero);
// Inherit the types and quantities of hidden children associated with fields.
// Inherit the types and quantities of hidden children associated with
// fields.
if child_is_hidden && child_symbol.is_non_terminal() {
let child_variable_info = &result[child_symbol.index];
did_change |= extend_sorted(
@ -523,8 +529,8 @@ pub fn generate_node_types_json(
let fields_json = node_type_json.fields.as_mut().unwrap();
for (new_field, field_info) in &info.fields {
let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| {
// If another rule is aliased with the same name, and does *not* have this field,
// then this field cannot be required.
// If another rule is aliased with the same name, and does *not* have this
// field, then this field cannot be required.
let mut field_json = FieldInfoJSON::default();
if node_type_existed {
field_json.required = false;
@ -534,8 +540,8 @@ pub fn generate_node_types_json(
populate_field_info_json(field_json, field_info);
}
// If another rule is aliased with the same name, any fields that aren't present in this
// cannot be required.
// If another rule is aliased with the same name, any fields that aren't present in
// this cannot be required.
for (existing_field, field_json) in fields_json.iter_mut() {
if !info.fields.contains_key(existing_field) {
field_json.required = false;
@ -715,11 +721,13 @@ where
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
use crate::generate::{
grammars::{
InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
},
prepare_grammar::prepare_grammar,
rules::Rule,
};
use crate::generate::prepare_grammar::prepare_grammar;
use crate::generate::rules::Rule;
#[test]
fn test_node_types_simple() {

View file

@ -1,9 +1,12 @@
use super::grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType};
use super::rules::{Precedence, Rule};
use anyhow::{anyhow, Result};
use serde::Deserialize;
use serde_json::{Map, Value};
use super::{
grammars::{InputGrammar, PrecedenceEntry, Variable, VariableType},
rules::{Precedence, Rule},
};
#[derive(Deserialize)]
#[serde(tag = "type")]
#[allow(non_camel_case_types)]
@ -163,19 +166,18 @@ fn parse_rule(json: RuleJSON) -> Rule {
RuleJSON::PATTERN { value, flags } => Rule::Pattern(
value,
flags.map_or(String::new(), |f| {
f.chars()
.filter(|c| {
if *c == 'i' {
true
} else {
// silently ignore unicode flags
if *c != 'u' && *c != 'v' {
eprintln!("Warning: unsupported flag {c}");
}
false
f.matches(|c| {
if c == 'i' {
true
} else {
// silently ignore unicode flags
if c != 'u' && c != 'v' {
eprintln!("Warning: unsupported flag {c}");
}
})
.collect()
false
}
})
.collect()
}),
),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),

View file

@ -1,8 +1,10 @@
use std::{collections::HashMap, mem};
use super::ExtractedSyntaxGrammar;
use crate::generate::grammars::{Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
use std::collections::HashMap;
use std::mem;
use crate::generate::{
grammars::{Variable, VariableType},
rules::{Rule, Symbol},
};
struct Expander {
variable_name: String,

View file

@ -1,15 +1,18 @@
use super::ExtractedLexicalGrammar;
use crate::generate::grammars::{LexicalGrammar, LexicalVariable};
use crate::generate::nfa::{CharacterSet, Nfa, NfaState};
use crate::generate::rules::{Precedence, Rule};
use std::collections::HashMap;
use anyhow::{anyhow, Context, Result};
use lazy_static::lazy_static;
use regex_syntax::ast::{
parse, Ast, ClassPerlKind, ClassSet, ClassSetBinaryOpKind, ClassSetItem, ClassUnicodeKind,
RepetitionKind, RepetitionRange,
};
use std::collections::HashMap;
use std::i32;
use super::ExtractedLexicalGrammar;
use crate::generate::{
grammars::{LexicalGrammar, LexicalVariable},
nfa::{CharacterSet, Nfa, NfaState},
rules::{Precedence, Rule},
};
lazy_static! {
static ref UNICODE_CATEGORIES: HashMap<&'static str, Vec<u32>> =
@ -539,8 +542,10 @@ impl NfaBuilder {
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::Variable;
use crate::generate::nfa::{NfaCursor, NfaTransition};
use crate::generate::{
grammars::Variable,
nfa::{NfaCursor, NfaTransition},
};
fn simulate_nfa<'a>(grammar: &'a LexicalGrammar, s: &'a str) -> Option<(usize, &'a str)> {
let start_states = grammar.variables.iter().map(|v| v.start_state).collect();

View file

@ -1,5 +1,7 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::generate::{
grammars::{LexicalGrammar, SyntaxGrammar},
rules::{Alias, AliasMap, Symbol, SymbolType},
};
#[derive(Clone, Default)]
struct SymbolStatus {
@ -14,8 +16,8 @@ struct SymbolStatus {
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
// ensures that the children of an `ERROR` node have symbols that are consistent with the
// way that they would appear in a valid syntax tree.
// ensures that the children of an `ERROR` node have symbols that are consistent with the way that
// they would appear in a valid syntax tree.
pub(super) fn extract_default_aliases(
syntax_grammar: &mut SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
@ -162,10 +164,10 @@ pub(super) fn extract_default_aliases(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{
LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
use crate::generate::{
grammars::{LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType},
nfa::Nfa,
};
use crate::generate::nfa::Nfa;
#[test]
fn test_extract_simple_aliases() {

View file

@ -1,9 +1,12 @@
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::generate::grammars::{ExternalToken, Variable, VariableType};
use crate::generate::rules::{MetadataParams, Rule, Symbol, SymbolType};
use std::{collections::HashMap, mem};
use anyhow::{anyhow, Result};
use std::collections::HashMap;
use std::mem;
use super::{ExtractedLexicalGrammar, ExtractedSyntaxGrammar, InternedGrammar};
use crate::generate::{
grammars::{ExternalToken, Variable, VariableType},
rules::{MetadataParams, Rule, Symbol, SymbolType},
};
pub(super) fn extract_tokens(
mut grammar: InternedGrammar,

View file

@ -1,10 +1,11 @@
use super::ExtractedSyntaxGrammar;
use crate::generate::grammars::{
Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable,
};
use crate::generate::rules::{Alias, Associativity, Precedence, Rule, Symbol};
use anyhow::{anyhow, Result};
use super::ExtractedSyntaxGrammar;
use crate::generate::{
grammars::{Production, ProductionStep, SyntaxGrammar, SyntaxVariable, Variable},
rules::{Alias, Associativity, Precedence, Rule, Symbol},
};
struct RuleFlattener {
production: Production,
precedence_stack: Vec<Precedence>,

View file

@ -1,8 +1,11 @@
use super::InternedGrammar;
use crate::generate::grammars::{InputGrammar, Variable, VariableType};
use crate::generate::rules::{Rule, Symbol};
use anyhow::{anyhow, Result};
use super::InternedGrammar;
use crate::generate::{
grammars::{InputGrammar, Variable, VariableType},
rules::{Rule, Symbol},
};
pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar> {
let interner = Interner { grammar };

View file

@ -6,26 +6,28 @@ mod flatten_grammar;
mod intern_symbols;
mod process_inlines;
pub use self::expand_tokens::expand_tokens;
use self::expand_repeats::expand_repeats;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
use self::process_inlines::process_inlines;
use super::grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
SyntaxGrammar, Variable,
};
use super::rules::{AliasMap, Precedence, Rule, Symbol};
use anyhow::{anyhow, Result};
use std::{
cmp::Ordering,
collections::{hash_map, HashMap, HashSet},
mem,
};
use anyhow::{anyhow, Result};
pub use self::expand_tokens::expand_tokens;
use self::{
expand_repeats::expand_repeats, extract_default_aliases::extract_default_aliases,
extract_tokens::extract_tokens, flatten_grammar::flatten_grammar,
intern_symbols::intern_symbols, process_inlines::process_inlines,
};
use super::{
grammars::{
ExternalToken, InlinedProductionMap, InputGrammar, LexicalGrammar, PrecedenceEntry,
SyntaxGrammar, Variable,
},
rules::{AliasMap, Precedence, Rule, Symbol},
};
pub struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_symbols: Vec<T>,

View file

@ -1,9 +1,11 @@
use std::collections::HashMap;
use anyhow::{anyhow, Result};
use crate::generate::{
grammars::{InlinedProductionMap, LexicalGrammar, Production, ProductionStep, SyntaxGrammar},
rules::SymbolType,
};
use anyhow::{anyhow, Result};
use std::collections::HashMap;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
struct ProductionStepId {
@ -152,7 +154,7 @@ impl InlinedProductionMapBuilder {
self.productions
.iter()
.position(|p| *p == production)
.unwrap_or({
.unwrap_or_else(|| {
self.productions.push(production);
self.productions.len() - 1
})
@ -223,8 +225,10 @@ pub(super) fn process_inlines(
#[cfg(test)]
mod tests {
use super::*;
use crate::generate::grammars::{LexicalVariable, SyntaxVariable, VariableType};
use crate::generate::rules::{Associativity, Precedence, Symbol};
use crate::generate::{
grammars::{LexicalVariable, SyntaxVariable, VariableType},
rules::{Associativity, Precedence, Symbol},
};
#[test]
fn test_basic_inlining() {

View file

@ -1,3 +1,10 @@
use std::{
cmp,
collections::{HashMap, HashSet},
fmt::Write,
mem::swap,
};
use super::{
build_tables::Tables,
grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
@ -8,12 +15,6 @@ use super::{
ParseTableEntry,
},
};
use std::{
cmp,
collections::{HashMap, HashSet},
fmt::Write,
mem::swap,
};
const SMALL_STATE_THRESHOLD: usize = 64;
const ABI_VERSION_MIN: usize = 13;
@ -27,7 +28,7 @@ macro_rules! add {
}
macro_rules! add_whitespace {
($this: tt) => {{
($this:tt) => {{
for _ in 0..$this.indent_level {
write!(&mut $this.buffer, " ").unwrap();
}
@ -43,13 +44,13 @@ macro_rules! add_line {
}
macro_rules! indent {
($this: tt) => {
($this:tt) => {
$this.indent_level += 1;
};
}
macro_rules! dedent {
($this: tt) => {
($this:tt) => {
assert_ne!($this.indent_level, 0);
$this.indent_level -= 1;
};
@ -172,8 +173,8 @@ impl Generator {
}
// Two anonymous tokens with different flags but the same string value
// should be represented with the same symbol in the public API. Examples:
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
// * "<" and token(prec(1, "<"))
// * "(" and token.immediate("(")
else if symbol.is_terminal() {
let metadata = self.metadata_for_symbol(*symbol);
for other_symbol in &self.parse_table.symbols {
@ -215,22 +216,22 @@ impl Generator {
});
// Some aliases match an existing symbol in the grammar.
let alias_id;
if let Some(existing_symbol) = existing_symbol {
alias_id = self.symbol_ids[&self.symbol_map[&existing_symbol]].clone();
let alias_id = if let Some(existing_symbol) = existing_symbol {
self.symbol_ids[&self.symbol_map[&existing_symbol]].clone()
}
// Other aliases don't match any existing symbol, and need their own identifiers.
// Other aliases don't match any existing symbol, and need their own
// identifiers.
else {
if let Err(i) = self.unique_aliases.binary_search(alias) {
self.unique_aliases.insert(i, alias.clone());
}
alias_id = if alias.is_named {
if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
}
}
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
}
@ -1612,16 +1613,15 @@ impl Generator {
/// * `parse_table` - The generated parse table for the language
/// * `main_lex_table` - The generated lexing table for the language
/// * `keyword_lex_table` - The generated keyword lexing table for the language
/// * `keyword_capture_token` - A symbol indicating which token is used
/// for keyword capture, if any.
/// * `keyword_capture_token` - A symbol indicating which token is used for keyword capture, if any.
/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
/// * `default_aliases` - A map describing the global rename rules that should apply.
/// The keys are symbols that are *always* aliased in the same way, and the values
/// are the aliases that are applied to those symbols.
/// * `abi_version` - The language ABI version that should be generated. Usually
/// you want Tree-sitter's current version, but right after making an ABI
/// change, it may be useful to generate code with the previous ABI.
/// * `default_aliases` - A map describing the global rename rules that should apply. The keys are
/// symbols that are *always* aliased in the same way, and the values are the aliases that are
/// applied to those symbols.
/// * `abi_version` - The language ABI version that should be generated. Usually you want
/// Tree-sitter's current version, but right after making an ABI change, it may be useful to
/// generate code with the previous ABI.
#[allow(clippy::too_many_arguments)]
pub fn render_c_code(
name: &str,

View file

@ -1,7 +1,9 @@
use super::grammars::VariableType;
use smallbitvec::SmallBitVec;
use std::{collections::HashMap, fmt};
use smallbitvec::SmallBitVec;
use super::grammars::VariableType;
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
pub enum SymbolType {
External,

View file

@ -1,6 +1,9 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Symbol, TokenSet};
use std::collections::BTreeMap;
use super::{
nfa::CharacterSet,
rules::{Alias, Symbol, TokenSet},
};
pub type ProductionInfoId = usize;
pub type ParseStateId = usize;
pub type LexStateId = usize;

View file

@ -1,14 +1,12 @@
use std::{
collections::HashMap, fmt::Write, fs, io, path, str, sync::atomic::AtomicUsize, time::Instant,
};
use ansi_term::Color;
use anyhow::Result;
use lazy_static::lazy_static;
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde::{ser::SerializeMap, Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::fmt::Write;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fs, io, path, str, usize};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
use tree_sitter_loader::Loader;
@ -417,9 +415,10 @@ pub fn html(
#[cfg(test)]
mod tests {
use super::*;
use std::env;
use super::*;
const JUNGLE_GREEN: &str = "#26A69A";
const DARK_CYAN: &str = "#00AF87";

View file

@ -1,17 +1,21 @@
use std::{
collections::HashSet,
env, fs,
path::{Path, PathBuf},
};
use anstyle::{AnsiColor, Color, Style};
use anyhow::{anyhow, Context, Result};
use clap::{crate_authors, Args, Command, FromArgMatches as _, Subcommand};
use glob::glob;
use regex::Regex;
use std::collections::HashSet;
use std::path::{Path, PathBuf};
use std::{env, fs, u64};
use tree_sitter::{ffi, Parser, Point};
use tree_sitter_cli::test::TestOptions;
use tree_sitter_cli::{
generate, highlight, logger,
parse::{self, ParseFileOptions, ParseOutput},
playground, query, tags, test, test_highlight, test_tags, util, wasm,
playground, query, tags, test,
test::TestOptions,
test_highlight, test_tags, util, wasm,
};
use tree_sitter_config::Config;
use tree_sitter_highlight::Highlighter;
@ -866,8 +870,7 @@ fn run() -> Result<()> {
let open_in_browser = !playground_options.quiet;
let grammar_path = playground_options
.grammar_path
.map(PathBuf::from)
.unwrap_or(current_dir);
.map_or(current_dir, PathBuf::from);
playground::serve(&grammar_path, open_in_browser)?;
}

View file

@ -1,12 +1,16 @@
use super::util;
use std::{
fmt, fs,
io::{self, Write},
path::Path,
sync::atomic::AtomicUsize,
time::{Duration, Instant},
};
use anyhow::{anyhow, Context, Result};
use std::io::{self, Write};
use std::path::Path;
use std::sync::atomic::AtomicUsize;
use std::time::{Duration, Instant};
use std::{fmt, fs, usize};
use tree_sitter::{ffi, InputEdit, Language, LogType, Parser, Point, Tree};
use super::util;
#[derive(Debug)]
pub struct Edit {
pub position: usize,

View file

@ -1,5 +1,3 @@
use super::wasm;
use anyhow::{anyhow, Context, Result};
use std::{
borrow::Cow,
env, fs,
@ -7,10 +5,14 @@ use std::{
path::{Path, PathBuf},
str::{self, FromStr as _},
};
use anyhow::{anyhow, Context, Result};
use tiny_http::{Header, Response, Server};
use super::wasm;
macro_rules! optional_resource {
($name: tt, $path: tt) => {
($name:tt, $path:tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: Option<&Path>) -> Cow<'static, [u8]> {
if let Some(tree_sitter_dir) = tree_sitter_dir {

View file

@ -1,5 +1,3 @@
use crate::query_testing;
use anyhow::{Context, Result};
use std::{
fs,
io::{self, Write},
@ -7,8 +5,12 @@ use std::{
path::Path,
time::Instant,
};
use anyhow::{Context, Result};
use tree_sitter::{Language, Parser, Point, Query, QueryCursor};
use crate::query_testing;
#[allow(clippy::too_many_arguments)]
pub fn query_files_at_paths(
language: &Language,

View file

@ -1,7 +1,8 @@
use std::fs;
use anyhow::{anyhow, Result};
use lazy_static::lazy_static;
use regex::Regex;
use std::fs;
use tree_sitter::{Language, Parser, Point};
lazy_static! {

View file

@ -1,12 +1,17 @@
use super::util;
use std::{
fs,
io::{self, Write},
path::Path,
str,
time::Instant,
};
use anyhow::{anyhow, Result};
use std::io::{self, Write};
use std::path::Path;
use std::time::Instant;
use std::{fs, str};
use tree_sitter_loader::{Config, Loader};
use tree_sitter_tags::TagsContext;
use super::util;
pub fn generate_tags(
loader: &Loader,
loader_config: &Config,

View file

@ -1,20 +1,26 @@
use super::util;
use std::{
collections::BTreeMap,
ffi::OsStr,
fs,
io::{self, Write},
path::{Path, PathBuf},
str,
};
use ansi_term::Colour;
use anyhow::{anyhow, Context, Result};
use difference::{Changeset, Difference};
use indoc::indoc;
use lazy_static::lazy_static;
use regex::bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder};
use regex::Regex;
use std::collections::BTreeMap;
use std::ffi::OsStr;
use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::str;
use regex::{
bytes::{Regex as ByteRegex, RegexBuilder as ByteRegexBuilder},
Regex,
};
use tree_sitter::{format_sexp, Language, LogType, Parser, Query};
use walkdir::WalkDir;
use super::util;
lazy_static! {
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(
r"^(?x)
@ -300,13 +306,15 @@ fn run_tests(
let expected_output = format_sexp(&output, 0);
let actual_output = format_sexp(&actual, 0);
// Only bail early before updating if the actual is not the output, sometimes
// users want to test cases that are intended to have errors, hence why this
// Only bail early before updating if the actual is not the output,
// sometimes users want to test cases that
// are intended to have errors, hence why this
// check isn't shown above
if actual.contains("ERROR") || actual.contains("MISSING") {
*has_parse_errors = true;
// keep the original `expected` output if the actual output has an error
// keep the original `expected` output if the actual output has an
// error
corrected_entries.push((
name.clone(),
input,
@ -424,9 +432,9 @@ fn write_tests_to_buffer(
if i > 0 {
writeln!(buffer)?;
}
write!(
writeln!(
buffer,
"{}\n{name}\n{}\n{input}\n{}\n\n{}\n",
"{}\n{name}\n{}\n{input}\n{}\n\n{}",
"=".repeat(*header_delim_len),
"=".repeat(*header_delim_len),
"-".repeat(*divider_delim_len),
@ -654,7 +662,7 @@ fn parse_test_content(name: String, content: &str, file_path: Option<PathBuf>) -
}
}
prev_attributes = attributes;
prev_name = test_name.unwrap_or(String::new());
prev_name = test_name.unwrap_or_default();
prev_header_len = header_delim_len;
prev_header_end = header_range.end;
}

View file

@ -1,5 +1,4 @@
use std::fs;
use std::path::Path;
use std::{fs, path::Path};
use ansi_term::Colour;
use anyhow::{anyhow, Result};

View file

@ -1,5 +1,4 @@
use std::fs;
use std::path::Path;
use std::{fs, path::Path};
use ansi_term::Colour;
use anyhow::{anyhow, Result};

View file

@ -1,10 +1,14 @@
use super::helpers::fixtures::get_language;
use std::future::Future;
use std::pin::{pin, Pin};
use std::ptr;
use std::task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker};
use std::{
future::Future,
pin::{pin, Pin},
ptr,
task::{self, Context, Poll, RawWaker, RawWakerVTable, Waker},
};
use tree_sitter::Parser;
use super::helpers::fixtures::get_language;
#[test]
fn test_node_in_fut() {
let (ret, pended) = tokio_like_spawn(async {

View file

@ -1,3 +1,8 @@
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
use super::helpers::{
allocations,
edits::{get_random_edit, invert_edit},
@ -14,9 +19,6 @@ use crate::{
test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry},
util,
};
use std::{collections::HashMap, env, fs};
use tree_sitter::{LogType, Node, Parser, Point, Range, Tree};
use tree_sitter_proc_macro::test_with_seed;
#[test_with_seed(retry=10, seed=*START_SEED, seed_fn=new_seed)]
fn test_corpus_for_bash(seed: usize) {
@ -215,7 +217,7 @@ fn test_language_corpus(
// Perform a random series of edits and reparse.
let mut undo_stack = Vec::new();
for _ in 0..1 + rand.unsigned(*EDIT_COUNT) {
for _ in 0..=rand.unsigned(*EDIT_COUNT) {
let edit = get_random_edit(&mut rand, &input);
undo_stack.push(invert_edit(&input, &edit));
perform_edit(&mut tree, &mut input, &edit).unwrap();

View file

@ -1,8 +1,9 @@
use crate::tests::helpers::fixtures::scratch_dir;
use std::{fs, path::Path};
use std::path::Path;
use tree_sitter_loader::Loader;
use crate::tests::helpers::fixtures::scratch_dir;
#[test]
fn detect_language_by_first_line_regex() {
let strace_dir = tree_sitter_dir(
@ -32,25 +33,25 @@ fn detect_language_by_first_line_regex() {
assert_eq!(config[0].scope.as_ref().unwrap(), "source.strace");
let file_name = strace_dir.path().join("strace.log");
std::fs::write(&file_name, "execve\nworld").unwrap();
fs::write(&file_name, "execve\nworld").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.strace".into())
);
let file_name = strace_dir.path().join("strace.log");
std::fs::write(&file_name, "447845 execve\nworld").unwrap();
fs::write(&file_name, "447845 execve\nworld").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.strace".into())
);
let file_name = strace_dir.path().join("strace.log");
std::fs::write(&file_name, "hello\nexecve").unwrap();
fs::write(&file_name, "hello\nexecve").unwrap();
assert!(get_lang_scope(&loader, &file_name).is_none());
let file_name = strace_dir.path().join("strace.log");
std::fs::write(&file_name, "").unwrap();
fs::write(&file_name, "").unwrap();
assert!(get_lang_scope(&loader, &file_name).is_none());
let dummy_dir = tree_sitter_dir(
@ -75,7 +76,7 @@ fn detect_language_by_first_line_regex() {
.find_language_configurations_at_path(dummy_dir.path(), false)
.unwrap();
let file_name = dummy_dir.path().join("strace.dummy");
std::fs::write(&file_name, "execve").unwrap();
fs::write(&file_name, "execve").unwrap();
assert_eq!(
get_lang_scope(&loader, &file_name),
Some("source.dummy".into())
@ -84,15 +85,14 @@ fn detect_language_by_first_line_regex() {
fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir {
let temp_dir = tempfile::tempdir().unwrap();
std::fs::write(temp_dir.path().join("package.json"), package_json).unwrap();
std::fs::create_dir(temp_dir.path().join("src")).unwrap();
std::fs::create_dir(temp_dir.path().join("src/tree_sitter")).unwrap();
std::fs::write(
fs::write(temp_dir.path().join("package.json"), package_json).unwrap();
fs::create_dir_all(temp_dir.path().join("src/tree_sitter")).unwrap();
fs::write(
temp_dir.path().join("src/grammar.json"),
format!(r#"{{"name":"{name}"}}"#),
)
.unwrap();
std::fs::write(
fs::write(
temp_dir.path().join("src/parser.c"),
format!(
r##"
@ -107,7 +107,7 @@ fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir {
),
)
.unwrap();
std::fs::write(
fs::write(
temp_dir.path().join("src/tree_sitter/parser.h"),
include_str!("../../../lib/src/parser.h"),
)
@ -115,7 +115,7 @@ fn tree_sitter_dir(package_json: &str, name: &str) -> tempfile::TempDir {
temp_dir
}
// if we manage to get the language scope, it means we correctly detected the file-type
// If we manage to get the language scope, it means we correctly detected the file-type
fn get_lang_scope(loader: &Loader, file_name: &Path) -> Option<String> {
loader
.language_configuration_for_file_name(file_name)

View file

@ -1,7 +1,7 @@
use std::{ops::Range, str};
use super::random::Rand;
use crate::parse::Edit;
use std::ops::Range;
use std::str;
#[derive(Debug)]
pub struct ReadRecorder<'a> {
@ -31,7 +31,7 @@ impl<'a> ReadRecorder<'a> {
pub fn strings_read(&self) -> Vec<&'a str> {
let mut result = Vec::new();
let mut last_range: Option<Range<usize>> = None;
let mut last_range = Option::<Range<usize>>::None;
for index in &self.indices_read {
if let Some(ref mut range) = &mut last_range {
if range.end == *index {

View file

@ -1,7 +1,10 @@
use std::{
env, fs,
path::{Path, PathBuf},
};
use anyhow::Context;
use lazy_static::lazy_static;
use std::path::{Path, PathBuf};
use std::{env, fs};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_loader::{CompileConfig, Loader};

View file

@ -5,9 +5,10 @@ pub(super) mod query_helpers;
pub(super) mod random;
pub(super) mod scope_sequence;
use std::env;
use lazy_static::lazy_static;
use rand::Rng;
use std::env;
lazy_static! {
pub static ref LOG_ENABLED: bool = env::var("TREE_SITTER_LOG").is_ok();

View file

@ -1,5 +1,6 @@
use rand::prelude::Rng;
use std::{cmp::Ordering, fmt::Write, ops::Range};
use rand::prelude::Rng;
use tree_sitter::{
Language, Node, Parser, Point, Query, QueryCapture, QueryCursor, QueryMatch, Tree, TreeCursor,
};

View file

@ -15,7 +15,7 @@ impl Rand {
}
pub fn unsigned(&mut self, max: usize) -> usize {
self.0.gen_range(0..max + 1)
self.0.gen_range(0..=max)
}
pub fn words(&mut self, max_count: usize) -> Vec<u8> {

View file

@ -1,13 +1,18 @@
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
use std::{
ffi::CString,
fs,
os::raw::c_char,
ptr, slice, str,
sync::atomic::{AtomicUsize, Ordering},
};
use lazy_static::lazy_static;
use std::ffi::CString;
use std::os::raw::c_char;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{fs, ptr, slice, str};
use tree_sitter_highlight::{
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
lazy_static! {
static ref JS_HIGHLIGHT: HighlightConfiguration =
get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES);
@ -748,8 +753,7 @@ fn to_token_vector<'a>(
for (i, l) in s.split('\n').enumerate() {
let l = l.trim_end_matches('\r');
if i > 0 {
lines.push(line);
line = Vec::new();
lines.push(std::mem::take(&mut line));
}
if !l.is_empty() {
line.push((l, highlights.clone()));

View file

@ -1,6 +1,7 @@
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
use super::helpers::fixtures::get_language;
#[test]
fn test_lookahead_iterator() {
let mut parser = Parser::new();

View file

@ -1,11 +1,14 @@
use super::helpers::edits::get_random_edit;
use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language};
use super::helpers::random::Rand;
use crate::generate::generate_parser_for_grammar;
use crate::parse::perform_edit;
use std::fs;
use tree_sitter::{Node, Parser, Point, Tree};
use super::helpers::{
edits::get_random_edit,
fixtures::{fixtures_dir, get_language, get_test_language},
random::Rand,
};
use crate::{generate::generate_parser_for_grammar, parse::perform_edit};
const JSON_EXAMPLE: &str = r#"
[
@ -847,10 +850,11 @@ fn test_node_numeric_symbols_respect_simple_aliases() {
parser.set_language(&get_language("python")).unwrap();
// Example 1:
// Python argument lists can contain "splat" arguments, which are not allowed within
// other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These
// `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric
// `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`.
// Python argument lists can contain "splat" arguments, which are not allowed
// within other expressions. This includes `parenthesized_list_splat` nodes
// like `(*b)`. These `parenthesized_list_splat` nodes are aliased as
// `parenthesized_expression`. Their numeric `symbol`, aka `kind_id` should
// match that of a normal `parenthesized_expression`.
let tree = parser.parse("(a((*b)))", None).unwrap();
let root = tree.root_node();
assert_eq!(
@ -872,9 +876,9 @@ fn test_node_numeric_symbols_respect_simple_aliases() {
assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id());
// Example 2:
// Ruby handles the unary (negative) and binary (minus) `-` operators using two different
// tokens. One or more of these is an external token that's aliased as `-`. Their numeric
// kind ids should match.
// Ruby handles the unary (negative) and binary (minus) `-` operators using two
// different tokens. One or more of these is an external token that's
// aliased as `-`. Their numeric kind ids should match.
parser.set_language(&get_language("ruby")).unwrap();
let tree = parser.parse("-a - b", None).unwrap();
let root = tree.root_node();

View file

@ -1,15 +1,17 @@
// For some reason `Command::spawn` doesn't work in CI env for many exotic arches.
#![cfg(all(any(target_arch = "x86_64", target_arch = "x86"), not(sanitizing)))]
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
use crate::{
generate::{generate_parser_for_grammar, load_grammar_file},
tests::helpers::fixtures::{fixtures_dir, get_test_language},
};
use std::{
env::VarError,
process::{Command, Stdio},
};
use tree_sitter::Parser;
// The `sanitizing` cfg is required to avoid running these tests under specific sanitizers,
// because they don't work well with subprocesses _(it's an assumption)_.
@ -35,7 +37,7 @@ fn test_grammar_that_should_hang_and_not_segfault() {
let tests_exec_path = std::env::args()
.next()
.expect("Failed get get tests executable path");
.expect("Failed to get tests executable path");
match std::env::var(test_var) {
Ok(v) if v == test_name => {
@ -45,60 +47,59 @@ fn test_grammar_that_should_hang_and_not_segfault() {
Err(VarError::NotPresent) => {
eprintln!(" parent process id {}", std::process::id());
if true {
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child wasn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child wasn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test sub process id: {}, error: {e}",
child.id()
);
let mut command = Command::new(tests_exec_path);
command.arg(test_name).env(test_var, test_name);
if std::env::args().any(|x| x == "--nocapture") {
command.arg("--nocapture");
} else {
command.stdout(Stdio::null()).stderr(Stdio::null());
}
match command.spawn() {
Ok(mut child) => {
std::thread::sleep(std::time::Duration::from_millis(parent_sleep_millis));
match child.try_wait() {
Ok(Some(status)) if status.success() => {
panic!("Child didn't hang and exited successfully")
}
Ok(Some(status)) => panic!(
"Child didn't hang and exited with status code: {:?}",
status.code()
),
_ => (),
}
if let Err(e) = child.kill() {
eprintln!(
"Failed to kill hang test's process id: {}, error: {e}",
child.id()
);
}
Err(e) => panic!("{e}"),
}
Err(e) => panic!("{e}"),
}
}
Err(e) => panic!("Env var error: {e}"),
_ => unreachable!(),
}
fn hang_test() {
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) =
generate_parser_for_grammar(grammar_json.as_str()).unwrap();
let language =
get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}
}
/// Generates a parser from the `get_col_should_hang_not_crash` fixture grammar and
/// parses a short input with it.
///
/// Going by the local name `code_that_should_hang`, the final `parse` call is expected
/// not to return for this grammar. Every fallible step is unwrapped, so a failure to
/// load the grammar, generate the parser, or set the language (or a `None` parse
/// result) panics.
fn hang_test() {
// Directory of the fixture grammar exercised by this test.
let test_grammar_dir = fixtures_dir()
.join("test_grammars")
.join("get_col_should_hang_not_crash");
// Load `grammar.js` and generate the parser source from it.
let grammar_json = load_grammar_file(&test_grammar_dir.join("grammar.js"), None).unwrap();
let (parser_name, parser_code) = generate_parser_for_grammar(grammar_json.as_str()).unwrap();
// The grammar directory is passed along as well.
// NOTE(review): presumably so files next to the grammar (e.g. a scanner) are picked up; confirm in `get_test_language`.
let language = get_test_language(&parser_name, &parser_code, Some(test_grammar_dir.as_path()));
let mut parser = Parser::new();
parser.set_language(&language).unwrap();
// Input that, per its name, should make the parser hang.
let code_that_should_hang = "\nHello";
parser.parse(code_that_should_hang, None).unwrap();
}

View file

@ -1,3 +1,12 @@
use std::{
fs,
sync::atomic::{AtomicUsize, Ordering},
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
use super::helpers::{
allocations,
edits::{invert_edit, ReadRecorder},
@ -8,13 +17,6 @@ use crate::{
parse::{perform_edit, Edit},
tests::helpers::fixtures::fixtures_dir,
};
use std::{
fs,
sync::atomic::{AtomicUsize, Ordering},
thread, time,
};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
use tree_sitter_proc_macro::retry;
#[test]
fn test_parsing_simple_string() {
@ -97,7 +99,7 @@ fn test_parsing_with_debug_graph_enabled() {
parser.print_dot_graphs(&debug_graph_file);
parser.parse("const zero = 0", None).unwrap();
debug_graph_file.seek(std::io::SeekFrom::Start(0)).unwrap();
debug_graph_file.rewind().unwrap();
let log_reader = BufReader::new(debug_graph_file)
.lines()
.map(|l| l.expect("Failed to read line from graph log"));

View file

@ -1,6 +1,7 @@
use super::helpers::{allocations, fixtures::get_language};
use tree_sitter::Parser;
use super::helpers::{allocations, fixtures::get_language};
#[test]
fn test_pathological_example_1() {
let language = "cpp";

View file

@ -10,8 +10,8 @@ use syn::{
pub fn retry(args: TokenStream, input: TokenStream) -> TokenStream {
let count = parse_macro_input!(args as LitInt);
let input = parse_macro_input!(input as ItemFn);
let attrs = input.attrs.clone();
let name = input.sig.ident.clone();
let attrs = &input.attrs;
let name = &input.sig.ident;
TokenStream::from(quote! {
#(#attrs),*
@ -98,8 +98,8 @@ pub fn test_with_seed(args: TokenStream, input: TokenStream) -> TokenStream {
let seed_fn = seed_fn.iter();
let func = parse_macro_input!(input as ItemFn);
let attrs = func.attrs.clone();
let name = func.sig.ident.clone();
let attrs = &func.attrs;
let name = &func.sig.ident;
TokenStream::from(quote! {
#[test]

View file

@ -1,3 +1,14 @@
use std::{env, fmt::Write};
use indoc::indoc;
use lazy_static::lazy_static;
use rand::{prelude::StdRng, SeedableRng};
use tree_sitter::{
CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError,
QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty,
};
use unindent::Unindent;
use super::helpers::{
allocations,
fixtures::{get_language, get_test_language},
@ -8,15 +19,6 @@ use crate::{
generate::generate_parser_for_grammar,
tests::helpers::query_helpers::{collect_captures, collect_matches},
};
use indoc::indoc;
use lazy_static::lazy_static;
use rand::{prelude::StdRng, SeedableRng};
use std::{env, fmt::Write};
use tree_sitter::{
CaptureQuantifier, Language, Node, Parser, Point, Query, QueryCursor, QueryError,
QueryErrorKind, QueryPredicate, QueryPredicateArg, QueryProperty,
};
use unindent::Unindent;
lazy_static! {
static ref EXAMPLE_FILTER: Option<String> = env::var("TREE_SITTER_TEST_EXAMPLE_FILTER").ok();
@ -889,12 +891,12 @@ fn test_query_matches_with_immediate_siblings() {
let language = get_language("python");
// The immediate child operator '.' can be used in three similar ways:
// 1. Before the first child node in a pattern, it means that there cannot be any
// named siblings before that child node.
// 1. Before the first child node in a pattern, it means that there cannot be any named
// siblings before that child node.
// 2. After the last child node in a pattern, it means that there cannot be any named
// sibling after that child node.
// 3. Between two child nodes in a pattern, it specifies that there cannot be any
// named siblings between those two child nodes.
// 3. Between two child nodes in a pattern, it specifies that there cannot be any named
// siblings between those two child nodes.
let query = Query::new(
&language,
"
@ -1423,7 +1425,8 @@ fn test_query_matches_with_nested_optional_nodes() {
allocations::record(|| {
let language = get_language("javascript");
// A function call, optionally containing a function call, which optionally contains a number
// A function call, optionally containing a function call, which optionally contains a
// number
let query = Query::new(
&language,
"
@ -3267,8 +3270,8 @@ fn test_query_captures_with_too_many_nested_results() {
// appearance.
// 2. This pattern captures the root `call_expression`.
// 3. This pattern's result also depends on the final child (the template string).
// 4. In between the `call_expression` and the possible `template_string`, there can
// be an arbitrarily deep subtree.
// 4. In between the `call_expression` and the possible `template_string`, there can be an
// arbitrarily deep subtree.
//
// This means that, if any patterns match *after* the initial `call_expression` is
// captured, but before the final `template_string` is found, those matches must

View file

@ -1,14 +1,16 @@
use super::helpers::{
allocations,
fixtures::{get_language, get_language_queries_path},
};
use std::{
ffi::{CStr, CString},
fs, ptr, slice, str,
};
use tree_sitter::Point;
use tree_sitter_tags::{c_lib as c, Error, TagsConfiguration, TagsContext};
use super::helpers::{
allocations,
fixtures::{get_language, get_language_queries_path},
};
const PYTHON_TAG_QUERY: &str = r#"
(
(function_definition

View file

@ -1,9 +1,12 @@
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::query_testing::{parse_position_comments, Assertion};
use crate::test_highlight::get_highlight_positions;
use tree_sitter::{Parser, Point};
use tree_sitter_highlight::{Highlight, Highlighter};
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::{
query_testing::{parse_position_comments, Assertion},
test_highlight::get_highlight_positions,
};
#[test]
fn test_highlight_test_with_basic_test() {
let language = get_language("javascript");

View file

@ -1,9 +1,12 @@
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::query_testing::{parse_position_comments, Assertion};
use crate::test_tags::get_tag_positions;
use tree_sitter::{Parser, Point};
use tree_sitter_tags::TagsContext;
use super::helpers::fixtures::{get_language, get_tags_config};
use crate::{
query_testing::{parse_position_comments, Assertion},
test_tags::get_tag_positions,
};
#[test]
fn test_tags_test_with_basic_test() {
let language = get_language("python");

View file

@ -1,8 +1,9 @@
use std::{iter, sync::Arc};
use crate::tests::helpers::fixtures::get_language;
use tree_sitter::{Language, Node, Parser, Point, Query, QueryCursor, TextProvider, Tree};
use crate::tests::helpers::fixtures::get_language;
fn parse_text(text: impl AsRef<[u8]>) -> (Tree, Language) {
let language = get_language("c");
let mut parser = Parser::new();

View file

@ -1,9 +1,10 @@
use super::helpers::edits::invert_edit;
use super::helpers::fixtures::get_language;
use crate::parse::{perform_edit, Edit};
use std::str;
use tree_sitter::{InputEdit, Parser, Point, Range, Tree};
use super::helpers::{edits::invert_edit, fixtures::get_language};
use crate::parse::{perform_edit, Edit};
#[test]
fn test_tree_edit() {
let mut parser = Parser::new();

View file

@ -1,10 +1,12 @@
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
use lazy_static::lazy_static;
use std::fs;
use lazy_static::lazy_static;
use tree_sitter::{
wasmtime::Engine, Parser, Query, QueryCursor, WasmError, WasmErrorKind, WasmStore,
};
use crate::tests::helpers::{allocations, fixtures::WASM_DIR};
lazy_static! {
static ref ENGINE: Engine = Engine::default();
}

View file

@ -1,13 +1,15 @@
use super::generate::parse_grammar::GrammarJSON;
use anyhow::{anyhow, Context, Result};
use std::{
fs,
path::{Path, PathBuf},
};
use anyhow::{anyhow, Context, Result};
use tree_sitter::wasm_stdlib_symbols;
use tree_sitter_loader::Loader;
use wasmparser::Parser;
use super::generate::parse_grammar::GrammarJSON;
pub fn load_language_wasm_file(language_dir: &Path) -> Result<(String, Vec<u8>)> {
let grammar_name = get_grammar_name(language_dir)
.with_context(|| "Failed to get wasm filename")