Merge branch 'master' into HEAD

This commit is contained in:
Max Brunsfeld 2020-12-03 09:44:33 -08:00
commit 026231e93d
173 changed files with 22878 additions and 6961 deletions

View file

@ -1,6 +1,7 @@
use super::test_highlight;
use std::fmt::Write;
use std::io;
use tree_sitter_highlight::PropertySheetError;
use tree_sitter::{QueryError, QueryErrorKind};
// A simple CLI error type: an ordered list of human-readable messages.
#[derive(Debug)]
pub struct Error(pub Vec<String>);
@ -50,6 +51,34 @@ impl Error {
}
}
impl<'a> From<(&str, QueryError)> for Error {
fn from((path, error): (&str, QueryError)) -> Self {
let mut msg = format!("Query error at {}:{}. ", path, error.row + 1);
match error.kind {
QueryErrorKind::Capture => write!(&mut msg, "Invalid capture name {}", error.message),
QueryErrorKind::Field => write!(&mut msg, "Invalid field name {}", error.message),
QueryErrorKind::NodeType => write!(&mut msg, "Invalid node type {}", error.message),
QueryErrorKind::Syntax => write!(&mut msg, "Invalid syntax:\n{}", error.message),
QueryErrorKind::Structure => write!(&mut msg, "Impossible pattern:\n{}", error.message),
QueryErrorKind::Predicate => write!(&mut msg, "Invalid predicate: {}", error.message),
}
.unwrap();
Self::new(msg)
}
}
impl<'a> From<tree_sitter_highlight::Error> for Error {
fn from(error: tree_sitter_highlight::Error) -> Self {
Error::new(format!("{:?}", error))
}
}
impl<'a> From<tree_sitter_tags::Error> for Error {
fn from(error: tree_sitter_tags::Error) -> Self {
Error::new(format!("{}", error))
}
}
impl From<serde_json::Error> for Error {
fn from(error: serde_json::Error) -> Self {
Error::new(error.to_string())
@ -62,8 +91,14 @@ impl From<io::Error> for Error {
}
}
impl From<rsass::Error> for Error {
fn from(error: rsass::Error) -> Self {
impl From<glob::PatternError> for Error {
fn from(error: glob::PatternError) -> Self {
Error::new(error.to_string())
}
}
// Let failed glob expansions propagate as plain error messages.
impl From<glob::GlobError> for Error {
    fn from(error: glob::GlobError) -> Self {
        Self::new(error.to_string())
    }
}
@ -74,18 +109,14 @@ impl From<regex_syntax::ast::Error> for Error {
}
}
// A highlight-test failure is reported using the message it formats itself.
impl From<test_highlight::Failure> for Error {
    fn from(failure: test_highlight::Failure) -> Self {
        Self::new(failure.message())
    }
}
impl From<String> for Error {
fn from(error: String) -> Self {
Error::new(error)
}
}
// A property sheet can fail to load in several distinct ways; dispatch each
// underlying cause to the conversion (or helper) that knows how to format it.
impl From<PropertySheetError> for Error {
fn from(error: PropertySheetError) -> Self {
match error {
PropertySheetError::InvalidFormat(e) => Self::from(e),
// Regex failures go through the dedicated `regex` helper for formatting.
PropertySheetError::InvalidRegex(e) => Self::regex(&e.to_string()),
PropertySheetError::InvalidJSON(e) => Self::from(e),
}
}
}

View file

@ -2,7 +2,7 @@ use super::coincident_tokens::CoincidentTokenIndex;
use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::nfa::NfaCursor;
use crate::generate::rules::{Symbol, TokenSet};
use crate::generate::tables::{AdvanceAction, LexState, LexTable, ParseStateId, ParseTable};
use log::info;
@ -189,13 +189,10 @@ impl<'a> LexTableBuilder<'a> {
// character that leads to the empty set of NFA states.
if eof_valid {
let (next_state_id, _) = self.add_state(Vec::new(), false);
self.table.states[state_id].advance_actions.push((
CharacterSet::empty().add_char('\0'),
AdvanceAction {
state: next_state_id,
in_main_token: true,
},
));
self.table.states[state_id].eof_action = Some(AdvanceAction {
state: next_state_id,
in_main_token: true,
});
}
for transition in transitions {
@ -273,6 +270,7 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
let signature = (
i == 0,
state.accept_action,
state.eof_action.is_some(),
state
.advance_actions
.iter()
@ -320,6 +318,9 @@ fn minimize_lex_table(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in new_state.advance_actions.iter_mut() {
advance_action.state = group_ids_by_state_id[advance_action.state];
}
if let Some(eof_action) = &mut new_state.eof_action {
eof_action.state = group_ids_by_state_id[eof_action.state];
}
new_states.push(new_state);
}
@ -364,6 +365,9 @@ fn sort_states(table: &mut LexTable, parse_table: &mut ParseTable) {
for (_, advance_action) in state.advance_actions.iter_mut() {
advance_action.state = new_ids_by_old_id[advance_action.state];
}
if let Some(eof_action) = &mut state.eof_action {
eof_action.state = new_ids_by_old_id[eof_action.state];
}
state
})
.collect();

View file

@ -7,7 +7,7 @@ use crate::generate::grammars::{
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Symbol, SymbolType, TokenSet};
use crate::generate::tables::{
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
FieldLocation, GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use core::ops::Range;
@ -16,17 +16,19 @@ use std::collections::{BTreeMap, HashMap, HashSet, VecDeque};
use std::fmt::Write;
use std::u32;
// For conflict reporting, each parse state is associated with an example
// sequence of symbols that could lead to that parse state.
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
// Information about an auxiliary symbol, carried along with the symbol
// sequences that are recorded for conflict reporting.
#[derive(Clone)]
struct AuxiliarySymbolInfo {
auxiliary_symbol: Symbol,
// Symbols in whose rules the auxiliary symbol appears — TODO confirm.
parent_symbols: Vec<Symbol>,
}
type SymbolSequence = Vec<Symbol>;
type AuxiliarySymbolSequence = Vec<AuxiliarySymbolInfo>;
pub(crate) type ParseStateInfo<'a> = (SymbolSequence, ParseItemSet<'a>);
struct ParseStateQueueEntry {
state_id: ParseStateId,
preceding_auxiliary_symbols: AuxiliarySymbolSequence,
@ -41,6 +43,7 @@ struct ParseTableBuilder<'a> {
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
parse_state_info_by_id: Vec<ParseStateInfo<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
non_terminal_extra_states: Vec<(Symbol, usize)>,
parse_table: ParseTable,
}
@ -52,7 +55,7 @@ impl<'a> ParseTableBuilder<'a> {
.push(ProductionInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default(), false);
// Add the starting state at index 1.
self.add_parse_state(
@ -66,8 +69,40 @@ impl<'a> ParseTableBuilder<'a> {
.iter()
.cloned(),
),
false,
);
// Compute the possible item sets for non-terminal extras.
let mut non_terminal_extra_item_sets_by_first_terminal = BTreeMap::new();
for extra_non_terminal in self
.syntax_grammar
.extra_symbols
.iter()
.filter(|s| s.is_non_terminal())
{
let variable = &self.syntax_grammar.variables[extra_non_terminal.index];
for production in &variable.productions {
non_terminal_extra_item_sets_by_first_terminal
.entry(production.first_symbol().unwrap())
.or_insert(ParseItemSet::default())
.insert(
ParseItem {
variable_index: extra_non_terminal.index as u32,
production,
step_index: 1,
},
&[Symbol::end()].iter().cloned().collect(),
);
}
}
// Add a state for each starting terminal of a non-terminal extra rule.
for (terminal, item_set) in non_terminal_extra_item_sets_by_first_terminal {
self.non_terminal_extra_states
.push((terminal, self.parse_table.states.len()));
self.add_parse_state(&Vec::new(), &Vec::new(), item_set, true);
}
while let Some(entry) = self.parse_state_queue.pop_front() {
let item_set = self
.item_set_builder
@ -91,9 +126,15 @@ impl<'a> ParseTableBuilder<'a> {
preceding_symbols: &SymbolSequence,
preceding_auxiliary_symbols: &AuxiliarySymbolSequence,
item_set: ParseItemSet<'a>,
is_non_terminal_extra: bool,
) -> ParseStateId {
match self.state_ids_by_item_set.entry(item_set) {
// If an equivalent item set has already been processed, then return
// the existing parse state index.
Entry::Occupied(o) => *o.get(),
// Otherwise, insert a new parse state and add it to the queue of
// parse states to populate.
Entry::Vacant(v) => {
let core = v.key().core();
let core_count = self.core_ids_by_core.len();
@ -116,6 +157,7 @@ impl<'a> ParseTableBuilder<'a> {
terminal_entries: HashMap::new(),
nonterminal_entries: HashMap::new(),
core_id,
is_non_terminal_extra,
});
self.parse_state_queue.push_back(ParseStateQueueEntry {
state_id,
@ -138,7 +180,12 @@ impl<'a> ParseTableBuilder<'a> {
let mut non_terminal_successors = BTreeMap::new();
let mut lookaheads_with_conflicts = TokenSet::new();
// Each item in the item set contributes either a Shift action or a Reduce
// action in this state.
for (item, lookaheads) in &item_set.entries {
// If the item is unfinished, then this state has a transition for the item's
// next symbol. Advance the item to its next step and insert the resulting
// item into the successor item set.
if let Some(next_symbol) = item.symbol() {
let successor = item.successor();
if next_symbol.is_non_terminal() {
@ -160,7 +207,10 @@ impl<'a> ParseTableBuilder<'a> {
.or_insert_with(|| ParseItemSet::default())
.insert(successor, lookaheads);
}
} else {
}
// If the item is finished, then add a Reduce action to this state based
// on this item.
else {
let action = if item.is_augmented() {
ParseAction::Accept
} else {
@ -179,6 +229,10 @@ impl<'a> ParseTableBuilder<'a> {
.terminal_entries
.entry(lookahead);
let entry = entry.or_insert_with(|| ParseTableEntry::new());
// While inserting Reduce actions, eagerly resolve conflicts related
// to precedence: avoid inserting lower-precedence reductions, and
// clear the action list when inserting higher-precedence reductions.
if entry.actions.is_empty() {
entry.actions.push(action);
} else if action.precedence() > entry.actions[0].precedence() {
@ -193,12 +247,16 @@ impl<'a> ParseTableBuilder<'a> {
}
}
// Having computed the successor item sets for each symbol, add a new
// parse state for each of these item sets, and add a corresponding Shift
// action to this state.
for (symbol, next_item_set) in terminal_successors {
preceding_symbols.push(symbol);
let next_state_id = self.add_parse_state(
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
@ -226,13 +284,19 @@ impl<'a> ParseTableBuilder<'a> {
&preceding_symbols,
&preceding_auxiliary_symbols,
next_item_set,
self.parse_table.states[state_id].is_non_terminal_extra,
);
preceding_symbols.pop();
self.parse_table.states[state_id]
.nonterminal_entries
.insert(symbol, next_state_id);
.insert(symbol, GotoAction::Goto(next_state_id));
}
// For any symbol with multiple actions, perform conflict resolution.
// This will either
// * choose one action over the others using precedence or associativity
// * keep multiple actions if this conflict has been whitelisted in the grammar
// * fail, terminating the parser generation process
for symbol in lookaheads_with_conflicts.iter() {
self.handle_conflict(
&item_set,
@ -243,15 +307,50 @@ impl<'a> ParseTableBuilder<'a> {
)?;
}
// Finally, add actions for the grammar's `extra` symbols.
let state = &mut self.parse_table.states[state_id];
for extra_token in &self.syntax_grammar.extra_tokens {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
let is_non_terminal_extra = state.is_non_terminal_extra;
let is_end_of_non_terminal_extra =
is_non_terminal_extra && state.terminal_entries.len() == 1;
// Add actions for the start tokens of each non-terminal extra rule.
// These actions are added to every state except for the states that are
// already within non-terminal extras. Non-terminal extras are not allowed
// to nest within each other.
if !is_non_terminal_extra {
for (terminal, state_id) in &self.non_terminal_extra_states {
state
.terminal_entries
.entry(*terminal)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::Shift {
state: *state_id,
is_repetition: false,
}],
});
}
}
// Add ShiftExtra actions for the terminal extra tokens. These actions
// are added to every state except for those at the ends of non-terminal
// extras.
if !is_end_of_non_terminal_extra {
for extra_token in &self.syntax_grammar.extra_symbols {
if extra_token.is_non_terminal() {
state
.nonterminal_entries
.insert(*extra_token, GotoAction::ShiftExtra);
} else {
state
.terminal_entries
.entry(*extra_token)
.or_insert(ParseTableEntry {
reusable: true,
actions: vec![ParseAction::ShiftExtra],
});
}
}
}
Ok(())
@ -362,8 +461,8 @@ impl<'a> ParseTableBuilder<'a> {
}
}
// If all reduce actions are left associative, remove the SHIFT action.
// If all reduce actions are right associative, remove the REDUCE actions.
// If all Reduce actions are left associative, remove the SHIFT action.
// If all Reduce actions are right associative, remove the REDUCE actions.
match (has_left, has_non, has_right) {
(true, false, false) => {
entry.actions.pop();
@ -744,7 +843,7 @@ fn populate_following_tokens(
}
}
}
for extra in &grammar.extra_tokens {
for extra in &grammar.extra_symbols {
if extra.is_terminal() {
for entry in result.iter_mut() {
entry.insert(*extra);
@ -774,6 +873,7 @@ pub(crate) fn build_parse_table<'a>(
lexical_grammar,
item_set_builder,
variable_info,
non_terminal_extra_states: Vec::new(),
state_ids_by_item_set: HashMap::new(),
core_ids_by_core: HashMap::new(),
parse_state_info_by_id: Vec::new(),

View file

@ -2,7 +2,9 @@ use super::token_conflicts::TokenConflictMap;
use crate::generate::dedup::split_state_id_groups;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use crate::generate::rules::{AliasMap, Symbol, TokenSet};
use crate::generate::tables::{ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry};
use crate::generate::tables::{
GotoAction, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
};
use log::info;
use std::collections::{HashMap, HashSet};
use std::mem;
@ -66,6 +68,7 @@ impl<'a> Minimizer<'a> {
..
} => {
if !self.simple_aliases.contains_key(&symbol)
&& !self.syntax_grammar.supertype_symbols.contains(&symbol)
&& !aliased_symbols.contains(&symbol)
&& self.syntax_grammar.variables[symbol.index].kind
!= VariableType::Named
@ -101,7 +104,10 @@ impl<'a> Minimizer<'a> {
state.update_referenced_states(|other_state_id, state| {
if let Some(symbol) = unit_reduction_symbols_by_state.get(&other_state_id) {
done = false;
state.nonterminal_entries[symbol]
match state.nonterminal_entries.get(symbol) {
Some(GotoAction::Goto(state_id)) => *state_id,
_ => other_state_id,
}
} else {
other_state_id
}
@ -194,6 +200,9 @@ impl<'a> Minimizer<'a> {
right_state: &ParseState,
group_ids_by_state_id: &Vec<ParseStateId>,
) -> bool {
if left_state.is_non_terminal_extra != right_state.is_non_terminal_extra {
return true;
}
for (token, left_entry) in &left_state.terminal_entries {
if let Some(right_entry) = right_state.terminal_entries.get(token) {
if self.entries_conflict(
@ -262,18 +271,24 @@ impl<'a> Minimizer<'a> {
for (symbol, s1) in &state1.nonterminal_entries {
if let Some(s2) = state2.nonterminal_entries.get(symbol) {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
match (s1, s2) {
(GotoAction::ShiftExtra, GotoAction::ShiftExtra) => continue,
(GotoAction::Goto(s1), GotoAction::Goto(s2)) => {
let group1 = group_ids_by_state_id[*s1];
let group2 = group_ids_by_state_id[*s2];
if group1 != group2 {
info!(
"split states {} {} - successors for {} are split: {} {}",
state1.id,
state2.id,
self.symbol_name(symbol),
s1,
s2,
);
return true;
}
}
_ => return true,
}
}
}

View file

@ -271,6 +271,7 @@ fn identify_keywords(
cursor.reset(vec![variable.start_state]);
if all_chars_are_alphabetical(&cursor)
&& token_conflict_map.does_match_same_string(i, word_token.index)
&& !token_conflict_map.does_match_different_string(i, word_token.index)
{
info!(
"Keywords - add candidate {}",

View file

@ -1,9 +1,9 @@
use crate::generate::build_tables::item::{TokenSetDisplay};
use crate::generate::build_tables::item::TokenSetDisplay;
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor, NfaTransition};
use crate::generate::rules::TokenSet;
use std::collections::HashSet;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::fmt;
#[derive(Clone, Debug, Default, PartialEq, Eq)]
@ -13,6 +13,7 @@ struct TokenConflictStatus {
does_match_valid_continuation: bool,
does_match_separators: bool,
matches_same_string: bool,
matches_different_string: bool,
}
pub(crate) struct TokenConflictMap<'a> {
@ -25,6 +26,12 @@ pub(crate) struct TokenConflictMap<'a> {
}
impl<'a> TokenConflictMap<'a> {
/// Create a token conflict map based on a lexical grammar, which describes the structure
/// of each token, and a `following_token` map, which indicates which tokens may appear
/// immediately after each other token.
///
/// This analyzes the possible kinds of overlap between each pair of tokens and stores
/// them in a matrix.
pub fn new(grammar: &'a LexicalGrammar, following_tokens: Vec<TokenSet>) -> Self {
let mut cursor = NfaCursor::new(&grammar.nfa, Vec::new());
let starting_chars = get_starting_chars(&mut cursor, grammar);
@ -50,12 +57,21 @@ impl<'a> TokenConflictMap<'a> {
}
}
/// Do tokens `a` and `b` have exactly the same conflict status with
/// respect to token `other`?
pub fn has_same_conflict_status(&self, a: usize, b: usize, other: usize) -> bool {
let left = &self.status_matrix[matrix_index(self.n, a, other)];
let right = &self.status_matrix[matrix_index(self.n, b, other)];
left == right
}
/// Does token `i` match any strings that token `j` does *not* match?
///
/// Answered from the precomputed pairwise conflict-status matrix.
pub fn does_match_different_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_different_string
}
/// Does token `i` match any strings that token `j` also matches, where
/// token `i` is preferred over token `j`?
///
/// Answered from the precomputed pairwise conflict-status matrix.
pub fn does_match_same_string(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_same_string
}
@ -67,6 +83,7 @@ impl<'a> TokenConflictMap<'a> {
|| entry.matches_same_string
}
/// Does token `i` match any strings that are *prefixes* of strings matched by `j`?
///
/// Answered from the precomputed pairwise conflict-status matrix.
pub fn does_match_prefix(&self, i: usize, j: usize) -> bool {
self.status_matrix[matrix_index(self.n, i, j)].matches_prefix
}
@ -239,19 +256,29 @@ fn compute_conflict_status(
);
while let Some(state_set) = state_set_queue.pop() {
// Don't pursue states where there's no potential for conflict.
if grammar.variable_indices_for_nfa_states(&state_set).count() > 1 {
cursor.reset(state_set);
} else {
let mut live_variable_indices = grammar.variable_indices_for_nfa_states(&state_set);
// If only one of the two tokens could possibly match from this state, then
// there is no reason to analyze any of its successors. Just record the fact
// that the token matches a string that the other token does not match.
let first_live_variable_index = live_variable_indices.next().unwrap();
if live_variable_indices.count() == 0 {
if first_live_variable_index == i {
result.0.matches_different_string = true;
} else {
result.1.matches_different_string = true;
}
continue;
}
let has_sep = cursor.transition_chars().any(|(_, sep)| sep);
// Don't pursue states where there's no potential for conflict.
cursor.reset(state_set);
let within_separator = cursor.transition_chars().any(|(_, sep)| sep);
// Examine each possible completed token in this state.
let mut completion = None;
for (id, precedence) in cursor.completions() {
if has_sep {
if within_separator {
if id == i {
result.0.does_match_separators = true;
} else {
@ -316,7 +343,7 @@ fn compute_conflict_status(
&transition,
completed_id,
completed_precedence,
has_sep,
within_separator,
) {
can_advance = true;
if advanced_id == i {

View file

@ -292,7 +292,12 @@ function grammar(baseGrammar, options) {
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);
if (!Array.isArray(extras)) {
throw new Error("Grammar's 'extras' function must return an array.")
}
extras = extras.map(normalize);
}
let word = baseGrammar.word;

View file

@ -1,15 +1,15 @@
{
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "tree-sitter grammar specification",
"type": "object",
"required": [
"name",
"rules"
],
"required": ["name", "rules"],
"additionalProperties": false,
"properties": {
"name": {
"description": "the name of the grammar",
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
@ -60,6 +60,15 @@
"word": {
"type": "string",
"pattern": "^[a-zA-Z_]\\w*"
},
"supertypes": {
"description": "A list of hidden rule names that should be considered supertypes in the generated node types file. See http://tree-sitter.github.io/tree-sitter/using-parsers#static-node-types.",
"type": "array",
"items": {
"description": "the name of a rule in `rules` or `extras`",
"type": "string"
}
}
},
@ -96,20 +105,19 @@
"type": "string",
"pattern": "^PATTERN$"
},
"value": {"type": "string"}
"value": { "type": "string" }
},
"required": ["type", "value"]
},
"symbol-rule": {
"required": ["name"],
"type": "object",
"properties": {
"type": {
"type": "string",
"pattern": "^SYMBOL$"
},
"name": {"type": "string"}
"name": { "type": "string" }
},
"required": ["type", "name"]
},
@ -210,6 +218,20 @@
"required": ["type", "content"]
},
"field-rule": {
"properties": {
"name": { "type": "string" },
"type": {
"type": "string",
"pattern": "^FIELD$"
},
"content": {
"$ref": "#/definitions/rule"
}
},
"required": ["name", "type", "content"]
},
"prec-rule": {
"type": "object",
"properties": {
@ -239,6 +261,7 @@
{ "$ref": "#/definitions/repeat1-rule" },
{ "$ref": "#/definitions/repeat-rule" },
{ "$ref": "#/definitions/token-rule" },
{ "$ref": "#/definitions/field-rule" },
{ "$ref": "#/definitions/prec-rule" }
]
}

View file

@ -23,7 +23,7 @@ pub(crate) struct Variable {
pub(crate) struct InputGrammar {
pub name: String,
pub variables: Vec<Variable>,
pub extra_tokens: Vec<Rule>,
pub extra_symbols: Vec<Rule>,
pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
@ -87,7 +87,7 @@ pub(crate) struct ExternalToken {
#[derive(Debug, Default)]
pub(crate) struct SyntaxGrammar {
pub variables: Vec<SyntaxVariable>,
pub extra_tokens: Vec<Symbol>,
pub extra_symbols: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,

View file

@ -6,13 +6,12 @@ mod node_types;
mod npm_files;
pub mod parse_grammar;
mod prepare_grammar;
pub mod properties;
mod render;
mod rules;
mod tables;
use self::build_tables::build_tables;
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType};
use self::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use self::parse_grammar::parse_grammar;
use self::prepare_grammar::prepare_grammar;
use self::render::render_c_code;
@ -20,9 +19,8 @@ use self::rules::AliasMap;
use crate::error::{Error, Result};
use lazy_static::lazy_static;
use regex::{Regex, RegexBuilder};
use std::collections::HashSet;
use std::fs::{self, File};
use std::io::{BufWriter, Write};
use std::fs;
use std::io::Write;
use std::path::{Path, PathBuf};
use std::process::{Command, Stdio};
@ -33,15 +31,9 @@ lazy_static! {
.unwrap();
}
const NEW_HEADER_PARTS: [&'static str; 2] = [
"
uint32_t large_state_count;
const uint16_t *small_parse_table;
const uint32_t *small_parse_table_map;",
"
#define SMALL_STATE(id) id - LARGE_STATE_COUNT
",
];
const NEW_HEADER_PARTS: &[&'static str] = &["
const uint16_t *alias_map;
uint32_t state_count;"];
struct GeneratedParser {
c_code: String,
@ -51,13 +43,11 @@ struct GeneratedParser {
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
properties_only: bool,
next_abi: bool,
report_symbol_name: Option<&str>,
) -> Result<()> {
let src_path = repo_path.join("src");
let header_path = src_path.join("tree_sitter");
let properties_dir_path = repo_path.join("properties");
// Ensure that the output directories exist.
fs::create_dir_all(&src_path)?;
@ -82,71 +72,48 @@ pub fn generate_parser_in_directory(
prepare_grammar(&input_grammar)?;
let language_name = input_grammar.name;
// If run with no arguments, read all of the property sheets and compile them to JSON.
if grammar_path.is_none() {
let token_names = get_token_names(&syntax_grammar, &lexical_grammar);
if let Ok(entries) = fs::read_dir(properties_dir_path) {
for entry in entries {
let css_path = entry?.path();
let css = fs::read_to_string(&css_path)?;
let sheet = properties::generate_property_sheet(&css_path, &css, &token_names)?;
let property_sheet_json_path = src_path
.join(css_path.file_name().unwrap())
.with_extension("json");
let property_sheet_json_file =
File::create(&property_sheet_json_path).map_err(Error::wrap(|| {
format!("Failed to create {:?}", property_sheet_json_path)
}))?;
let mut writer = BufWriter::new(property_sheet_json_file);
serde_json::to_writer_pretty(&mut writer, &sheet)?;
}
}
}
// Generate the parser and related files.
if !properties_only {
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
next_abi,
report_symbol_name,
)?;
let GeneratedParser {
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(
&language_name,
syntax_grammar,
lexical_grammar,
inlines,
simple_aliases,
next_abi,
report_symbol_name,
)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
write_file(&src_path.join("parser.c"), c_code)?;
write_file(&src_path.join("node-types.json"), node_types_json)?;
if next_abi {
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
} else {
let mut header = tree_sitter::PARSER_HEADER.to_string();
if next_abi {
write_file(&header_path.join("parser.h"), tree_sitter::PARSER_HEADER)?;
} else {
let mut header = tree_sitter::PARSER_HEADER.to_string();
for part in &NEW_HEADER_PARTS {
let pos = header
.find(part)
.expect("Missing expected part of parser.h header");
header.replace_range(pos..(pos + part.len()), "");
}
write_file(&header_path.join("parser.h"), header)?;
for part in NEW_HEADER_PARTS.iter() {
let pos = header
.find(part)
.expect("Missing expected part of parser.h header");
header.replace_range(pos..(pos + part.len()), "");
}
ensure_file(&repo_path.join("index.js"), || {
npm_files::index_js(&language_name)
})?;
ensure_file(&src_path.join("binding.cc"), || {
npm_files::binding_cc(&language_name)
})?;
ensure_file(&repo_path.join("binding.gyp"), || {
npm_files::binding_gyp(&language_name)
})?;
write_file(&header_path.join("parser.h"), header)?;
}
ensure_file(&repo_path.join("index.js"), || {
npm_files::index_js(&language_name)
})?;
ensure_file(&src_path.join("binding.cc"), || {
npm_files::binding_cc(&language_name)
})?;
ensure_file(&repo_path.join("binding.gyp"), || {
npm_files::binding_gyp(&language_name)
})?;
Ok(())
}
@ -176,7 +143,8 @@ fn generate_parser_for_grammar_with_opts(
next_abi: bool,
report_symbol_name: Option<&str>,
) -> Result<GeneratedParser> {
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &inlines)?;
let variable_info =
node_types::get_variable_info(&syntax_grammar, &lexical_grammar, &simple_aliases)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
@ -208,35 +176,6 @@ fn generate_parser_for_grammar_with_opts(
})
}
fn get_token_names(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
) -> HashSet<String> {
let mut result = HashSet::new();
for variable in &lexical_grammar.variables {
if variable.kind == VariableType::Named {
result.insert(variable.name.clone());
}
}
for token in &syntax_grammar.external_tokens {
if token.kind == VariableType::Named {
result.insert(token.name.clone());
}
}
for variable in &syntax_grammar.variables {
for production in &variable.productions {
for step in &production.steps {
if let Some(alias) = &step.alias {
if !step.symbol.is_non_terminal() && alias.is_named {
result.insert(alias.value.clone());
}
}
}
}
}
result
}
fn load_grammar_file(grammar_path: &Path) -> Result<String> {
match grammar_path.extension().and_then(|e| e.to_str()) {
Some("js") => Ok(load_js_grammar_file(grammar_path)?),

View file

@ -1,8 +1,10 @@
use std::char;
use std::cmp::max;
use std::cmp::Ordering;
use std::collections::HashSet;
use std::fmt;
use std::mem::swap;
use std::ops::Range;
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub enum CharacterSet {
@ -178,6 +180,40 @@ impl CharacterSet {
}
}
/// Group a sorted list of characters into inclusive ranges (`start..end`
/// where `end` is the last character of the range), treating any character
/// in `ruled_out_characters` as bridgeable: a gap consisting entirely of
/// ruled-out characters does not split a range.
pub fn ranges<'a>(
    chars: &'a Vec<char>,
    ruled_out_characters: &'a HashSet<u32>,
) -> impl Iterator<Item = Range<char>> + 'a {
    // Walk the characters while growing one pending range; a trailing
    // sentinel entry flushes whatever range is still pending at the end.
    let mut pending: Option<Range<char>> = None;
    chars
        .iter()
        .map(|&c| (c, false))
        .chain(std::iter::once(('\0', true)))
        .filter_map(move |(c, is_sentinel)| {
            if is_sentinel {
                return pending.clone();
            }
            // Ruled-out characters never begin or end a range themselves.
            if ruled_out_characters.contains(&(c as u32)) {
                return None;
            }
            match pending.clone() {
                None => {
                    pending = Some(c..c);
                    None
                }
                Some(range) => {
                    // If every code point strictly between the pending range
                    // and `c` is ruled out, the pending range absorbs `c`;
                    // otherwise emit the pending range and restart at `c`.
                    let gap = (range.end as u32 + 1)..(c as u32);
                    if gap.into_iter().all(|code| ruled_out_characters.contains(&code)) {
                        pending = Some(range.start..c);
                        None
                    } else {
                        pending = Some(c..c);
                        Some(range)
                    }
                }
            }
        })
}
#[cfg(test)]
pub fn contains(&self, c: char) -> bool {
match self {
@ -266,6 +302,13 @@ fn compare_chars(left: &Vec<char>, right: &Vec<char>) -> SetComparision {
result.common = true;
}
}
match (i, j) {
(Some(_), _) => result.left_only = true,
(_, Some(_)) => result.right_only = true,
_ => {}
}
result
}
@ -718,7 +761,7 @@ mod tests {
.add_range('d', 'e')
);
// A whitelist and an intersecting blacklist.
// An inclusion and an intersecting exclusion.
// Both sets contain 'e', 'f', and 'm'
let mut a = CharacterSet::empty()
.add_range('c', 'h')
@ -748,7 +791,7 @@ mod tests {
assert_eq!(a, CharacterSet::Include(vec!['c', 'd', 'g', 'h', 'k', 'l']));
assert_eq!(b, CharacterSet::empty().add_range('a', 'm').negate());
// A blacklist and an overlapping blacklist.
// An exclusion and an overlapping inclusion.
// Both sets exclude 'c', 'd', and 'e'
let mut a = CharacterSet::empty().add_range('a', 'e').negate();
let mut b = CharacterSet::empty().add_range('c', 'h').negate();
@ -759,7 +802,7 @@ mod tests {
assert_eq!(a, CharacterSet::Include(vec!['f', 'g', 'h']));
assert_eq!(b, CharacterSet::Include(vec!['a', 'b']));
// A blacklist and a larger blacklist.
// An exclusion and a larger exclusion.
let mut a = CharacterSet::empty().add_range('b', 'c').negate();
let mut b = CharacterSet::empty().add_range('a', 'd').negate();
assert_eq!(
@ -810,5 +853,53 @@ mod tests {
);
assert!(a.does_intersect(&b));
assert!(b.does_intersect(&a));
let (a, b) = (
CharacterSet::Include(vec!['c']),
CharacterSet::Exclude(vec!['a']),
);
assert!(a.does_intersect(&b));
assert!(b.does_intersect(&a));
}
// Table-driven test for `CharacterSet::ranges`: each row pairs a set of input
// characters and "ruled out" characters with the ranges the iterator should
// yield. Note that ranges are inclusive of their `end` character here.
#[test]
fn test_character_set_get_ranges() {
struct Row {
chars: Vec<char>,
ruled_out_chars: Vec<char>,
expected_ranges: Vec<Range<char>>,
}
let table = [
Row {
chars: vec!['a'],
ruled_out_chars: vec![],
expected_ranges: vec!['a'..'a'],
},
Row {
chars: vec!['a', 'b', 'c', 'e', 'z'],
ruled_out_chars: vec![],
expected_ranges: vec!['a'..'c', 'e'..'e', 'z'..'z'],
},
Row {
chars: vec!['a', 'b', 'c', 'e', 'h', 'z'],
ruled_out_chars: vec!['d', 'f', 'g'],
expected_ranges: vec!['a'..'h', 'z'..'z'],
},
];
for Row {
chars,
ruled_out_chars,
expected_ranges,
} in table.iter()
{
// `ranges` takes ruled-out characters as a set of `u32` code points.
let ruled_out_chars = ruled_out_chars
.into_iter()
.map(|c: &char| *c as u32)
.collect();
let ranges = CharacterSet::ranges(chars, &ruled_out_chars).collect::<Vec<_>>();
assert_eq!(ranges, *expected_ranges);
}
}
}

File diff suppressed because it is too large Load diff

View file

@ -87,7 +87,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
})
}
let extra_tokens = grammar_json
let extra_symbols = grammar_json
.extras
.unwrap_or(Vec::new())
.into_iter()
@ -107,7 +107,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
name: grammar_json.name,
word_token: grammar_json.word,
variables,
extra_tokens,
extra_symbols,
expected_conflicts,
external_tokens,
supertype_symbols,

View file

@ -283,7 +283,7 @@ mod tests {
fn build_grammar(variables: Vec<Variable>) -> ExtractedSyntaxGrammar {
ExtractedSyntaxGrammar {
variables,
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),

View file

@ -0,0 +1,293 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Per-symbol bookkeeping for `extract_default_aliases`: every alias the symbol
// appears under (with a usage count per alias), and whether the symbol ever
// appears with no alias at all.
#[derive(Clone, Default)]
struct SymbolStatus {
    aliases: Vec<(Alias, usize)>,
    appears_unaliased: bool,
}
// Update the grammar by finding symbols that always are aliased, and for each such symbol,
// promoting one of its aliases to a "default alias", which is applied globally instead
// of in a context-specific way.
//
// This has two benefits:
// * It reduces the overhead of storing production-specific alias info in the parse table.
// * Within an `ERROR` node, no context-specific aliases will be applied. This transformation
//   ensures that the children of an `ERROR` node have symbols that are consistent with the
//   way that they would appear in a valid syntax tree.
//
// Returns a map from each always-aliased symbol to its chosen default alias.
// Also mutates `syntax_grammar` in place, removing per-step aliases that the
// default alias makes redundant.
pub(super) fn extract_default_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];

    // For each grammar symbol, find all of the aliases under which the symbol appears,
    // and determine whether or not the symbol ever appears *unaliased*.
    for variable in syntax_grammar.variables.iter() {
        for production in variable.productions.iter() {
            for step in production.steps.iter() {
                // Note: `status` is a `&mut` reference; the binding itself is
                // never reassigned, so it does not need to be `mut`.
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // Default aliases don't work for inlined variables.
                if syntax_grammar.variables_to_inline.contains(&step.symbol) {
                    continue;
                }

                if let Some(alias) = &step.alias {
                    // Bump the count for this alias, or record it with count 1.
                    if let Some(count_for_alias) = status
                        .aliases
                        .iter_mut()
                        .find_map(|(a, count)| if a == alias { Some(count) } else { None })
                    {
                        *count_for_alias += 1;
                    } else {
                        status.aliases.push((alias.clone(), 1));
                    }
                } else {
                    status.appears_unaliased = true;
                }
            }
        }
    }

    let symbols_with_statuses = (terminal_status_list
        .iter_mut()
        .enumerate()
        .map(|(i, status)| (Symbol::terminal(i), status)))
    .chain(
        non_terminal_status_list
            .iter_mut()
            .enumerate()
            .map(|(i, status)| (Symbol::non_terminal(i), status)),
    )
    .chain(
        external_status_list
            .iter_mut()
            .enumerate()
            .map(|(i, status)| (Symbol::external(i), status)),
    );

    // For each symbol that always appears aliased, find the alias that occurs most often,
    // and designate that alias as the symbol's "default alias". Store all of these
    // default aliases in a map that will be returned.
    let mut result = AliasMap::new();
    for (symbol, status) in symbols_with_statuses {
        if status.appears_unaliased {
            status.aliases.clear();
        } else if let Some(default_entry) = status
            .aliases
            .iter()
            .enumerate()
            // Highest count wins; ties break toward the alias recorded first
            // (lowest index), hence the negated index in the sort key.
            .max_by_key(|(i, (_, count))| (count, -(*i as i64)))
            .map(|(_, entry)| entry.clone())
        {
            status.aliases.clear();
            status.aliases.push(default_entry.clone());
            result.insert(symbol, default_entry.0);
        }
    }

    // Wherever a symbol is aliased as its default alias, remove the usage of the alias,
    // because it will now be redundant.
    let mut alias_positions_to_clear = Vec::new();
    for variable in syntax_grammar.variables.iter_mut() {
        alias_positions_to_clear.clear();

        for (i, production) in variable.productions.iter().enumerate() {
            for (j, step) in production.steps.iter().enumerate() {
                let status = match step.symbol.kind {
                    SymbolType::External => &mut external_status_list[step.symbol.index],
                    SymbolType::NonTerminal => &mut non_terminal_status_list[step.symbol.index],
                    SymbolType::Terminal => &mut terminal_status_list[step.symbol.index],
                    SymbolType::End => panic!("Unexpected end token"),
                };

                // If this step is aliased as the symbol's default alias, then remove that alias,
                // unless another production needs the explicit alias at this same step index.
                if step.alias.is_some()
                    && step.alias.as_ref() == status.aliases.get(0).map(|t| &t.0)
                {
                    let mut other_productions_must_use_this_alias_at_this_index = false;
                    for (other_i, other_production) in variable.productions.iter().enumerate() {
                        if other_i != i
                            && other_production.steps.len() > j
                            && other_production.steps[j].alias == step.alias
                            && result.get(&other_production.steps[j].symbol) != step.alias.as_ref()
                        {
                            other_productions_must_use_this_alias_at_this_index = true;
                            break;
                        }
                    }

                    if !other_productions_must_use_this_alias_at_this_index {
                        alias_positions_to_clear.push((i, j));
                    }
                }
            }
        }

        for (production_index, step_index) in &alias_positions_to_clear {
            variable.productions[*production_index].steps[*step_index].alias = None;
        }
    }

    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // Exercises `extract_default_aliases` end-to-end: builds a two-variable
    // grammar, checks the returned default-alias map, and checks that the
    // grammar's per-step aliases were rewritten in place.
    //
    // (Renamed from `test_extract_simple_aliases`, a stale name carried over
    // from the old `extract_simple_aliases` module that this one replaced.)
    #[test]
    fn test_extract_default_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // Token 0 is always aliased as "a1".
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            // Token 1 is aliased within rule `v1` above, but not here.
                            ProductionStep::new(Symbol::terminal(1)),
                            // Token 2 is aliased differently here than in `v1`. The alias from
                            // `v1` should be promoted to the default alias, because `v1` appears
                            // first in the grammar.
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            // Token 3 is also aliased differently here than in `v1`. In this case,
                            // this alias should be promoted to the default alias, because it is
                            // used a greater number of times (twice).
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a6", true),
                        ],
                    }],
                },
            ],
            extra_symbols: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };
        // The lexical grammar only needs the right number of variables; their
        // contents are irrelevant to alias extraction.
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: vec![
                LexicalVariable {
                    name: "t0".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t1".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t2".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t3".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
            ],
        };

        let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(default_aliases.len(), 3);

        // Tokens 0, 2, and 3 are always aliased, so they get default aliases.
        assert_eq!(
            default_aliases.get(&Symbol::terminal(0)),
            Some(&Alias {
                value: "a1".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(2)),
            Some(&Alias {
                value: "a3".to_string(),
                is_named: true,
            })
        );
        assert_eq!(
            default_aliases.get(&Symbol::terminal(3)),
            Some(&Alias {
                value: "a6".to_string(),
                is_named: true,
            })
        );
        // Token 1 appears unaliased in `v2`, so it has no default alias.
        assert_eq!(default_aliases.get(&Symbol::terminal(1)), None);

        // Steps that used a symbol's default alias have had the explicit alias
        // removed; steps with a *different* alias keep it.
        assert_eq!(
            syntax_grammar.variables,
            vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)),
                            ProductionStep::new(Symbol::terminal(3)).with_alias("a4", true),
                        ],
                    },],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a5", true),
                            ProductionStep::new(Symbol::terminal(3)),
                            ProductionStep::new(Symbol::terminal(3)),
                        ],
                    },],
                },
            ]
        );
    }
}

View file

@ -1,223 +0,0 @@
use crate::generate::grammars::{LexicalGrammar, SyntaxGrammar};
use crate::generate::rules::{Alias, AliasMap, Symbol, SymbolType};
// Per-symbol bookkeeping for `extract_simple_aliases`: the single alias the
// symbol has been seen under so far (if any), and whether conflicting usages
// (different aliases, or an unaliased appearance) have been observed.
#[derive(Clone, Default)]
struct SymbolStatus {
    alias: Option<Alias>,
    conflicting: bool,
}
// Find symbols that are *always* aliased to a single name, remove those
// (now-redundant) per-step aliases from the grammar, and return a map from
// each such symbol to its alias.
pub(super) fn extract_simple_aliases(
    syntax_grammar: &mut SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> AliasMap {
    // Determine which symbols in the grammars are *always* aliased to a single name.
    let mut terminal_status_list = vec![SymbolStatus::default(); lexical_grammar.variables.len()];
    let mut non_terminal_status_list =
        vec![SymbolStatus::default(); syntax_grammar.variables.len()];
    let mut external_status_list =
        vec![SymbolStatus::default(); syntax_grammar.external_tokens.len()];
    for variable in syntax_grammar.variables.iter() {
        for production in variable.productions.iter() {
            for step in production.steps.iter() {
                let mut status = match step.symbol {
                    Symbol {
                        kind: SymbolType::External,
                        index,
                    } => &mut external_status_list[index],
                    Symbol {
                        kind: SymbolType::NonTerminal,
                        index,
                    } => &mut non_terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::Terminal,
                        index,
                    } => &mut terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::End,
                        ..
                    } => panic!("Unexpected end token"),
                };
                // An unaliased appearance disqualifies the symbol entirely.
                if step.alias.is_none() {
                    status.alias = None;
                    status.conflicting = true;
                }
                // Record the first alias seen; a second, different alias also
                // disqualifies the symbol.
                if !status.conflicting {
                    if status.alias.is_none() {
                        status.alias = step.alias.clone();
                    } else if status.alias != step.alias {
                        status.alias = None;
                        status.conflicting = true;
                    }
                }
            }
        }
    }

    // Remove the aliases for those symbols.
    for variable in syntax_grammar.variables.iter_mut() {
        for production in variable.productions.iter_mut() {
            for step in production.steps.iter_mut() {
                let status = match step.symbol {
                    Symbol {
                        kind: SymbolType::External,
                        index,
                    } => &external_status_list[index],
                    Symbol {
                        kind: SymbolType::NonTerminal,
                        index,
                    } => &non_terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::Terminal,
                        index,
                    } => &terminal_status_list[index],
                    Symbol {
                        kind: SymbolType::End,
                        ..
                    } => panic!("Unexpected end token"),
                };
                if status.alias.is_some() {
                    step.alias = None;
                }
            }
        }
    }

    // Populate a map of the symbols to their aliases.
    let mut result = AliasMap::new();
    for (i, status) in terminal_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::terminal(i), alias);
        }
    }
    for (i, status) in non_terminal_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::non_terminal(i), alias);
        }
    }
    for (i, status) in external_status_list.into_iter().enumerate() {
        if let Some(alias) = status.alias {
            result.insert(Symbol::external(i), alias);
        }
    }
    result
}
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        LexicalVariable, Production, ProductionStep, SyntaxVariable, VariableType,
    };
    use crate::generate::nfa::Nfa;

    // End-to-end check of `extract_simple_aliases`: only token 0, which is
    // aliased identically everywhere it appears, gets a simple alias, and
    // only its per-step aliases are removed from the grammar.
    #[test]
    fn test_extract_simple_aliases() {
        let mut syntax_grammar = SyntaxGrammar {
            variables: vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                        ],
                    }],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // Token 0 is always aliased as "a1".
                            ProductionStep::new(Symbol::terminal(0)).with_alias("a1", true),
                            // Token 1 is aliased above, but not here.
                            ProductionStep::new(Symbol::terminal(1)),
                            // Token 2 is aliased differently than above.
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                        ],
                    }],
                },
            ],
            extra_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            supertype_symbols: Vec::new(),
            external_tokens: Vec::new(),
            word_token: None,
        };
        // The lexical grammar only needs the right number of variables; their
        // contents don't matter for alias extraction.
        let lexical_grammar = LexicalGrammar {
            nfa: Nfa::new(),
            variables: vec![
                LexicalVariable {
                    name: "t1".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t2".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
                LexicalVariable {
                    name: "t3".to_string(),
                    kind: VariableType::Anonymous,
                    implicit_precedence: 0,
                    start_state: 0,
                },
            ],
        };

        let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
        assert_eq!(simple_aliases.len(), 1);
        assert_eq!(
            simple_aliases[&Symbol::terminal(0)],
            Alias {
                value: "a1".to_string(),
                is_named: true,
            }
        );

        assert_eq!(
            syntax_grammar.variables,
            vec![
                SyntaxVariable {
                    name: "v1".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            // 'Simple' alias removed
                            ProductionStep::new(Symbol::terminal(0)),
                            // Other aliases unchanged
                            ProductionStep::new(Symbol::terminal(1)).with_alias("a2", true),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a3", true),
                        ],
                    },],
                },
                SyntaxVariable {
                    name: "v2".to_owned(),
                    kind: VariableType::Named,
                    productions: vec![Production {
                        dynamic_precedence: 0,
                        steps: vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::terminal(1)),
                            ProductionStep::new(Symbol::terminal(2)).with_alias("a4", true),
                        ],
                    },],
                },
            ]
        );
    }
}

View file

@ -90,21 +90,13 @@ pub(super) fn extract_tokens(
.collect();
let mut separators = Vec::new();
let mut extra_tokens = Vec::new();
for rule in grammar.extra_tokens {
let mut extra_symbols = Vec::new();
for rule in grammar.extra_symbols {
if let Rule::Symbol(symbol) = rule {
let new_symbol = symbol_replacer.replace_symbol(symbol);
if new_symbol.is_non_terminal() {
return Error::err(format!(
"Non-token symbol '{}' cannot be used as an extra token",
&variables[new_symbol.index].name
));
} else {
extra_tokens.push(new_symbol);
}
extra_symbols.push(symbol_replacer.replace_symbol(symbol));
} else {
if let Some(index) = lexical_variables.iter().position(|v| v.rule == rule) {
extra_tokens.push(Symbol::terminal(index));
extra_symbols.push(Symbol::terminal(index));
} else {
separators.push(rule);
}
@ -158,7 +150,7 @@ pub(super) fn extract_tokens(
ExtractedSyntaxGrammar {
variables,
expected_conflicts,
extra_tokens,
extra_symbols,
variables_to_inline,
supertype_symbols,
external_tokens,
@ -415,15 +407,15 @@ mod test {
}
#[test]
fn test_extracting_extra_tokens() {
fn test_extracting_extra_symbols() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::string("x")),
Variable::named("comment", Rule::pattern("//.*")),
]);
grammar.extra_tokens = vec![Rule::string(" "), Rule::non_terminal(1)];
grammar.extra_symbols = vec![Rule::string(" "), Rule::non_terminal(1)];
let (syntax_grammar, lexical_grammar) = extract_tokens(grammar).unwrap();
assert_eq!(syntax_grammar.extra_tokens, vec![Symbol::terminal(1),]);
assert_eq!(syntax_grammar.extra_symbols, vec![Symbol::terminal(1),]);
assert_eq!(lexical_grammar.separators, vec![Rule::string(" "),]);
}
@ -472,28 +464,6 @@ mod test {
);
}
#[test]
fn test_error_on_non_terminal_symbol_extras() {
let mut grammar = build_grammar(vec![
Variable::named("rule_0", Rule::non_terminal(1)),
Variable::named("rule_1", Rule::non_terminal(2)),
Variable::named("rule_2", Rule::string("x")),
]);
grammar.extra_tokens = vec![Rule::non_terminal(1)];
match extract_tokens(grammar) {
Err(e) => {
assert_eq!(
e.message(),
"Non-token symbol 'rule_1' cannot be used as an extra token"
);
}
_ => {
panic!("Expected an error but got no error");
}
}
}
#[test]
fn test_error_on_external_with_same_name_as_non_terminal() {
let mut grammar = build_grammar(vec![
@ -522,7 +492,7 @@ mod test {
fn build_grammar(variables: Vec<Variable>) -> InternedGrammar {
InternedGrammar {
variables,
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),

View file

@ -199,7 +199,7 @@ unless they are used only as the grammar's start rule.
}
}
Ok(SyntaxGrammar {
extra_tokens: grammar.extra_tokens,
extra_symbols: grammar.extra_symbols,
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
external_tokens: grammar.external_tokens,

View file

@ -30,9 +30,9 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
external_tokens.push(Variable { name, kind, rule });
}
let mut extra_tokens = Vec::with_capacity(grammar.extra_tokens.len());
for extra_token in grammar.extra_tokens.iter() {
extra_tokens.push(interner.intern_rule(extra_token)?);
let mut extra_symbols = Vec::with_capacity(grammar.extra_symbols.len());
for extra_token in grammar.extra_symbols.iter() {
extra_symbols.push(interner.intern_rule(extra_token)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
@ -73,10 +73,16 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
);
}
for (i, variable) in variables.iter_mut().enumerate() {
if supertype_symbols.contains(&Symbol::non_terminal(i)) {
variable.kind = VariableType::Hidden;
}
}
Ok(InternedGrammar {
variables,
external_tokens,
extra_tokens,
extra_symbols,
expected_conflicts,
variables_to_inline,
supertype_symbols,
@ -236,7 +242,7 @@ mod tests {
InputGrammar {
variables,
name: "the_language".to_string(),
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),

View file

@ -1,6 +1,6 @@
mod expand_repeats;
mod expand_tokens;
mod extract_simple_aliases;
mod extract_default_aliases;
mod extract_tokens;
mod flatten_grammar;
mod intern_symbols;
@ -8,7 +8,7 @@ mod process_inlines;
use self::expand_repeats::expand_repeats;
pub(crate) use self::expand_tokens::expand_tokens;
use self::extract_simple_aliases::extract_simple_aliases;
use self::extract_default_aliases::extract_default_aliases;
use self::extract_tokens::extract_tokens;
use self::flatten_grammar::flatten_grammar;
use self::intern_symbols::intern_symbols;
@ -21,7 +21,7 @@ use crate::generate::rules::{AliasMap, Rule, Symbol};
pub(crate) struct IntermediateGrammar<T, U> {
variables: Vec<Variable>,
extra_tokens: Vec<T>,
extra_symbols: Vec<T>,
expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
@ -52,7 +52,7 @@ pub(crate) fn prepare_grammar(
let syntax_grammar = expand_repeats(syntax_grammar);
let mut syntax_grammar = flatten_grammar(syntax_grammar)?;
let lexical_grammar = expand_tokens(lexical_grammar)?;
let simple_aliases = extract_simple_aliases(&mut syntax_grammar, &lexical_grammar);
let default_aliases = extract_default_aliases(&mut syntax_grammar, &lexical_grammar);
let inlines = process_inlines(&syntax_grammar);
Ok((syntax_grammar, lexical_grammar, inlines, simple_aliases))
Ok((syntax_grammar, lexical_grammar, inlines, default_aliases))
}

View file

@ -127,6 +127,9 @@ impl InlinedProductionMapBuilder {
last_inserted_step.associativity = removed_step.associativity;
}
}
if p.dynamic_precedence.abs() > production.dynamic_precedence.abs() {
production.dynamic_precedence = p.dynamic_precedence;
}
production
}),
);
@ -196,7 +199,7 @@ mod tests {
fn test_basic_inlining() {
let grammar = SyntaxGrammar {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
@ -226,7 +229,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![ProductionStep::new(Symbol::terminal(14))],
},
],
@ -258,7 +261,7 @@ mod tests {
],
},
Production {
dynamic_precedence: 0,
dynamic_precedence: -2,
steps: vec![
ProductionStep::new(Symbol::terminal(10)),
ProductionStep::new(Symbol::terminal(14)),
@ -327,7 +330,7 @@ mod tests {
Symbol::non_terminal(3),
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
@ -429,7 +432,7 @@ mod tests {
},
],
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
extra_symbols: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff

View file

@ -1,3 +1,4 @@
use super::grammars::VariableType;
use smallbitvec::SmallBitVec;
use std::collections::HashMap;
use std::iter::FromIterator;
@ -139,6 +140,16 @@ impl Rule {
}
}
impl Alias {
    /// The `VariableType` this alias presents as: `Named` when the alias is
    /// marked `is_named`, otherwise `Anonymous`.
    pub fn kind(&self) -> VariableType {
        if self.is_named {
            VariableType::Named
        } else {
            VariableType::Anonymous
        }
    }
}
#[cfg(test)]
impl Rule {
pub fn terminal(index: usize) -> Self {
@ -366,7 +377,7 @@ impl FromIterator<Symbol> for TokenSet {
fn add_metadata<T: FnOnce(&mut MetadataParams)>(input: Rule, f: T) -> Rule {
match input {
Rule::Metadata { rule, mut params } => {
Rule::Metadata { rule, mut params } if !params.is_token => {
f(&mut params);
Rule::Metadata { rule, params }
}

View file

@ -24,6 +24,12 @@ pub(crate) enum ParseAction {
},
}
// The action taken for a non-terminal entry in a parse state: either go to
// another parse state, or `ShiftExtra` — NOTE(review): presumably used for
// non-terminal "extra" rules (cf. the new `is_non_terminal_extra` state flag);
// confirm against the parse-table builder.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum GotoAction {
    Goto(ParseStateId),
    ShiftExtra,
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct ParseTableEntry {
pub actions: Vec<ParseAction>,
@ -34,10 +40,11 @@ pub(crate) struct ParseTableEntry {
pub(crate) struct ParseState {
pub id: ParseStateId,
pub terminal_entries: HashMap<Symbol, ParseTableEntry>,
pub nonterminal_entries: HashMap<Symbol, ParseStateId>,
pub nonterminal_entries: HashMap<Symbol, GotoAction>,
pub lex_state_id: usize,
pub external_lex_state_id: usize,
pub core_id: usize,
pub is_non_terminal_extra: bool,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
@ -70,6 +77,7 @@ pub(crate) struct AdvanceAction {
#[derive(Clone, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct LexState {
pub accept_action: Option<Symbol>,
pub eof_action: Option<AdvanceAction>,
pub advance_actions: Vec<(CharacterSet, AdvanceAction)>,
}
@ -103,7 +111,13 @@ impl ParseState {
_ => None,
})
})
.chain(self.nonterminal_entries.iter().map(|(_, state)| *state))
.chain(self.nonterminal_entries.iter().filter_map(|(_, action)| {
if let GotoAction::Goto(state) = action {
Some(*state)
} else {
None
}
}))
}
pub fn update_referenced_states<F>(&mut self, mut f: F)
@ -121,15 +135,18 @@ impl ParseState {
}
}
}
for (symbol, other_state) in &self.nonterminal_entries {
let result = f(*other_state, self);
if result != *other_state {
updates.push((*symbol, 0, result));
for (symbol, action) in &self.nonterminal_entries {
if let GotoAction::Goto(other_state) = action {
let result = f(*other_state, self);
if result != *other_state {
updates.push((*symbol, 0, result));
}
}
}
for (symbol, action_index, new_state) in updates {
if symbol.is_non_terminal() {
self.nonterminal_entries.insert(symbol, new_state);
self.nonterminal_entries
.insert(symbol, GotoAction::Goto(new_state));
} else {
let entry = self.terminal_entries.get_mut(&symbol).unwrap();
if let ParseAction::Shift { is_repetition, .. } = entry.actions[action_index] {

View file

@ -1,26 +1,58 @@
use super::util;
use crate::error::Result;
use crate::loader::Loader;
use ansi_term::{Color, Style};
use ansi_term::Color;
use lazy_static::lazy_static;
use serde::ser::SerializeMap;
use serde::{Deserialize, Deserializer, Serialize, Serializer};
use serde_json::{json, Value};
use std::collections::HashMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fmt, fs, io, path, thread};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{highlight, highlight_html, Highlight, HighlightEvent, Properties};
use std::{fs, io, path, str, usize};
use tree_sitter_highlight::{HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer};
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
pub const HTML_FOOTER: &'static str = "
</body>
";
// CSS style strings indexed by xterm-256 color id, loaded once from a
// vendored JSON table.
lazy_static! {
    static ref CSS_STYLES_BY_COLOR_ID: Vec<String> =
        serde_json::from_str(include_str!("../vendor/xterm-colors.json")).unwrap();
}
// A display style for one highlight name: the ANSI form for terminal output,
// plus an optional pre-rendered inline-CSS equivalent for HTML output.
#[derive(Debug, Default)]
pub struct Style {
    pub ansi: ansi_term::Style,
    pub css: Option<String>,
}
#[derive(Debug)]
pub struct Theme {
ansi_styles: Vec<Option<Style>>,
css_styles: Vec<Option<String>>,
pub styles: Vec<Style>,
pub highlight_names: Vec<String>,
}
impl Theme {
@ -29,14 +61,8 @@ impl Theme {
Ok(serde_json::from_str(&json).unwrap_or_default())
}
fn ansi_style(&self, highlight: Highlight) -> Option<&Style> {
self.ansi_styles[highlight as usize].as_ref()
}
fn css_style(&self, highlight: Highlight) -> Option<&str> {
self.css_styles[highlight as usize]
.as_ref()
.map(|s| s.as_str())
pub fn default_style(&self) -> Style {
Style::default()
}
}
@ -45,20 +71,21 @@ impl<'de> Deserialize<'de> for Theme {
where
D: Deserializer<'de>,
{
let highlight_count = Highlight::Unknown as usize + 1;
let mut ansi_styles = vec![None; highlight_count];
let mut css_styles = vec![None; highlight_count];
if let Ok(colors) = HashMap::<Highlight, Value>::deserialize(deserializer) {
for (highlight, style_value) in colors {
let mut styles = Vec::new();
let mut highlight_names = Vec::new();
if let Ok(colors) = HashMap::<String, Value>::deserialize(deserializer) {
highlight_names.reserve(colors.len());
styles.reserve(colors.len());
for (name, style_value) in colors {
let mut style = Style::default();
parse_style(&mut style, style_value);
ansi_styles[highlight as usize] = Some(style);
css_styles[highlight as usize] = Some(style_to_css(style));
highlight_names.push(name);
styles.push(style);
}
}
Ok(Self {
ansi_styles,
css_styles,
styles,
highlight_names,
})
}
}
@ -68,48 +95,40 @@ impl Serialize for Theme {
where
S: Serializer,
{
let entry_count = self.ansi_styles.iter().filter(|i| i.is_some()).count();
let mut map = serializer.serialize_map(Some(entry_count))?;
for (i, style) in self.ansi_styles.iter().enumerate() {
let highlight = Highlight::from_usize(i).unwrap();
if highlight == Highlight::Unknown {
break;
}
if let Some(style) = style {
let color = style.foreground.map(|color| match color {
Color::Black => json!("black"),
Color::Blue => json!("blue"),
Color::Cyan => json!("cyan"),
Color::Green => json!("green"),
Color::Purple => json!("purple"),
Color::Red => json!("red"),
Color::White => json!("white"),
Color::Yellow => json!("yellow"),
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
Color::Fixed(n) => json!(n),
});
if style.is_bold || style.is_italic || style.is_underline {
let mut entry = HashMap::new();
if let Some(color) = color {
entry.insert("color", color);
}
if style.is_bold {
entry.insert("bold", Value::Bool(true));
}
if style.is_italic {
entry.insert("italic", Value::Bool(true));
}
if style.is_underline {
entry.insert("underline", Value::Bool(true));
}
map.serialize_entry(&highlight, &entry)?;
} else if let Some(color) = color {
map.serialize_entry(&highlight, &color)?;
} else {
map.serialize_entry(&highlight, &Value::Null)?;
let mut map = serializer.serialize_map(Some(self.styles.len()))?;
for (name, style) in self.highlight_names.iter().zip(&self.styles) {
let style = &style.ansi;
let color = style.foreground.map(|color| match color {
Color::Black => json!("black"),
Color::Blue => json!("blue"),
Color::Cyan => json!("cyan"),
Color::Green => json!("green"),
Color::Purple => json!("purple"),
Color::Red => json!("red"),
Color::White => json!("white"),
Color::Yellow => json!("yellow"),
Color::RGB(r, g, b) => json!(format!("#{:x?}{:x?}{:x?}", r, g, b)),
Color::Fixed(n) => json!(n),
});
if style.is_bold || style.is_italic || style.is_underline {
let mut style_json = HashMap::new();
if let Some(color) = color {
style_json.insert("color", color);
}
if style.is_bold {
style_json.insert("bold", Value::Bool(true));
}
if style.is_italic {
style_json.insert("italic", Value::Bool(true));
}
if style.is_underline {
style_json.insert("underline", Value::Bool(true));
}
map.serialize_entry(&name, &style_json)?;
} else if let Some(color) = color {
map.serialize_entry(&name, &color)?;
} else {
map.serialize_entry(&highlight, &Value::Null)?;
map.serialize_entry(&name, &Value::Null)?;
}
}
map.end()
@ -149,42 +168,39 @@ impl Default for Theme {
}
}
impl fmt::Debug for Theme {
    // Renders the theme as `{Highlight: Style, ...}`, listing only the
    // highlights that actually have a style configured.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        write!(f, "{{")?;
        let mut first = true;
        for (i, style) in self.ansi_styles.iter().enumerate() {
            if let Some(style) = style {
                let highlight = Highlight::from_usize(i).unwrap();
                if !first {
                    write!(f, ", ")?;
                }
                write!(f, "{:?}: {:?}", highlight, style)?;
                first = false;
            }
        }
        write!(f, "}}")?;
        Ok(())
    }
}
fn parse_style(style: &mut Style, json: Value) {
if let Value::Object(entries) = json {
for (property_name, value) in entries {
match property_name.as_str() {
"bold" => *style = style.bold(),
"italic" => *style = style.italic(),
"underline" => *style = style.underline(),
"bold" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.bold()
}
}
"italic" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.italic()
}
}
"underline" => {
if value == Value::Bool(true) {
style.ansi = style.ansi.underline()
}
}
"color" => {
if let Some(color) = parse_color(value) {
*style = style.fg(color);
style.ansi = style.ansi.fg(color);
}
}
_ => {}
}
}
style.css = Some(style_to_css(style.ansi));
} else if let Some(color) = parse_color(json) {
*style = style.fg(color);
style.ansi = style.ansi.fg(color);
style.css = Some(style_to_css(style.ansi));
} else {
style.css = None;
}
}
@ -223,9 +239,12 @@ fn parse_color(json: Value) -> Option<Color> {
}
}
fn style_to_css(style: Style) -> String {
fn style_to_css(style: ansi_term::Style) -> String {
use std::fmt::Write;
let mut result = "style='".to_string();
if style.is_underline {
write!(&mut result, "text-decoration: underline;").unwrap();
}
if style.is_bold {
write!(&mut result, "font-weight: bold;").unwrap();
}
@ -254,163 +273,95 @@ fn color_to_css(color: Color) -> &'static str {
}
}
// Returns a shared flag that flips from 0 to 1 once a line can be read from
// stdin, letting a long-running highlight be cancelled interactively.
// The spawned reader thread blocks on stdin and is never joined (detached).
fn cancel_on_stdin() -> Arc<AtomicUsize> {
    let result = Arc::new(AtomicUsize::new(0));
    thread::spawn({
        let flag = result.clone();
        move || {
            let mut line = String::new();
            io::stdin().read_line(&mut line).unwrap();
            flag.store(1, Ordering::Relaxed);
        }
    });
    result
}
/// Writes `source` to stdout with ANSI escape codes applied per `theme`.
///
/// NOTE(review): this span interleaves two revisions of the function — an
/// older `highlight(..)`/`PropertySheet` code path and a newer
/// `Highlighter`/`HighlightConfiguration` code path (both parameter sets and
/// both event loops are present). It must be resolved to one revision before
/// this compiles; comments below mark which lines belong to which path.
pub fn ansi(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
    config: &HighlightConfiguration,
    print_time: bool,
    cancellation_flag: Option<&AtomicUsize>,
) -> Result<()> {
    use std::io::Write;
    // Lock stdout once for the whole run instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    let cancellation_flag = cancel_on_stdin();
    let time = Instant::now();
    // Older path: stack of highlight ids, styles looked up via the theme.
    let mut highlight_stack = Vec::new();
    for event in highlight(
        source,
        language,
        property_sheet,
        Some(cancellation_flag.as_ref()),
        |s| language_for_injection_string(loader, s),
    )
    .map_err(|e| e.to_string())?
    {
        let event = event.map_err(|e| e.to_string())?;
        match event {
            HighlightEvent::Source { start, end } => {
                // Paint the span with the innermost active style, if any.
                if let Some(style) = highlight_stack.last().and_then(|s| theme.ansi_style(*s)) {
                    style.paint(&source[start..end]).write_to(&mut stdout)?;
                } else {
                    stdout.write_all(&source[start..end])?;
                }
            }
            HighlightEvent::HighlightStart(h) => {
                highlight_stack.push(h);
    // Newer path: drive a `Highlighter` and keep a stack of ANSI styles,
    // seeded with the theme's default style.
    let mut highlighter = Highlighter::new();
    let events = highlighter.highlight(config, source, cancellation_flag, |string| {
        loader.highlight_config_for_injection_string(string)
    })?;
    let mut style_stack = vec![theme.default_style().ansi];
    for event in events {
        match event? {
            HighlightEvent::HighlightStart(highlight) => {
                style_stack.push(theme.styles[highlight.0].ansi);
            }
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
                style_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                // Always paint with the innermost style (default at minimum).
                style_stack
                    .last()
                    .unwrap()
                    .paint(&source[start..end])
                    .write_to(&mut stdout)?;
            }
        }
    }
    if print_time {
        // Both revisions' timing reports are present.
        let duration = time.elapsed();
        let duration_ms = duration.as_secs() * 1000 + duration.subsec_nanos() as u64 / 1000000;
        eprintln!("{} ms", duration_ms);
        eprintln!("Time: {}ms", time.elapsed().as_millis());
    }
    Ok(())
}
/// Page boilerplate emitted before highlighted HTML output: sets a monospace
/// body font, dimmed non-selectable line numbers, and pre-formatted lines.
pub const HTML_HEADER: &'static str = "
<!doctype HTML>
<head>
<title>Tree-sitter Highlighting</title>
<style>
body {
font-family: monospace
}
.line-number {
user-select: none;
text-align: right;
color: rgba(27,31,35,.3);
padding: 0 10px;
}
.line {
white-space: pre;
}
</style>
</head>
<body>
";
/// Closing boilerplate emitted after highlighted HTML output.
pub const HTML_FOOTER: &'static str = "
</body>
";
/// Renders `source` to stdout as an HTML table of numbered highlighted lines.
///
/// NOTE(review): this span interleaves two revisions of the function — an
/// older `highlight_html`/`PropertySheet` path and a newer
/// `Highlighter`/`HtmlRenderer` path (both parameter sets, duplicate
/// `<table>` writes, and two rendering loops are present). It must be
/// resolved to one revision; comments below mark which lines belong where.
pub fn html(
    loader: &Loader,
    theme: &Theme,
    source: &[u8],
    language: Language,
    property_sheet: &PropertySheet<Properties>,
    config: &HighlightConfiguration,
    quiet: bool,
    print_time: bool,
) -> Result<()> {
    use std::io::Write;
    // Lock stdout once for the whole run instead of per write.
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    write!(&mut stdout, "<table>\n")?;
    let time = Instant::now();
    let cancellation_flag = util::cancel_on_stdin();
    let mut highlighter = Highlighter::new();
    let cancellation_flag = cancel_on_stdin();
    // Older path: pre-rendered HTML lines produced by `highlight_html`,
    // with per-highlight CSS looked up from the theme.
    let lines = highlight_html(
        source,
        language,
        property_sheet,
        Some(cancellation_flag.as_ref()),
        |s| language_for_injection_string(loader, s),
        |highlight| {
            if let Some(css_style) = theme.css_style(highlight) {
                css_style
            } else {
                ""
            }
        },
    )
    .map_err(|e| e.to_string())?;
    for (i, line) in lines.into_iter().enumerate() {
        write!(
            &mut stdout,
            "<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
            i + 1,
            line
        )?;
    // Newer path: drive a `Highlighter` and render with `HtmlRenderer`,
    // fetching each highlight's CSS bytes from `theme.styles`.
    let events = highlighter.highlight(config, source, Some(&cancellation_flag), |string| {
        loader.highlight_config_for_injection_string(string)
    })?;
    let mut renderer = HtmlRenderer::new();
    renderer.render(events, source, &move |highlight| {
        if let Some(css_style) = &theme.styles[highlight.0].css {
            css_style.as_bytes()
        } else {
            "".as_bytes()
        }
    })?;
    // Newer path suppresses the table entirely when `quiet` is set.
    if !quiet {
        write!(&mut stdout, "<table>\n")?;
        for (i, line) in renderer.lines().enumerate() {
            write!(
                &mut stdout,
                "<tr><td class=line-number>{}</td><td class=line>{}</td></tr>\n",
                i + 1,
                line
            )?;
        }
        write!(&mut stdout, "</table>\n")?;
    }
    write!(&mut stdout, "</table>\n")?;
    if print_time {
        eprintln!("Time: {}ms", time.elapsed().as_millis());
    }
    Ok(())
}
/// Resolves an injection string (e.g. from an `injections.scm` capture) to a
/// language and its highlighting property sheet. Failures are reported to
/// stderr and mapped to `None` so highlighting can continue without the
/// injected language.
fn language_for_injection_string<'a>(
    loader: &'a Loader,
    string: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)> {
    // First resolve the injection string to a language configuration.
    let (language, configuration) =
        match loader.language_configuration_for_injection_string(string) {
            Ok(Some(pair)) => pair,
            Ok(None) => return None,
            Err(e) => {
                eprintln!(
                    "Failed to load language for injection string '{}': {}",
                    string,
                    e.message()
                );
                return None;
            }
        };
    // Then load that configuration's highlighting property sheet.
    match configuration.highlight_property_sheet(language) {
        Ok(Some(sheet)) => Some((language, sheet)),
        Ok(None) => None,
        Err(e) => {
            eprintln!(
                "Failed to load property sheet for injection string '{}': {}",
                string,
                e.message()
            );
            None
        }
    }
}

View file

@ -5,7 +5,11 @@ pub mod highlight;
pub mod loader;
pub mod logger;
pub mod parse;
pub mod query;
pub mod query_testing;
pub mod tags;
pub mod test;
pub mod test_highlight;
pub mod util;
pub mod wasm;
pub mod web_ui;

View file

@ -5,12 +5,15 @@ use regex::{Regex, RegexBuilder};
use serde_derive::Deserialize;
use std::collections::HashMap;
use std::io::BufReader;
use std::ops::Range;
use std::path::{Path, PathBuf};
use std::process::Command;
use std::sync::Mutex;
use std::time::SystemTime;
use std::{fs, mem};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
use tree_sitter::{Language, QueryError};
use tree_sitter_highlight::HighlightConfiguration;
use tree_sitter_tags::{Error as TagsError, TagsConfiguration};
#[cfg(unix)]
const DYLIB_EXTENSION: &'static str = "so";
@ -20,23 +23,31 @@ const DYLIB_EXTENSION: &'static str = "dll";
const BUILD_TARGET: &'static str = env!("BUILD_TARGET");
#[derive(Default)]
pub struct LanguageConfiguration {
pub struct LanguageConfiguration<'a> {
pub scope: Option<String>,
pub content_regex: Option<Regex>,
pub _first_line_regex: Option<Regex>,
pub injection_regex: Option<Regex>,
pub file_types: Vec<String>,
pub highlight_property_sheet_path: Option<PathBuf>,
pub root_path: PathBuf,
pub highlights_filenames: Option<Vec<String>>,
pub injections_filenames: Option<Vec<String>>,
pub locals_filenames: Option<Vec<String>>,
pub tags_filenames: Option<Vec<String>>,
language_id: usize,
highlight_property_sheet: OnceCell<Option<PropertySheet<Properties>>>,
highlight_config: OnceCell<Option<HighlightConfiguration>>,
tags_config: OnceCell<Option<TagsConfiguration>>,
highlight_names: &'a Mutex<Vec<String>>,
use_all_highlight_names: bool,
}
pub struct Loader {
parser_lib_path: PathBuf,
languages_by_id: Vec<(PathBuf, OnceCell<Language>)>,
language_configurations: Vec<LanguageConfiguration>,
language_configurations: Vec<LanguageConfiguration<'static>>,
language_configuration_ids_by_file_type: HashMap<String, Vec<usize>>,
highlight_names: Box<Mutex<Vec<String>>>,
use_all_highlight_names: bool,
}
unsafe impl Send for Loader {}
@ -49,9 +60,22 @@ impl Loader {
languages_by_id: Vec::new(),
language_configurations: Vec::new(),
language_configuration_ids_by_file_type: HashMap::new(),
highlight_names: Box::new(Mutex::new(Vec::new())),
use_all_highlight_names: true,
}
}
pub fn configure_highlights(&mut self, names: &Vec<String>) {
self.use_all_highlight_names = false;
let mut highlights = self.highlight_names.lock().unwrap();
highlights.clear();
highlights.extend(names.iter().cloned());
}
pub fn highlight_names(&self) -> Vec<String> {
self.highlight_names.lock().unwrap().clone()
}
pub fn find_all_languages(&mut self, parser_src_paths: &Vec<PathBuf>) -> Result<()> {
for parser_container_dir in parser_src_paths.iter() {
if let Ok(entries) = fs::read_dir(parser_container_dir) {
@ -134,11 +158,12 @@ impl Loader {
if configuration_ids.len() == 1 {
configuration = &self.language_configurations[configuration_ids[0]];
}
// If multiple language configurations match, then determine which
// one to use by applying the configurations' content regexes.
else {
let file_contents = fs::read_to_string(path)?;
let file_contents = fs::read(path)
.map_err(Error::wrap(|| format!("Failed to read path {:?}", path)))?;
let file_contents = String::from_utf8_lossy(&file_contents);
let mut best_score = -2isize;
let mut best_configuration_id = None;
for configuration_id in configuration_ids {
@ -151,7 +176,6 @@ impl Loader {
if let Some(mat) = content_regex.find(&file_contents) {
score = (mat.end() - mat.start()) as isize;
}
// If the content regex does not match, then *penalize* this
// language configuration, so that language configurations
// without content regexes are preferred over those with
@ -338,10 +362,63 @@ impl Loader {
Ok(language)
}
fn find_language_configurations_at_path<'a>(
pub fn highlight_config_for_injection_string<'a>(
&'a self,
string: &str,
) -> Option<&'a HighlightConfiguration> {
match self.language_configuration_for_injection_string(string) {
Err(e) => {
eprintln!(
"Failed to load language for injection string '{}': {}",
string,
e.message()
);
None
}
Ok(None) => None,
Ok(Some((language, configuration))) => match configuration.highlight_config(language) {
Err(e) => {
eprintln!(
"Failed to load property sheet for injection string '{}': {}",
string,
e.message()
);
None
}
Ok(None) => None,
Ok(Some(config)) => Some(config),
},
}
}
pub fn find_language_configurations_at_path<'a>(
&'a mut self,
parser_path: &Path,
) -> Result<&[LanguageConfiguration]> {
#[derive(Deserialize)]
#[serde(untagged)]
enum PathsJSON {
Empty,
Single(String),
Multiple(Vec<String>),
}
impl Default for PathsJSON {
fn default() -> Self {
PathsJSON::Empty
}
}
impl PathsJSON {
fn into_vec(self) -> Option<Vec<String>> {
match self {
PathsJSON::Empty => None,
PathsJSON::Single(s) => Some(vec![s]),
PathsJSON::Multiple(s) => Some(s),
}
}
}
#[derive(Deserialize)]
struct LanguageConfigurationJSON {
#[serde(default)]
@ -355,7 +432,14 @@ impl Loader {
first_line_regex: Option<String>,
#[serde(rename = "injection-regex")]
injection_regex: Option<String>,
highlights: Option<String>,
#[serde(default)]
highlights: PathsJSON,
#[serde(default)]
injections: PathsJSON,
#[serde(default)]
locals: PathsJSON,
#[serde(default)]
tags: PathsJSON,
}
#[derive(Deserialize)]
@ -394,22 +478,21 @@ impl Loader {
});
let configuration = LanguageConfiguration {
root_path: parser_path.to_path_buf(),
scope: config_json.scope,
language_id,
file_types: config_json.file_types.unwrap_or(Vec::new()),
content_regex: config_json
.content_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
_first_line_regex: config_json
.first_line_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
injection_regex: config_json
.injection_regex
.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok()),
highlight_property_sheet_path: config_json
.highlights
.map(|h| parser_path.join(h)),
highlight_property_sheet: OnceCell::new(),
content_regex: Self::regex(config_json.content_regex),
_first_line_regex: Self::regex(config_json.first_line_regex),
injection_regex: Self::regex(config_json.injection_regex),
injections_filenames: config_json.injections.into_vec(),
locals_filenames: config_json.locals.into_vec(),
tags_filenames: config_json.tags.into_vec(),
highlights_filenames: config_json.highlights.into_vec(),
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
for file_type in &configuration.file_types {
@ -419,7 +502,8 @@ impl Loader {
.push(self.language_configurations.len());
}
self.language_configurations.push(configuration);
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
}
}
}
@ -427,52 +511,184 @@ impl Loader {
if self.language_configurations.len() == initial_language_configuration_count
&& parser_path.join("src").join("grammar.json").exists()
{
self.language_configurations.push(LanguageConfiguration {
let configuration = LanguageConfiguration {
root_path: parser_path.to_owned(),
language_id: self.languages_by_id.len(),
file_types: Vec::new(),
scope: None,
content_regex: None,
injection_regex: None,
file_types: Vec::new(),
_first_line_regex: None,
highlight_property_sheet_path: None,
highlight_property_sheet: OnceCell::new(),
});
injection_regex: None,
injections_filenames: None,
locals_filenames: None,
highlights_filenames: None,
tags_filenames: None,
highlight_config: OnceCell::new(),
tags_config: OnceCell::new(),
highlight_names: &*self.highlight_names,
use_all_highlight_names: self.use_all_highlight_names,
};
self.language_configurations
.push(unsafe { mem::transmute(configuration) });
self.languages_by_id
.push((parser_path.to_owned(), OnceCell::new()));
}
Ok(&self.language_configurations[initial_language_configuration_count..])
}
fn regex(pattern: Option<String>) -> Option<Regex> {
pattern.and_then(|r| RegexBuilder::new(&r).multi_line(true).build().ok())
}
}
impl LanguageConfiguration {
pub fn highlight_property_sheet(
&self,
language: Language,
) -> Result<Option<&PropertySheet<Properties>>> {
self.highlight_property_sheet
impl<'a> LanguageConfiguration<'a> {
/// Lazily builds and caches this language's syntax-highlighting
/// configuration.
///
/// NOTE(review): this span interleaves two revisions — an older JSON
/// property-sheet loader and a newer query-file (`highlights.scm`,
/// `injections.scm`, `locals.scm`) loader. Both bodies are present and the
/// braces do not balance as shown; resolve to one revision.
pub fn highlight_config(&self, language: Language) -> Result<Option<&HighlightConfiguration>> {
    return self
        .highlight_config
        .get_or_try_init(|| {
            // Older path: deserialize a JSON property sheet from disk.
            if let Some(path) = &self.highlight_property_sheet_path {
                let sheet_json = fs::read_to_string(path).map_err(Error::wrap(|| {
                    format!(
                        "Failed to read property sheet {:?}",
                        path.file_name().unwrap()
                    )
                }))?;
                let sheet =
                    load_property_sheet(language, &sheet_json).map_err(Error::wrap(|| {
                        format!(
                            "Failed to parse property sheet {:?}",
                            path.file_name().unwrap()
                        )
                    }))?;
                Ok(Some(sheet))
            } else {
            // Newer path: read the three query files and compile them into
            // one `HighlightConfiguration`.
            let (highlights_query, highlight_ranges) =
                self.read_queries(&self.highlights_filenames, "highlights.scm")?;
            let (injections_query, injection_ranges) =
                self.read_queries(&self.injections_filenames, "injections.scm")?;
            let (locals_query, locals_ranges) =
                self.read_queries(&self.locals_filenames, "locals.scm")?;
            // No highlights query means highlighting is unsupported, not an error.
            if highlights_query.is_empty() {
                Ok(None)
            } else {
                let mut result = HighlightConfiguration::new(
                    language,
                    &highlights_query,
                    &injections_query,
                    &locals_query,
                )
                // On a compile error, attribute the offset to the query file
                // it fell in: injections first, then locals, then highlights
                // (matching the concatenation order implied by the offsets).
                .map_err(|error| {
                    if error.offset < injections_query.len() {
                        Self::include_path_in_query_error(
                            error,
                            &injection_ranges,
                            &injections_query,
                            0,
                        )
                    } else if error.offset < injections_query.len() + locals_query.len() {
                        Self::include_path_in_query_error(
                            error,
                            &locals_ranges,
                            &locals_query,
                            injections_query.len(),
                        )
                    } else {
                        Self::include_path_in_query_error(
                            error,
                            &highlight_ranges,
                            &highlights_query,
                            injections_query.len() + locals_query.len(),
                        )
                    }
                })?;
                // Collect any capture names not yet in the shared
                // highlight-name list, then configure the result with it.
                let mut all_highlight_names = self.highlight_names.lock().unwrap();
                if self.use_all_highlight_names {
                    for capture_name in result.query.capture_names() {
                        if !all_highlight_names.contains(capture_name) {
                            all_highlight_names.push(capture_name.clone());
                        }
                    }
                }
                result.configure(&all_highlight_names);
                Ok(Some(result))
            }
        })
        .map(Option::as_ref);
}
/// Lazily builds and caches this language's tagging configuration from the
/// `tags.scm` and `locals.scm` query files.
pub fn tags_config(&self, language: Language) -> Result<Option<&TagsConfiguration>> {
    self.tags_config
        .get_or_try_init(|| {
            let (tags_query, tags_ranges) =
                self.read_queries(&self.tags_filenames, "tags.scm")?;
            let (locals_query, locals_ranges) =
                self.read_queries(&self.locals_filenames, "locals.scm")?;
            // No tags query means tagging is unsupported, not an error.
            if tags_query.is_empty() {
                Ok(None)
            } else {
                TagsConfiguration::new(language, &tags_query, &locals_query)
                    .map(Some)
                    .map_err(|error| {
                        // For query compile errors, attribute the offset to
                        // the file it fell in: offsets below
                        // `locals_query.len()` belong to the locals query,
                        // the rest to the tags query.
                        if let TagsError::Query(error) = error {
                            if error.offset < locals_query.len() {
                                Self::include_path_in_query_error(
                                    error,
                                    &locals_ranges,
                                    &locals_query,
                                    0,
                                )
                            } else {
                                Self::include_path_in_query_error(
                                    error,
                                    &tags_ranges,
                                    &tags_query,
                                    locals_query.len(),
                                )
                            }
                            .into()
                        } else {
                            // Non-query errors pass through unchanged.
                            error.into()
                        }
                    })
            }
        })
        .map(Option::as_ref)
}
/// Rewrites a query error's position, which is relative to a concatenation of
/// several query files, into a position within the single file that contains
/// it, returning that file's path alongside the adjusted error.
fn include_path_in_query_error<'b>(
    mut error: QueryError,
    ranges: &'b Vec<(String, Range<usize>)>,
    source: &str,
    start_offset: usize,
) -> (&'b str, QueryError) {
    // Offset within this section of the concatenated query string.
    let section_offset = error.offset - start_offset;
    // Locate the file whose byte range covers the error.
    let (path, range) = ranges
        .iter()
        .find(|(_, range)| range.contains(&section_offset))
        .unwrap();
    error.offset = section_offset - range.start;
    // The row is the count of newlines preceding the error within that file.
    let mut row = 0;
    for ch in source[range.start..section_offset].chars() {
        if ch == '\n' {
            row += 1;
        }
    }
    error.row = row;
    (path.as_ref(), error)
}
fn read_queries(
&self,
paths: &Option<Vec<String>>,
default_path: &str,
) -> Result<(String, Vec<(String, Range<usize>)>)> {
let mut query = String::new();
let mut path_ranges = Vec::new();
if let Some(paths) = paths.as_ref() {
for path in paths {
let abs_path = self.root_path.join(path);
let prev_query_len = query.len();
query += &fs::read_to_string(&abs_path).map_err(Error::wrap(|| {
format!("Failed to read query file {:?}", path)
}))?;
path_ranges.push((path.clone(), prev_query_len..query.len()));
}
} else {
let queries_path = self.root_path.join("queries");
let path = queries_path.join(default_path);
if path.exists() {
query = fs::read_to_string(&path).map_err(Error::wrap(|| {
format!("Failed to read query file {:?}", path)
}))?;
path_ranges.push((default_path.to_string(), 0..query.len()));
}
}
Ok((query, path_ranges))
}
}
fn needs_recompile(

View file

@ -1,10 +1,13 @@
use clap::{App, AppSettings, Arg, SubCommand};
use error::Error;
use glob::glob;
use std::path::Path;
use std::process::exit;
use std::{env, fs, u64};
use tree_sitter::Language;
use tree_sitter_cli::{
config, error, generate, highlight, loader, logger, parse, test, wasm, web_ui,
config, error, generate, highlight, loader, logger, parse, query, tags, test, test_highlight,
util, wasm, web_ui,
};
const BUILD_VERSION: &'static str = env!("CARGO_PKG_VERSION");
@ -38,8 +41,7 @@ fn run() -> error::Result<()> {
.about("Generate a parser")
.arg(Arg::with_name("grammar-path").index(1))
.arg(Arg::with_name("log").long("log"))
.arg(Arg::with_name("next-abi").long("next-abi"))
.arg(Arg::with_name("properties-only").long("properties"))
.arg(Arg::with_name("prev-abi").long("prev-abi"))
.arg(
Arg::with_name("report-states-for-rule")
.long("report-states-for-rule")
@ -50,19 +52,20 @@ fn run() -> error::Result<()> {
)
.subcommand(
SubCommand::with_name("parse")
.about("Parse a file")
.about("Parse files")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("debug").long("debug").short("d"))
.arg(Arg::with_name("debug-graph").long("debug-graph").short("D"))
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("stat").long("stat").short("s"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("allow-cancellation").long("cancel"))
.arg(Arg::with_name("timeout").long("timeout").takes_value(true))
.arg(
Arg::with_name("edits")
@ -73,6 +76,40 @@ fn run() -> error::Result<()> {
.number_of_values(1),
),
)
.subcommand(
SubCommand::with_name("query")
.about("Search files using a syntax tree query")
.arg(Arg::with_name("query-path").index(1).required(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.index(2)
.multiple(true)
.required(false),
)
.arg(
Arg::with_name("byte-range")
.help("The range of byte offsets in which the query will be executed")
.long("byte-range")
.takes_value(true),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("captures").long("captures").short("c"))
.arg(Arg::with_name("test").long("test")),
)
.subcommand(
SubCommand::with_name("tags")
.arg(Arg::with_name("quiet").long("quiet").short("q"))
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("paths")
.help("The source file to use")
.index(1)
.multiple(true),
),
)
.subcommand(
SubCommand::with_name("test")
.about("Run a parser's tests")
@ -89,15 +126,17 @@ fn run() -> error::Result<()> {
.subcommand(
SubCommand::with_name("highlight")
.about("Highlight a file")
.arg(Arg::with_name("paths-file").long("paths").takes_value(true))
.arg(
Arg::with_name("path")
Arg::with_name("paths")
.index(1)
.multiple(true)
.required(true),
.required(false),
)
.arg(Arg::with_name("scope").long("scope").takes_value(true))
.arg(Arg::with_name("html").long("html").short("h"))
.arg(Arg::with_name("time").long("time").short("t")),
.arg(Arg::with_name("time").long("time").short("t"))
.arg(Arg::with_name("quiet").long("quiet").short("q")),
)
.subcommand(
SubCommand::with_name("build-wasm")
@ -110,7 +149,14 @@ fn run() -> error::Result<()> {
.arg(Arg::with_name("path").index(1).multiple(true)),
)
.subcommand(
SubCommand::with_name("web-ui").about("Test a parser interactively in the browser"),
SubCommand::with_name("web-ui")
.about("Test a parser interactively in the browser")
.arg(
Arg::with_name("quiet")
.long("quiet")
.short("q")
.help("open in default browser"),
),
)
.subcommand(
SubCommand::with_name("dump-languages")
@ -128,7 +174,6 @@ fn run() -> error::Result<()> {
config.save(&home_dir)?;
} else if let Some(matches) = matches.subcommand_matches("generate") {
let grammar_path = matches.value_of("grammar-path");
let properties_only = matches.is_present("properties-only");
let report_symbol_name = matches.value_of("report-states-for-rule").or_else(|| {
if matches.is_present("report-states") {
Some("")
@ -139,24 +184,40 @@ fn run() -> error::Result<()> {
if matches.is_present("log") {
logger::init();
}
let next_abi = matches.is_present("next-abi");
let prev_abi = matches.is_present("prev-abi");
generate::generate_parser_in_directory(
&current_dir,
grammar_path,
properties_only,
next_abi,
!prev_abi,
report_symbol_name,
)?;
} else if let Some(matches) = matches.subcommand_matches("test") {
let debug = matches.is_present("debug");
let debug_graph = matches.is_present("debug-graph");
let filter = matches.value_of("filter");
let update = matches.is_present("update");
let corpus_path = current_dir.join("corpus");
if let Some(language) = loader.languages_at_path(&current_dir)?.first() {
test::run_tests_at_path(*language, &corpus_path, debug, debug_graph, filter, update)?;
} else {
eprintln!("No language found");
let filter = matches.value_of("filter");
let languages = loader.languages_at_path(&current_dir)?;
let language = languages
.first()
.ok_or_else(|| "No language found".to_string())?;
let test_dir = current_dir.join("test");
// Run the corpus tests. Look for them at two paths: `test/corpus` and `corpus`.
let mut test_corpus_dir = test_dir.join("corpus");
if !test_corpus_dir.is_dir() {
test_corpus_dir = current_dir.join("corpus");
}
if test_corpus_dir.is_dir() {
test::run_tests_at_path(*language, &test_corpus_dir, debug, debug_graph, filter, update)?;
}
// Check that all of the queries are valid.
test::check_queries_at_path(*language, &current_dir.join("queries"))?;
// Run the syntax highlighting tests.
let test_highlight_dir = test_dir.join("highlight");
if test_highlight_dir.is_dir() {
test_highlight::test_highlights(&loader, &test_highlight_dir)?;
}
} else if let Some(matches) = matches.subcommand_matches("parse") {
let debug = matches.is_present("debug");
@ -166,56 +227,27 @@ fn run() -> error::Result<()> {
let edits = matches
.values_of("edits")
.map_or(Vec::new(), |e| e.collect());
let allow_cancellation = matches.is_present("allow-cancellation");
let cancellation_flag = util::cancel_on_stdin();
let timeout = matches
.value_of("timeout")
.map_or(0, |t| u64::from_str_radix(t, 10).unwrap());
loader.find_all_languages(&config.parser_directories)?;
let paths = matches
.values_of("path")
.unwrap()
.into_iter()
.collect::<Vec<_>>();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
let max_path_length = paths.iter().map(|p| p.chars().count()).max().unwrap();
let mut has_error = false;
loader.find_all_languages(&config.parser_directories)?;
let should_track_stats = matches.is_present("stat");
let mut stats = parse::Stats::default();
for path in paths {
let path = Path::new(path);
let language = if let Some(scope) = matches.value_of("scope") {
if let Some(config) =
loader
.language_configuration_for_scope(scope)
.map_err(Error::wrap(|| {
format!("Failed to load language for scope '{}'", scope)
}))?
{
config.0
} else {
return Error::err(format!("Unknown scope '{}'", scope));
}
} else if let Some((lang, _)) = loader
.language_configuration_for_file_name(path)
.map_err(Error::wrap(|| {
format!(
"Failed to load language for file name {:?}",
path.file_name().unwrap()
)
}))?
{
lang
} else if let Some(lang) = loader
.languages_at_path(&current_dir)
.map_err(Error::wrap(|| {
"Failed to load language in current directory"
}))?
.first()
.cloned()
{
lang
} else {
eprintln!("No language found");
return Ok(());
};
has_error |= parse::parse_file_at_path(
let path = Path::new(&path);
let language =
select_language(&mut loader, path, &current_dir, matches.value_of("scope"))?;
let this_file_errored = parse::parse_file_at_path(
language,
path,
&edits,
@ -225,36 +257,86 @@ fn run() -> error::Result<()> {
timeout,
debug,
debug_graph,
allow_cancellation,
Some(&cancellation_flag),
)?;
if should_track_stats {
stats.total_parses += 1;
if !this_file_errored {
stats.successful_parses += 1;
}
}
has_error |= this_file_errored;
}
if should_track_stats {
println!("{}", stats)
}
if has_error {
return Error::err(String::new());
}
} else if let Some(matches) = matches.subcommand_matches("query") {
let ordered_captures = matches.values_of("captures").is_some();
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
loader.find_all_languages(&config.parser_directories)?;
let language = select_language(
&mut loader,
Path::new(&paths[0]),
&current_dir,
matches.value_of("scope"),
)?;
let query_path = Path::new(matches.value_of("query-path").unwrap());
let range = matches.value_of("byte-range").map(|br| {
let r: Vec<&str> = br.split(":").collect();
(r[0].parse().unwrap(), r[1].parse().unwrap())
});
let should_test = matches.is_present("test");
query::query_files_at_paths(
language,
paths,
query_path,
ordered_captures,
range,
should_test,
)?;
} else if let Some(matches) = matches.subcommand_matches("tags") {
loader.find_all_languages(&config.parser_directories)?;
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
tags::generate_tags(
&loader,
matches.value_of("scope"),
&paths,
matches.is_present("quiet"),
matches.is_present("time"),
)?;
} else if let Some(matches) = matches.subcommand_matches("highlight") {
let paths = matches.values_of("path").unwrap().into_iter();
let html_mode = matches.is_present("html");
let time = matches.is_present("time");
loader.configure_highlights(&config.theme.highlight_names);
loader.find_all_languages(&config.parser_directories)?;
if html_mode {
let time = matches.is_present("time");
let quiet = matches.is_present("quiet");
let html_mode = quiet || matches.is_present("html");
let paths = collect_paths(matches.value_of("paths-file"), matches.values_of("paths"))?;
if html_mode && !quiet {
println!("{}", highlight::HTML_HEADER);
}
let language_config;
let cancellation_flag = util::cancel_on_stdin();
let mut lang = None;
if let Some(scope) = matches.value_of("scope") {
language_config = loader.language_configuration_for_scope(scope)?;
if language_config.is_none() {
lang = loader.language_configuration_for_scope(scope)?;
if lang.is_none() {
return Error::err(format!("Unknown scope '{}'", scope));
}
} else {
language_config = None;
}
for path in paths {
let path = Path::new(path);
let (language, language_config) = match language_config {
let path = Path::new(&path);
let (language, language_config) = match lang {
Some(v) => v,
None => match loader.language_configuration_for_file_name(path)? {
Some(v) => v,
@ -265,30 +347,56 @@ fn run() -> error::Result<()> {
},
};
if let Some(sheet) = language_config.highlight_property_sheet(language)? {
if let Some(highlight_config) = language_config.highlight_config(language)? {
let source = fs::read(path)?;
if html_mode {
highlight::html(&loader, &config.theme, &source, language, sheet)?;
highlight::html(
&loader,
&config.theme,
&source,
highlight_config,
quiet,
time,
)?;
} else {
highlight::ansi(&loader, &config.theme, &source, language, sheet, time)?;
highlight::ansi(
&loader,
&config.theme,
&source,
highlight_config,
time,
Some(&cancellation_flag),
)?;
}
} else {
return Error::err(format!("No syntax highlighting property sheet specified"));
eprintln!("No syntax highlighting config found for path {:?}", path);
}
}
if html_mode && !quiet {
println!("{}", highlight::HTML_FOOTER);
}
} else if let Some(matches) = matches.subcommand_matches("build-wasm") {
let grammar_path = current_dir.join(matches.value_of("path").unwrap_or(""));
wasm::compile_language_to_wasm(&grammar_path, matches.is_present("docker"))?;
} else if matches.subcommand_matches("web-ui").is_some() {
web_ui::serve(&current_dir);
} else if let Some(matches) = matches.subcommand_matches("web-ui") {
let open_in_browser = !matches.is_present("quiet");
web_ui::serve(&current_dir, open_in_browser);
} else if matches.subcommand_matches("dump-languages").is_some() {
loader.find_all_languages(&config.parser_directories)?;
for (configuration, language_path) in loader.get_all_language_configurations() {
println!(
"scope: {}\nparser: {:?}\nproperties: {:?}\nfile_types: {:?}\ncontent_regex: {:?}\ninjection_regex: {:?}\n",
concat!(
"scope: {}\n",
"parser: {:?}\n",
"highlights: {:?}\n",
"file_types: {:?}\n",
"content_regex: {:?}\n",
"injection_regex: {:?}\n",
),
configuration.scope.as_ref().unwrap_or(&String::new()),
language_path,
configuration.highlight_property_sheet_path,
configuration.highlights_filenames,
configuration.file_types,
configuration.content_regex,
configuration.injection_regex,
@ -298,3 +406,107 @@ fn run() -> error::Result<()> {
Ok(())
}
fn collect_paths<'a>(
paths_file: Option<&str>,
paths: Option<impl Iterator<Item = &'a str>>,
) -> error::Result<Vec<String>> {
if let Some(paths_file) = paths_file {
return Ok(fs::read_to_string(paths_file)
.map_err(Error::wrap(|| {
format!("Failed to read paths file {}", paths_file)
}))?
.trim()
.split_ascii_whitespace()
.map(String::from)
.collect::<Vec<_>>());
}
if let Some(paths) = paths {
let mut result = Vec::new();
let mut incorporate_path = |path: &str, positive| {
if positive {
result.push(path.to_string());
} else {
if let Some(index) = result.iter().position(|p| p == path) {
result.remove(index);
}
}
};
for mut path in paths {
let mut positive = true;
if path.starts_with("!") {
positive = false;
path = path.trim_start_matches("!");
}
if Path::new(path).exists() {
incorporate_path(path, positive);
} else {
let paths = glob(path)
.map_err(Error::wrap(|| format!("Invalid glob pattern {:?}", path)))?;
for path in paths {
if let Some(path) = path?.to_str() {
incorporate_path(path, positive);
}
}
}
}
if result.is_empty() {
Error::err(
"No files were found at or matched by the provided pathname/glob".to_string(),
)?;
}
return Ok(result);
}
Err(Error::new("Must provide one or more paths".to_string()))
}
fn select_language(
loader: &mut loader::Loader,
path: &Path,
current_dir: &Path,
scope: Option<&str>,
) -> Result<Language, Error> {
if let Some(scope) = scope {
if let Some(config) =
loader
.language_configuration_for_scope(scope)
.map_err(Error::wrap(|| {
format!("Failed to load language for scope '{}'", scope)
}))?
{
Ok(config.0)
} else {
return Error::err(format!("Unknown scope '{}'", scope));
}
} else if let Some((lang, _)) =
loader
.language_configuration_for_file_name(path)
.map_err(Error::wrap(|| {
format!(
"Failed to load language for file name {:?}",
path.file_name().unwrap()
)
}))?
{
Ok(lang)
} else if let Some(lang) = loader
.languages_at_path(&current_dir)
.map_err(Error::wrap(|| {
"Failed to load language in current directory"
}))?
.first()
.cloned()
{
Ok(lang)
} else {
eprintln!("No language found");
Error::err("No language found".to_string())
}
}

View file

@ -2,9 +2,9 @@ use super::error::{Error, Result};
use super::util;
use std::io::{self, Write};
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::atomic::AtomicUsize;
use std::time::Instant;
use std::{fs, thread, usize};
use std::{fmt, fs, usize};
use tree_sitter::{InputEdit, Language, LogType, Parser, Point, Tree};
#[derive(Debug)]
@ -14,6 +14,22 @@ pub struct Edit {
pub inserted_text: Vec<u8>,
}
/// Aggregate counters for a batch `parse` run, printed when `--stat` is set.
#[derive(Debug, Default)]
pub struct Stats {
    // Number of files that parsed without errors.
    pub successful_parses: usize,
    // Total number of files parsed.
    pub total_parses: usize,
}

impl fmt::Display for Stats {
    /// Formats a one-line summary ending in a newline, e.g.
    /// `Total parses: 4; successful parses: 3; failed parses: 1; success percentage: 75.00%`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Guard the division: with zero parses the ratio would be NaN and
        // the summary would read "success percentage: NaN%".
        let success_percentage = if self.total_parses == 0 {
            0.0
        } else {
            (self.successful_parses as f64) / (self.total_parses as f64) * 100.0
        };
        writeln!(
            f,
            "Total parses: {}; successful parses: {}; failed parses: {}; success percentage: {:.2}%",
            self.total_parses,
            self.successful_parses,
            self.total_parses - self.successful_parses,
            success_percentage
        )
    }
}
pub fn parse_file_at_path(
language: Language,
path: &Path,
@ -24,7 +40,7 @@ pub fn parse_file_at_path(
timeout: u64,
debug: bool,
debug_graph: bool,
allow_cancellation: bool,
cancellation_flag: Option<&AtomicUsize>,
) -> Result<bool> {
let mut _log_session = None;
let mut parser = Parser::new();
@ -35,16 +51,7 @@ pub fn parse_file_at_path(
// If the `--cancel` flag was passed, then cancel the parse
// when the user types a newline.
if allow_cancellation {
let flag = Box::new(AtomicUsize::new(0));
unsafe { parser.set_cancellation_flag(Some(&flag)) };
thread::spawn(move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
eprintln!("Cancelling");
flag.store(1, Ordering::Relaxed);
});
}
unsafe { parser.set_cancellation_flag(cancellation_flag) };
// Set a timeout based on the `--time` flag.
parser.set_timeout_micros(timeout);
@ -70,10 +77,18 @@ pub fn parse_file_at_path(
let mut stdout = stdout.lock();
if let Some(mut tree) = tree {
for edit in edits {
if debug_graph && !edits.is_empty() {
println!("BEFORE:\n{}", String::from_utf8_lossy(&source_code));
}
for (i, edit) in edits.iter().enumerate() {
let edit = parse_edit_flag(&source_code, edit)?;
perform_edit(&mut tree, &mut source_code, &edit);
tree = parser.parse(&source_code, Some(&tree)).unwrap();
if debug_graph {
println!("AFTER {}:\n{}", i, String::from_utf8_lossy(&source_code));
}
}
let duration = time.elapsed();

100
cli/src/query.rs Normal file
View file

@ -0,0 +1,100 @@
use super::error::{Error, Result};
use crate::query_testing;
use std::fs;
use std::io::{self, Write};
use std::path::Path;
use tree_sitter::{Language, Node, Parser, Query, QueryCursor};
/// Run a single query file against each of the given source files, printing
/// every match or capture to stdout.
///
/// * `ordered_captures` - if true, print one line per capture in the order the
///   cursor yields them; otherwise print captures grouped per match.
/// * `range` - optional byte range restricting where the query cursor looks.
/// * `should_test` - if true, also check the collected captures against the
///   position-comment assertions in each file.
pub fn query_files_at_paths(
    language: Language,
    paths: Vec<String>,
    query_path: &Path,
    ordered_captures: bool,
    range: Option<(usize, usize)>,
    should_test: bool,
) -> Result<()> {
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    // Compile the query once; it is reused for every file.
    let query_source = fs::read_to_string(query_path).map_err(Error::wrap(|| {
        format!("Error reading query file {:?}", query_path)
    }))?;
    let query = Query::new(language, &query_source)
        .map_err(|e| Error::new(format!("Query compilation failed: {:?}", e)))?;
    let mut query_cursor = QueryCursor::new();
    if let Some((beg, end)) = range {
        query_cursor.set_byte_range(beg, end);
    }
    let mut parser = Parser::new();
    parser.set_language(language).map_err(|e| e.to_string())?;
    for path in paths {
        // Captures collected for the optional assertion check below.
        let mut results = Vec::new();
        writeln!(&mut stdout, "{}", path)?;
        let source_code = fs::read(&path).map_err(Error::wrap(|| {
            format!("Error reading source file {:?}", path)
        }))?;
        let text_callback = |n: Node| &source_code[n.byte_range()];
        let tree = parser.parse(&source_code, None).unwrap();
        if ordered_captures {
            for (mat, capture_index) in
                query_cursor.captures(&query, tree.root_node(), text_callback)
            {
                let capture = mat.captures[capture_index];
                let capture_name = &query.capture_names()[capture.index as usize];
                writeln!(
                    &mut stdout,
                    " pattern: {}, capture: {}, row: {}, text: {:?}",
                    mat.pattern_index,
                    capture_name,
                    capture.node.start_position().row,
                    capture.node.utf8_text(&source_code).unwrap_or("")
                )?;
                results.push(query_testing::CaptureInfo {
                    name: capture_name.to_string(),
                    start: capture.node.start_position(),
                    end: capture.node.end_position(),
                });
            }
        } else {
            for m in query_cursor.matches(&query, tree.root_node(), text_callback) {
                writeln!(&mut stdout, " pattern: {}", m.pattern_index)?;
                for capture in m.captures {
                    let start = capture.node.start_position();
                    let end = capture.node.end_position();
                    let capture_name = &query.capture_names()[capture.index as usize];
                    // Single-line captures include their text; multi-line
                    // captures only report their span.
                    if end.row == start.row {
                        writeln!(
                            &mut stdout,
                            " capture: {}, start: {}, text: {:?}",
                            capture_name,
                            start,
                            capture.node.utf8_text(&source_code).unwrap_or("")
                        )?;
                    } else {
                        writeln!(
                            &mut stdout,
                            " capture: {}, start: {}, end: {}",
                            capture_name, start, end,
                        )?;
                    }
                    results.push(query_testing::CaptureInfo {
                        name: capture_name.to_string(),
                        start: capture.node.start_position(),
                        end: capture.node.end_position(),
                    });
                }
            }
        }
        if should_test {
            query_testing::assert_expected_captures(results, path, &mut parser, language)?
        }
    }
    Ok(())
}

150
cli/src/query_testing.rs Normal file
View file

@ -0,0 +1,150 @@
use crate::error;
use crate::error::Result;
use lazy_static::lazy_static;
use regex::Regex;
use std::fs;
use tree_sitter::{Language, Parser, Point};
lazy_static! {
    // Matches a capture name: word characters, underscores, hyphens, and dots.
    static ref CAPTURE_NAME_REGEX: Regex = Regex::new("[\\w_\\-.]+").unwrap();
}
/// A capture produced by running a query: the capture's name and the span of
/// the captured node.
#[derive(Debug, Eq, PartialEq)]
pub struct CaptureInfo {
    pub name: String,
    pub start: Point,
    pub end: Point,
}

/// A single assertion parsed from a position comment: "the capture at
/// `position` is expected to have the name `expected_capture_name`".
#[derive(Debug, PartialEq, Eq)]
pub struct Assertion {
    pub position: Point,
    pub expected_capture_name: String,
}
/// Parse the given source code, finding all of the comments that contain
/// highlighting assertions. Return a vector of (position, expected highlight name)
/// pairs.
pub fn parse_position_comments(
    parser: &mut Parser,
    language: Language,
    source: &[u8],
) -> Result<Vec<Assertion>> {
    let mut result = Vec::new();
    let mut assertion_ranges = Vec::new();
    // Parse the code.
    parser.set_included_ranges(&[]).unwrap();
    parser.set_language(language).unwrap();
    let tree = parser.parse(source, None).unwrap();
    // Walk the tree depth-first: descend as far as possible, then process each
    // node on the way back up (`ascending == true`).
    let mut ascending = false;
    let mut cursor = tree.root_node().walk();
    loop {
        if ascending {
            let node = cursor.node();
            // Find every comment node.
            if node.kind().contains("comment") {
                if let Ok(text) = node.utf8_text(source) {
                    let mut position = node.start_position();
                    // A comment on row 0 cannot refer to a line above it, so it
                    // is skipped. This must NOT `continue` the outer loop:
                    // doing so would bypass the cursor-advancing code below and
                    // spin forever on the same node.
                    if position.row > 0 {
                        // Find the arrow character ("^" or '<-") in the comment. A left arrow
                        // refers to the column where the comment node starts. An up arrow refers
                        // to its own column.
                        let mut has_left_caret = false;
                        let mut has_arrow = false;
                        let mut arrow_end = 0;
                        for (i, c) in text.char_indices() {
                            arrow_end = i + 1;
                            if c == '-' && has_left_caret {
                                has_arrow = true;
                                break;
                            }
                            if c == '^' {
                                has_arrow = true;
                                position.column += i;
                                break;
                            }
                            has_left_caret = c == '<';
                        }
                        // If the comment node contains an arrow and a highlight name, record the
                        // highlight name and the position.
                        if let (true, Some(mat)) =
                            (has_arrow, CAPTURE_NAME_REGEX.find(&text[arrow_end..]))
                        {
                            assertion_ranges.push((node.start_position(), node.end_position()));
                            result.push(Assertion {
                                position: position,
                                expected_capture_name: mat.as_str().to_string(),
                            });
                        }
                    }
                }
            }
            // Continue walking the tree.
            if cursor.goto_next_sibling() {
                ascending = false;
            } else if !cursor.goto_parent() {
                break;
            }
        } else if !cursor.goto_first_child() {
            ascending = true;
        }
    }
    // Adjust the row number in each assertion's position to refer to the line of
    // code *above* the assertion. There can be multiple lines of assertion comments,
    // so the positions may have to be decremented by more than one row.
    let mut i = 0;
    for assertion in result.iter_mut() {
        loop {
            let on_assertion_line = assertion_ranges[i..]
                .iter()
                .any(|(start, _)| start.row == assertion.position.row);
            if on_assertion_line {
                assertion.position.row -= 1;
            } else {
                // Advance the range cursor past comment ranges that lie above
                // this assertion, so later assertions scan from here onward.
                while i < assertion_ranges.len()
                    && assertion_ranges[i].0.row < assertion.position.row
                {
                    i += 1;
                }
                break;
            }
        }
    }
    // The assertions can end up out of order due to the line adjustments.
    result.sort_unstable_by_key(|a| a.position);
    Ok(result)
}
pub fn assert_expected_captures(
infos: Vec<CaptureInfo>,
path: String,
parser: &mut Parser,
language: Language,
) -> Result<()> {
let contents = fs::read_to_string(path)?;
let pairs = parse_position_comments(parser, language, contents.as_bytes())?;
for info in &infos {
if let Some(found) = pairs.iter().find(|p| {
p.position.row == info.start.row && p.position >= info.start && p.position < info.end
}) {
if found.expected_capture_name != info.name && info.name != "name" {
Err(error::Error::new(format!(
"Assertion failed: at {}, found {}, expected {}",
info.start, found.expected_capture_name, info.name
)))?
}
}
}
Ok(())
}

98
cli/src/tags.rs Normal file
View file

@ -0,0 +1,98 @@
use super::loader::Loader;
use super::util;
use crate::error::{Error, Result};
use std::io::{self, Write};
use std::path::Path;
use std::time::Instant;
use std::{fs, str};
use tree_sitter_tags::TagsContext;
/// Generate and print tags (definitions and references) for each of the given
/// paths.
///
/// If `scope` is given, that language configuration is used for every file and
/// an error is returned if the scope is unknown; otherwise the language is
/// chosen per-file from its file name. `quiet` suppresses the per-tag output;
/// `time` prints per-file timing after the tags.
pub fn generate_tags(
    loader: &Loader,
    scope: Option<&str>,
    paths: &[String],
    quiet: bool,
    time: bool,
) -> Result<()> {
    let mut lang = None;
    if let Some(scope) = scope {
        lang = loader.language_configuration_for_scope(scope)?;
        if lang.is_none() {
            return Error::err(format!("Unknown scope '{}'", scope));
        }
    }
    let mut context = TagsContext::new();
    // Cancels tag generation when the user types a newline on stdin.
    let cancellation_flag = util::cancel_on_stdin();
    let stdout = io::stdout();
    let mut stdout = stdout.lock();
    for path in paths {
        let path = Path::new(&path);
        // Per-file language resolution, unless a scope fixed it above.
        let (language, language_config) = match lang {
            Some(v) => v,
            None => match loader.language_configuration_for_file_name(path)? {
                Some(v) => v,
                None => {
                    eprintln!("No language found for path {:?}", path);
                    continue;
                }
            },
        };
        if let Some(tags_config) = language_config.tags_config(language)? {
            // When several paths are tagged, print each file name as a header
            // and indent its tags one tab.
            let indent;
            if paths.len() > 1 {
                if !quiet {
                    writeln!(&mut stdout, "{}", path.to_string_lossy())?;
                }
                indent = "\t"
            } else {
                indent = "";
            };
            let source = fs::read(path)?;
            let t0 = Instant::now();
            for tag in context
                .generate_tags(tags_config, &source, Some(&cancellation_flag))?
                .0
            {
                let tag = tag?;
                if !quiet {
                    write!(
                        &mut stdout,
                        "{}{:<10}\t | {:<8}\t{} {} - {} `{}`",
                        indent,
                        str::from_utf8(&source[tag.name_range]).unwrap_or(""),
                        &tags_config.syntax_type_name(tag.syntax_type_id),
                        if tag.is_definition { "def" } else { "ref" },
                        tag.span.start,
                        tag.span.end,
                        str::from_utf8(&source[tag.line_range]).unwrap_or(""),
                    )?;
                    // Attached docs are truncated to 120 bytes with an ellipsis.
                    if let Some(docs) = tag.docs {
                        if docs.len() > 120 {
                            write!(&mut stdout, "\t{:?}...", docs.get(0..120).unwrap_or(""))?;
                        } else {
                            write!(&mut stdout, "\t{:?}", &docs)?;
                        }
                    }
                    writeln!(&mut stdout, "")?;
                }
            }
            if time {
                writeln!(
                    &mut stdout,
                    "{}time: {}ms",
                    indent,
                    t0.elapsed().as_millis(),
                )?;
            }
        } else {
            eprintln!("No tags config found for path {:?}", path);
        }
    }
    Ok(())
}

View file

@ -11,7 +11,7 @@ use std::fs;
use std::io::{self, Write};
use std::path::{Path, PathBuf};
use std::str;
use tree_sitter::{Language, LogType, Parser};
use tree_sitter::{Language, LogType, Parser, Query};
lazy_static! {
static ref HEADER_REGEX: ByteRegex = ByteRegexBuilder::new(r"^===+\r?\n([^=]*)\r?\n===+\r?\n")
@ -112,6 +112,24 @@ pub fn run_tests_at_path(
}
}
/// Compile every non-hidden query file in the directory at `path`, returning
/// the first compilation error (if any). A missing directory is not an error.
pub fn check_queries_at_path(language: Language, path: &Path) -> Result<()> {
    if path.exists() {
        for entry in fs::read_dir(path)? {
            let entry = entry?;
            let file_name = entry.file_name();
            let name = file_name.to_str().unwrap_or("");
            // Skip hidden files (names beginning with a dot).
            if name.starts_with(".") {
                continue;
            }
            let content = fs::read_to_string(entry.path()).map_err(Error::wrap(|| {
                format!("Error reading query file {:?}", entry.file_name())
            }))?;
            // Compilation errors are tagged with the file name for reporting.
            Query::new(language, &content).map_err(|e| (name, e))?;
        }
    }
    Ok(())
}
pub fn print_diff_key() {
println!(
"\n{} / {}",

275
cli/src/test_highlight.rs Normal file
View file

@ -0,0 +1,275 @@
use super::error::Result;
use crate::loader::Loader;
use crate::query_testing::{parse_position_comments, Assertion};
use ansi_term::Colour;
use std::fs;
use std::path::Path;
use tree_sitter::Point;
use tree_sitter_highlight::{Highlight, HighlightConfiguration, HighlightEvent, Highlighter};
/// A single failed highlighting assertion: where it was, what highlight was
/// expected, and which highlights were actually present at that position.
pub struct Failure {
    row: usize,
    column: usize,
    expected_highlight: String,
    actual_highlights: Vec<String>,
}

impl Failure {
    /// Render the failure as a human-readable, single-line message.
    pub fn message(&self) -> String {
        let mut result = format!(
            "Failure - row: {}, column: {}, expected highlight '{}', actual highlights: ",
            self.row, self.column, self.expected_highlight
        );
        if self.actual_highlights.is_empty() {
            result.push_str("none.");
        } else {
            // Quote each highlight name and join with commas, instead of
            // hand-rolling the separator logic.
            let quoted: Vec<String> = self
                .actual_highlights
                .iter()
                .map(|h| format!("'{}'", h))
                .collect();
            result.push_str(&quoted.join(", "));
        }
        result
    }
}
/// Run every syntax-highlighting test file in `directory`, printing a check or
/// cross line per file. Returns an error if any file failed; the error carries
/// an empty message because the per-file failures were already printed.
pub fn test_highlights(loader: &Loader, directory: &Path) -> Result<()> {
    let mut failed = false;
    let mut highlighter = Highlighter::new();
    println!("syntax highlighting:");
    for highlight_test_file in fs::read_dir(directory)? {
        let highlight_test_file = highlight_test_file?;
        let test_file_path = highlight_test_file.path();
        let test_file_name = highlight_test_file.file_name();
        // The test file's own name determines which language and highlight
        // configuration are used.
        let (language, language_config) = loader
            .language_configuration_for_file_name(&test_file_path)?
            .ok_or_else(|| format!("No language found for path {:?}", test_file_path))?;
        let highlight_config = language_config
            .highlight_config(language)?
            .ok_or_else(|| format!("No highlighting config found for {:?}", test_file_path))?;
        match test_highlight(
            &loader,
            &mut highlighter,
            highlight_config,
            fs::read(&test_file_path)?.as_slice(),
        ) {
            Ok(assertion_count) => {
                println!(
                    " ✓ {} ({} assertions)",
                    Colour::Green.paint(test_file_name.to_string_lossy().as_ref()),
                    assertion_count
                );
            }
            Err(e) => {
                println!(
                    " ✗ {}",
                    Colour::Red.paint(test_file_name.to_string_lossy().as_ref())
                );
                println!(" {}", e.message());
                failed = true;
            }
        }
    }
    if failed {
        // Details were already printed above; just signal failure.
        Err(String::new().into())
    } else {
        Ok(())
    }
}
/// Check each highlighting assertion against the computed highlight spans.
///
/// Both `assertions` and `highlights` are ordered by position, so a single
/// forward index `i` is maintained across assertions. Returns the number of
/// assertions on success, or a `Failure` for the first assertion whose
/// position is not covered by a highlight with the expected name.
pub fn iterate_assertions(
    assertions: &Vec<Assertion>,
    highlights: &Vec<(Point, Point, Highlight)>,
    highlight_names: &Vec<String>,
) -> Result<usize> {
    // Iterate through all of the highlighting assertions, checking each one against the
    // actual highlights.
    let mut i = 0;
    let mut actual_highlights = Vec::<&String>::new();
    for Assertion {
        position,
        expected_capture_name: expected_highlight,
    } in assertions
    {
        let mut passed = false;
        actual_highlights.clear();
        'highlight_loop: loop {
            // The assertions are ordered by position, so skip past all of the highlights that
            // end at or before this assertion's position.
            if let Some(highlight) = highlights.get(i) {
                if highlight.1 <= *position {
                    i += 1;
                    continue;
                }
                // Iterate through all of the highlights that start at or before this assertion's,
                // position, looking for one that matches the assertion.
                let mut j = i;
                while let (false, Some(highlight)) = (passed, highlights.get(j)) {
                    if highlight.0 > *position {
                        break 'highlight_loop;
                    }
                    // If the highlight matches the assertion, this test passes. Otherwise,
                    // add this highlight to the list of actual highlights that span the
                    // assertion's position, in order to generate an error message in the event
                    // of a failure.
                    let highlight_name = &highlight_names[(highlight.2).0];
                    if *highlight_name == *expected_highlight {
                        passed = true;
                        break 'highlight_loop;
                    } else {
                        actual_highlights.push(highlight_name);
                    }
                    j += 1;
                }
            } else {
                // No more highlights: the remaining assertions cannot pass.
                break;
            }
        }
        if !passed {
            return Err(Failure {
                row: position.row,
                column: position.column,
                expected_highlight: expected_highlight.clone(),
                actual_highlights: actual_highlights.into_iter().cloned().collect(),
            }
            .into());
        }
    }
    Ok(assertions.len())
}
/// Highlight `source`, parse the highlighting assertions embedded in its
/// comments, and check every assertion against the computed highlights.
///
/// Returns the number of assertions checked, or a `Failure` describing the
/// first assertion that did not match.
pub fn test_highlight(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<usize> {
    // Highlight the file, and parse out all of the highlighting assertions.
    let highlight_names = loader.highlight_names();
    let highlights = get_highlight_positions(loader, highlighter, highlight_config, source)?;
    let assertions =
        parse_position_comments(highlighter.parser(), highlight_config.language, source)?;
    // Check the assertions against the actual highlights. The previous body
    // repeated `iterate_assertions`'s entire loop inline after calling it,
    // doing identical work twice; returning its result directly is equivalent.
    iterate_assertions(&assertions, &highlights, &highlight_names)
}
/// Highlight `source` and return the resulting spans as a list of
/// (start, end, highlight) tuples with row/column positions.
pub fn get_highlight_positions(
    loader: &Loader,
    highlighter: &mut Highlighter,
    highlight_config: &HighlightConfiguration,
    source: &[u8],
) -> Result<Vec<(Point, Point, Highlight)>> {
    // Current row/column/byte position, maintained while walking the source's
    // chars so byte offsets from highlight events can be converted to Points.
    let mut row = 0;
    let mut column = 0;
    let mut byte_offset = 0;
    let mut was_newline = false;
    let mut result = Vec::new();
    // Stack of currently-open highlights; events nest.
    let mut highlight_stack = Vec::new();
    let source = String::from_utf8_lossy(source);
    let mut char_indices = source.char_indices();
    for event in highlighter.highlight(highlight_config, source.as_bytes(), None, |string| {
        loader.highlight_config_for_injection_string(string)
    })? {
        match event? {
            HighlightEvent::HighlightStart(h) => highlight_stack.push(h),
            HighlightEvent::HighlightEnd => {
                highlight_stack.pop();
            }
            HighlightEvent::Source { start, end } => {
                // Advance through the chars covered by this chunk, remembering
                // the Point where the chunk's `start` byte was reached.
                let mut start_position = Point::new(row, column);
                while byte_offset < end {
                    if byte_offset <= start {
                        start_position = Point::new(row, column);
                    }
                    if let Some((i, c)) = char_indices.next() {
                        // A row advance takes effect on the char *after* the
                        // newline was consumed.
                        if was_newline {
                            row += 1;
                            column = 0;
                        } else {
                            column += i - byte_offset;
                        }
                        was_newline = c == '\n';
                        byte_offset = i;
                    } else {
                        break;
                    }
                }
                // Only the innermost (most recently opened) highlight is
                // recorded for this span.
                if let Some(highlight) = highlight_stack.last() {
                    result.push((start_position, Point::new(row, column), *highlight))
                }
            }
        }
    }
    Ok(result)
}

View file

@ -21,7 +21,11 @@ const LANGUAGES: &'static [&'static str] = &[
"go",
"html",
"javascript",
"json",
"php",
"python",
"ruby",
"rust",
];
lazy_static! {
@ -57,7 +61,11 @@ fn test_real_language_corpus_files() {
}
let language = get_language(language_name);
let corpus_dir = grammars_dir.join(language_name).join("corpus");
let mut corpus_dir = grammars_dir.join(language_name).join("corpus");
if !corpus_dir.is_dir() {
corpus_dir = grammars_dir.join(language_name).join("test").join("corpus");
}
let error_corpus_file = error_corpus_dir.join(&format!("{}_errors.txt", language_name));
let main_tests = parse_tests(&corpus_dir).unwrap();
let error_tests = parse_tests(&error_corpus_file).unwrap_or(TestEntry::default());
@ -300,7 +308,8 @@ fn check_consistent_sizes(tree: &Tree, input: &Vec<u8>) {
let mut last_child_end_point = start_point;
let mut some_child_has_changes = false;
let mut actual_named_child_count = 0;
for child in node.children() {
for i in 0..node.child_count() {
let child = node.child(i).unwrap();
assert!(child.start_byte() >= last_child_end_byte);
assert!(child.start_position() >= last_child_end_point);
check(child, line_offsets);

View file

@ -51,6 +51,12 @@ pub fn stop_recording() {
}
}
pub fn record(f: impl FnOnce()) {
start_recording();
f();
stop_recording();
}
fn record_alloc(ptr: *mut c_void) {
let mut recorder = RECORDER.lock();
if recorder.enabled {

View file

@ -2,8 +2,8 @@ use crate::loader::Loader;
use lazy_static::lazy_static;
use std::fs;
use std::path::{Path, PathBuf};
use tree_sitter::{Language, PropertySheet};
use tree_sitter_highlight::{load_property_sheet, Properties};
use tree_sitter::Language;
use tree_sitter_highlight::HighlightConfiguration;
include!("./dirs.rs");
@ -11,6 +11,10 @@ lazy_static! {
static ref TEST_LOADER: Loader = Loader::new(SCRATCH_DIR.clone());
}
pub fn test_loader<'a>() -> &'a Loader {
&*TEST_LOADER
}
pub fn fixtures_dir<'a>() -> &'static Path {
&FIXTURES_DIR
}
@ -21,18 +25,33 @@ pub fn get_language(name: &str) -> Language {
.unwrap()
}
pub fn get_property_sheet_json(language_name: &str, sheet_name: &str) -> String {
let path = GRAMMARS_DIR
.join(language_name)
.join("src")
.join(sheet_name);
fs::read_to_string(path).unwrap()
pub fn get_language_queries_path(language_name: &str) -> PathBuf {
GRAMMARS_DIR.join(language_name).join("queries")
}
pub fn get_property_sheet(language_name: &str, sheet_name: &str) -> PropertySheet<Properties> {
let json = get_property_sheet_json(language_name, sheet_name);
pub fn get_highlight_config(
language_name: &str,
injection_query_filename: Option<&str>,
highlight_names: &[String],
) -> HighlightConfiguration {
let language = get_language(language_name);
load_property_sheet(language, &json).unwrap()
let queries_path = get_language_queries_path(language_name);
let highlights_query = fs::read_to_string(queries_path.join("highlights.scm")).unwrap();
let injections_query = if let Some(injection_query_filename) = injection_query_filename {
fs::read_to_string(queries_path.join(injection_query_filename)).unwrap()
} else {
String::new()
};
let locals_query = fs::read_to_string(queries_path.join("locals.scm")).unwrap_or(String::new());
let mut result = HighlightConfiguration::new(
language,
&highlights_query,
&injections_query,
&locals_query,
)
.unwrap();
result.configure(highlight_names);
result
}
pub fn get_test_language(name: &str, parser_code: &str, path: Option<&Path>) -> Language {

View file

@ -1,5 +1,5 @@
pub(super) mod allocations;
pub(super) mod edits;
pub(super) mod fixtures;
pub(super) mod random;
pub(super) mod scope_sequence;
pub(super) mod edits;

View file

@ -1,32 +1,92 @@
use super::helpers::fixtures::{get_language, get_property_sheet, get_property_sheet_json};
use super::helpers::fixtures::{get_highlight_config, get_language, get_language_queries_path};
use lazy_static::lazy_static;
use std::ffi::CString;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{ptr, slice, str};
use tree_sitter::{Language, PropertySheet};
use std::{fs, ptr, slice, str};
use tree_sitter_highlight::{
c, highlight, highlight_html, Error, Highlight, HighlightEvent, Properties,
c, Error, Highlight, HighlightConfiguration, HighlightEvent, Highlighter, HtmlRenderer,
};
lazy_static! {
static ref JS_SHEET: PropertySheet<Properties> =
get_property_sheet("javascript", "highlights.json");
static ref HTML_SHEET: PropertySheet<Properties> =
get_property_sheet("html", "highlights.json");
static ref EJS_SHEET: PropertySheet<Properties> =
get_property_sheet("embedded-template", "highlights-ejs.json");
static ref RUST_SHEET: PropertySheet<Properties> =
get_property_sheet("rust", "highlights.json");
static ref SCOPE_CLASS_STRINGS: Vec<String> = {
let mut result = Vec::new();
let mut i = 0;
while let Some(highlight) = Highlight::from_usize(i) {
result.push(format!("class={:?}", highlight));
i += 1;
}
result
};
static ref JS_HIGHLIGHT: HighlightConfiguration =
get_highlight_config("javascript", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref JSDOC_HIGHLIGHT: HighlightConfiguration =
get_highlight_config("jsdoc", None, &HIGHLIGHT_NAMES);
static ref HTML_HIGHLIGHT: HighlightConfiguration =
get_highlight_config("html", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref EJS_HIGHLIGHT: HighlightConfiguration = get_highlight_config(
"embedded-template",
Some("injections-ejs.scm"),
&HIGHLIGHT_NAMES
);
static ref RUST_HIGHLIGHT: HighlightConfiguration =
get_highlight_config("rust", Some("injections.scm"), &HIGHLIGHT_NAMES);
static ref HIGHLIGHT_NAMES: Vec<String> = [
"attribute",
"carriage-return",
"comment",
"constant",
"constructor",
"function.builtin",
"function",
"embedded",
"keyword",
"operator",
"property.builtin",
"property",
"punctuation",
"punctuation.bracket",
"punctuation.delimiter",
"punctuation.special",
"string",
"tag",
"type.builtin",
"type",
"variable.builtin",
"variable.parameter",
"variable",
]
.iter()
.cloned()
.map(String::from)
.collect();
static ref HTML_ATTRS: Vec<String> = HIGHLIGHT_NAMES
.iter()
.map(|s| format!("class={}", s))
.collect();
}
#[test]
fn test_highlighting_javascript() {
let source = "const a = function(b) { return b + c; }";
assert_eq!(
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec!["keyword"]),
(" ", vec![]),
("a", vec!["function"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("function", vec!["keyword"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec!["punctuation.bracket"]),
(" ", vec![]),
("return", vec!["keyword"]),
(" ", vec![]),
("b", vec!["variable.parameter"]),
(" ", vec![]),
("+", vec!["operator"]),
(" ", vec![]),
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"]),
(" ", vec![]),
("}", vec!["punctuation.bracket"]),
]]
);
}
#[test]
@ -34,57 +94,65 @@ fn test_highlighting_injected_html_in_javascript() {
let source = vec!["const s = html `<div>${a < b}</div>`;"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[vec![
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("s", vec![Highlight::Variable]),
("s", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("html", vec![Highlight::Function]),
("html", vec!["function"]),
(" ", vec![]),
("`<", vec![Highlight::String]),
("div", vec![Highlight::String, Highlight::Tag]),
(">", vec![Highlight::String]),
(
"${",
vec![
Highlight::String,
Highlight::Embedded,
Highlight::PunctuationSpecial
]
),
(
"a",
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
),
(" ", vec![Highlight::String, Highlight::Embedded]),
(
"<",
vec![Highlight::String, Highlight::Embedded, Highlight::Operator]
),
(" ", vec![Highlight::String, Highlight::Embedded]),
(
"b",
vec![Highlight::String, Highlight::Embedded, Highlight::Variable]
),
(
"}",
vec![
Highlight::String,
Highlight::Embedded,
Highlight::PunctuationSpecial
]
),
("</", vec![Highlight::String]),
("div", vec![Highlight::String, Highlight::Tag]),
(">`", vec![Highlight::String]),
(";", vec![Highlight::PunctuationDelimiter]),
("`", vec!["string"]),
("<", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("${", vec!["string", "embedded", "punctuation.special"]),
("a", vec!["string", "embedded", "variable"]),
(" ", vec!["string", "embedded"]),
("<", vec!["string", "embedded", "operator"]),
(" ", vec!["string", "embedded"]),
("b", vec!["string", "embedded", "variable"]),
("}", vec!["string", "embedded", "punctuation.special"]),
("</", vec!["string", "punctuation.bracket"]),
("div", vec!["string", "tag"]),
(">", vec!["string", "punctuation.bracket"]),
("`", vec!["string"]),
(";", vec!["punctuation.delimiter"]),
]]
);
}
#[test]
fn test_highlighting_injected_javascript_in_html_mini() {
let source = "<script>const x = new Thing();</script>";
assert_eq!(
&to_token_vector(source, &HTML_HIGHLIGHT).unwrap(),
&[vec![
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("const", vec!["keyword"]),
(" ", vec![]),
("x", vec!["variable"]),
(" ", vec![]),
("=", vec!["operator"]),
(" ", vec![]),
("new", vec!["keyword"]),
(" ", vec![]),
("Thing", vec!["constructor"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],]
);
}
#[test]
fn test_highlighting_injected_javascript_in_html() {
let source = vec![
@ -97,38 +165,44 @@ fn test_highlighting_injected_javascript_in_html() {
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("html"), &HTML_SHEET).unwrap(),
&to_token_vector(&source, &HTML_HIGHLIGHT).unwrap(),
&[
vec![("<", vec![]), ("body", vec![Highlight::Tag]), (">", vec![]),],
vec![
(" <", vec![]),
("script", vec![Highlight::Tag]),
(">", vec![]),
("<", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
(" ", vec![]),
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("x", vec![Highlight::Variable]),
("x", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("new", vec![Highlight::Keyword]),
("new", vec!["keyword"]),
(" ", vec![]),
("Thing", vec![Highlight::Constructor]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
(";", vec![Highlight::PunctuationDelimiter]),
("Thing", vec!["constructor"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![
(" </", vec![]),
("script", vec![Highlight::Tag]),
(">", vec![]),
(" ", vec![]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
vec![
("</", vec![]),
("body", vec![Highlight::Tag]),
(">", vec![]),
("</", vec!["punctuation.bracket"]),
("body", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
],
]
);
@ -147,13 +221,13 @@ fn test_highlighting_multiline_nodes_to_html() {
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=Keyword>const</span> <span class=Constant>SOMETHING</span> <span class=Operator>=</span> <span class=String>`</span>\n".to_string(),
"<span class=String> one <span class=Embedded><span class=PunctuationSpecial>${</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=Function>two</span><span class=PunctuationBracket>(</span><span class=PunctuationBracket>)</span></span></span>\n".to_string(),
"<span class=String><span class=Embedded> <span class=PunctuationSpecial>}</span></span> three</span>\n".to_string(),
"<span class=String>`</span>\n".to_string(),
"<span class=keyword>const</span> <span class=constant>SOMETHING</span> <span class=operator>=</span> <span class=string>`</span>\n".to_string(),
"<span class=string> one <span class=embedded><span class=punctuation.special>${</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=function>two</span><span class=punctuation.bracket>(</span><span class=punctuation.bracket>)</span></span></span>\n".to_string(),
"<span class=string><span class=embedded> <span class=punctuation.special>}</span></span> three</span>\n".to_string(),
"<span class=string>`</span>\n".to_string(),
]
);
}
@ -169,51 +243,51 @@ fn test_highlighting_with_local_variable_tracking() {
.join("\n");
assert_eq!(
&to_token_vector(&source, get_language("javascript"), &JS_SHEET).unwrap(),
&to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
&[
vec![
("module", vec![Highlight::VariableBuiltin]),
(".", vec![Highlight::PunctuationDelimiter]),
("exports", vec![Highlight::Property]),
("module", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("exports", vec!["function"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("function", vec![Highlight::Keyword]),
("function", vec!["keyword"]),
(" ", vec![]),
("a", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("b", vec![Highlight::VariableParameter]),
(")", vec![Highlight::PunctuationBracket]),
("a", vec!["function"]),
("(", vec!["punctuation.bracket"]),
("b", vec!["variable.parameter"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("{", vec![Highlight::PunctuationBracket])
("{", vec!["punctuation.bracket"])
],
vec![
(" ", vec![]),
("const", vec![Highlight::Keyword]),
("const", vec!["keyword"]),
(" ", vec![]),
("module", vec![Highlight::Variable]),
("module", vec!["variable"]),
(" ", vec![]),
("=", vec![Highlight::Operator]),
("=", vec!["operator"]),
(" ", vec![]),
("c", vec![Highlight::Variable]),
(";", vec![Highlight::PunctuationDelimiter])
("c", vec!["variable"]),
(";", vec!["punctuation.delimiter"])
],
vec![
(" ", vec![]),
("console", vec![Highlight::VariableBuiltin]),
(".", vec![Highlight::PunctuationDelimiter]),
("log", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("console", vec!["variable.builtin"]),
(".", vec!["punctuation.delimiter"]),
("log", vec!["function"]),
("(", vec!["punctuation.bracket"]),
// Not a builtin, because `module` was defined as a variable above.
("module", vec![Highlight::Variable]),
(",", vec![Highlight::PunctuationDelimiter]),
("module", vec!["variable"]),
(",", vec!["punctuation.delimiter"]),
(" ", vec![]),
// A parameter, because `b` was defined as a parameter above.
("b", vec![Highlight::VariableParameter]),
(")", vec![Highlight::PunctuationBracket]),
(";", vec![Highlight::PunctuationDelimiter]),
("b", vec!["variable.parameter"]),
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
],
vec![("}", vec![Highlight::PunctuationBracket])]
vec![("}", vec!["punctuation.bracket"])]
],
);
}
@ -234,41 +308,95 @@ fn test_highlighting_empty_lines() {
.join("\n");
assert_eq!(
&to_html(&source, get_language("javascript"), &JS_SHEET,).unwrap(),
&to_html(&source, &JS_HIGHLIGHT,).unwrap(),
&[
"<span class=Keyword>class</span> <span class=Constructor>A</span> <span class=PunctuationBracket>{</span>\n".to_string(),
"<span class=keyword>class</span> <span class=constructor>A</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=Function>b</span><span class=PunctuationBracket>(</span><span class=VariableParameter>c</span><span class=PunctuationBracket>)</span> <span class=PunctuationBracket>{</span>\n".to_string(),
" <span class=function>b</span><span class=punctuation.bracket>(</span><span class=variable.parameter>c</span><span class=punctuation.bracket>)</span> <span class=punctuation.bracket>{</span>\n".to_string(),
"\n".to_string(),
" <span class=Function>d</span><span class=PunctuationBracket>(</span><span class=Variable>e</span><span class=PunctuationBracket>)</span>\n".to_string(),
" <span class=function>d</span><span class=punctuation.bracket>(</span><span class=variable>e</span><span class=punctuation.bracket>)</span>\n".to_string(),
"\n".to_string(),
" <span class=PunctuationBracket>}</span>\n".to_string(),
" <span class=punctuation.bracket>}</span>\n".to_string(),
"\n".to_string(),
"<span class=PunctuationBracket>}</span>\n".to_string(),
"<span class=punctuation.bracket>}</span>\n".to_string(),
]
);
}
#[test]
fn test_highlighting_ejs() {
let source = vec!["<div><% foo() %></div>"].join("\n");
fn test_highlighting_carriage_returns() {
let source = "a = \"a\rb\"\r\nb\r";
assert_eq!(
&to_token_vector(&source, get_language("embedded-template"), &EJS_SHEET).unwrap(),
&to_html(&source, &JS_HIGHLIGHT).unwrap(),
&[
"<span class=variable>a</span> <span class=operator>=</span> <span class=string>&quot;a<span class=carriage-return></span>b&quot;</span>\n",
"<span class=variable>b</span>\n",
],
);
}
#[test]
fn test_highlighting_ejs_with_html_and_javascript() {
let source = vec!["<div><% foo() %></div><script> bar() </script>"].join("\n");
assert_eq!(
&to_token_vector(&source, &EJS_HIGHLIGHT).unwrap(),
&[[
("<", vec![]),
("div", vec![Highlight::Tag]),
(">", vec![]),
("<%", vec![Highlight::Keyword]),
("<", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("<%", vec!["keyword"]),
(" ", vec![]),
("foo", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
("foo", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("%>", vec![Highlight::Keyword]),
("</", vec![]),
("div", vec![Highlight::Tag]),
(">", vec![])
("%>", vec!["keyword"]),
("</", vec!["punctuation.bracket"]),
("div", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
("<", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
(" ", vec![]),
("bar", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" ", vec![]),
("</", vec!["punctuation.bracket"]),
("script", vec!["tag"]),
(">", vec!["punctuation.bracket"]),
]],
);
}
#[test]
fn test_highlighting_javascript_with_jsdoc() {
    // Regression test: the middle comment has no highlights. This should not prevent
    // later injections from highlighting properly.
    let source = vec!["a /* @see a */ b; /* nothing */ c; /* @see b */"].join("\n");
    assert_eq!(
        &to_token_vector(&source, &JS_HIGHLIGHT).unwrap(),
        &[[
            ("a", vec!["variable"]),
            (" ", vec![]),
            // The injected jsdoc grammar highlights `@see` *inside* the JS
            // comment, so the token carries both highlight names, outermost first.
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" a */", vec!["comment"]),
            (" ", vec![]),
            ("b", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            // This comment yields no injected highlights at all — only the
            // outer `comment` scope.
            ("/* nothing */", vec!["comment"]),
            (" ", vec![]),
            ("c", vec!["variable"]),
            (";", vec!["punctuation.delimiter"]),
            (" ", vec![]),
            ("/* ", vec!["comment"]),
            ("@see", vec!["comment", "keyword"]),
            (" b */", vec!["comment"])
        ]],
    );
}
@ -278,33 +406,36 @@ fn test_highlighting_with_content_children_included() {
let source = vec!["assert!(", " a.b.c() < D::e::<F>()", ");"].join("\n");
assert_eq!(
&to_token_vector(&source, get_language("rust"), &RUST_SHEET).unwrap(),
&to_token_vector(&source, &RUST_HIGHLIGHT).unwrap(),
&[
vec![
("assert", vec![Highlight::Function]),
("!", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
("assert", vec!["function"]),
("!", vec!["function"]),
("(", vec!["punctuation.bracket"]),
],
vec![
(" a", vec![]),
(".", vec![Highlight::PunctuationDelimiter]),
("b", vec![Highlight::Property]),
(".", vec![Highlight::PunctuationDelimiter]),
("c", vec![Highlight::Function]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
(".", vec!["punctuation.delimiter"]),
("b", vec!["property"]),
(".", vec!["punctuation.delimiter"]),
("c", vec!["function"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
(" < ", vec![]),
("D", vec![Highlight::Type]),
("::", vec![Highlight::PunctuationDelimiter]),
("e", vec![Highlight::Function]),
("::", vec![Highlight::PunctuationDelimiter]),
("<", vec![Highlight::PunctuationBracket]),
("F", vec![Highlight::Type]),
(">", vec![Highlight::PunctuationBracket]),
("(", vec![Highlight::PunctuationBracket]),
(")", vec![Highlight::PunctuationBracket]),
("D", vec!["type"]),
("::", vec!["punctuation.delimiter"]),
("e", vec!["function"]),
("::", vec!["punctuation.delimiter"]),
("<", vec!["punctuation.bracket"]),
("F", vec!["type"]),
(">", vec!["punctuation.bracket"]),
("(", vec!["punctuation.bracket"]),
(")", vec!["punctuation.bracket"]),
],
vec![(")", vec![Highlight::PunctuationBracket]), (";", vec![]),]
vec![
(")", vec!["punctuation.bracket"]),
(";", vec!["punctuation.delimiter"]),
]
],
);
}
@ -325,73 +456,97 @@ fn test_highlighting_cancellation() {
test_language_for_injection_string(name)
};
// Constructing the highlighter, which eagerly parses the outer document,
// should not fail.
let highlighter = highlight(
source.as_bytes(),
get_language("html"),
&HTML_SHEET,
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
// The initial `highlight` call, which eagerly parses the outer document, should not fail.
let mut highlighter = Highlighter::new();
let events = highlighter
.highlight(
&HTML_HIGHLIGHT,
source.as_bytes(),
Some(&cancellation_flag),
injection_callback,
)
.unwrap();
// Iterating the scopes should not panic. It should return an error
// once the cancellation is detected.
for event in highlighter {
// Iterating the scopes should not panic. It should return an error once the
// cancellation is detected.
for event in events {
if let Err(e) = event {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected an error while iterating highlighter");
}
#[test]
fn test_highlighting_via_c_api() {
let js_lang = get_language("javascript");
let html_lang = get_language("html");
let js_sheet = get_property_sheet_json("javascript", "highlights.json");
let js_sheet = c_string(&js_sheet);
let html_sheet = get_property_sheet_json("html", "highlights.json");
let html_sheet = c_string(&html_sheet);
let highlights = vec![
"class=tag\0",
"class=function\0",
"class=string\0",
"class=keyword\0",
];
let highlight_names = highlights
.iter()
.map(|h| h["class=".len()..].as_ptr() as *const i8)
.collect::<Vec<_>>();
let highlight_attrs = highlights
.iter()
.map(|h| h.as_bytes().as_ptr() as *const i8)
.collect::<Vec<_>>();
let highlighter = c::ts_highlighter_new(
&highlight_names[0] as *const *const i8,
&highlight_attrs[0] as *const *const i8,
highlights.len() as u32,
);
let class_tag = c_string("class=tag");
let class_function = c_string("class=function");
let class_string = c_string("class=string");
let class_keyword = c_string("class=keyword");
let js_scope_name = c_string("source.js");
let html_scope_name = c_string("text.html.basic");
let injection_regex = c_string("^(javascript|js)$");
let source_code = c_string("<script>\nconst a = b('c');\nc.d();\n</script>");
let attribute_strings = &mut [ptr::null(); Highlight::Unknown as usize + 1];
attribute_strings[Highlight::Tag as usize] = class_tag.as_ptr();
attribute_strings[Highlight::String as usize] = class_string.as_ptr();
attribute_strings[Highlight::Keyword as usize] = class_keyword.as_ptr();
attribute_strings[Highlight::Function as usize] = class_function.as_ptr();
let js_scope = c_string("source.js");
let js_injection_regex = c_string("^javascript");
let language = get_language("javascript");
let queries = get_language_queries_path("javascript");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
let locals_query = fs::read_to_string(queries.join("locals.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
js_scope.as_ptr(),
js_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const i8,
injections_query.as_ptr() as *const i8,
locals_query.as_ptr() as *const i8,
highlights_query.len() as u32,
injections_query.len() as u32,
locals_query.len() as u32,
);
let html_scope = c_string("text.html.basic");
let html_injection_regex = c_string("^html");
let language = get_language("html");
let queries = get_language_queries_path("html");
let highlights_query = fs::read_to_string(queries.join("highlights.scm")).unwrap();
let injections_query = fs::read_to_string(queries.join("injections.scm")).unwrap();
c::ts_highlighter_add_language(
highlighter,
html_scope.as_ptr(),
html_injection_regex.as_ptr(),
language,
highlights_query.as_ptr() as *const i8,
injections_query.as_ptr() as *const i8,
ptr::null(),
highlights_query.len() as u32,
injections_query.len() as u32,
0,
);
let highlighter = c::ts_highlighter_new(attribute_strings.as_ptr());
let buffer = c::ts_highlight_buffer_new();
c::ts_highlighter_add_language(
highlighter,
html_scope_name.as_ptr(),
html_lang,
html_sheet.as_ptr(),
ptr::null_mut(),
);
c::ts_highlighter_add_language(
highlighter,
js_scope_name.as_ptr(),
js_lang,
js_sheet.as_ptr(),
injection_regex.as_ptr(),
);
c::ts_highlighter_highlight(
highlighter,
html_scope_name.as_ptr(),
html_scope.as_ptr(),
source_code.as_ptr(),
source_code.as_bytes().len() as u32,
buffer,
@ -421,8 +576,8 @@ fn test_highlighting_via_c_api() {
lines,
vec![
"&lt;<span class=tag>script</span>&gt;\n",
"<span class=keyword>const</span> <span>a</span> <span>=</span> <span class=function>b</span><span>(</span><span class=string>&#39;c&#39;</span><span>)</span><span>;</span>\n",
"<span>c</span><span>.</span><span class=function>d</span><span>(</span><span>)</span><span>;</span>\n",
"<span class=keyword>const</span> a = <span class=function>b</span>(<span class=string>&#39;c&#39;</span>);\n",
"c.<span class=function>d</span>();\n",
"&lt;/<span class=tag>script</span>&gt;\n",
]
);
@ -433,7 +588,7 @@ fn test_highlighting_via_c_api() {
#[test]
fn test_decode_utf8_lossy() {
use tree_sitter_highlight::util::LossyUtf8;
use tree_sitter::LossyUtf8;
let parts = LossyUtf8::new(b"hi").collect::<Vec<_>>();
assert_eq!(parts, vec!["hi"]);
@ -452,50 +607,60 @@ fn c_string(s: &str) -> CString {
CString::new(s.as_bytes().to_vec()).unwrap()
}
fn test_language_for_injection_string<'a>(
string: &str,
) -> Option<(Language, &'a PropertySheet<Properties>)> {
fn test_language_for_injection_string<'a>(string: &str) -> Option<&'a HighlightConfiguration> {
match string {
"javascript" => Some((get_language("javascript"), &JS_SHEET)),
"html" => Some((get_language("html"), &HTML_SHEET)),
"rust" => Some((get_language("rust"), &RUST_SHEET)),
"javascript" => Some(&JS_HIGHLIGHT),
"html" => Some(&HTML_HIGHLIGHT),
"rust" => Some(&RUST_HIGHLIGHT),
"jsdoc" => Some(&JSDOC_HIGHLIGHT),
_ => None,
}
}
fn to_html<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
language_config: &'a HighlightConfiguration,
) -> Result<Vec<String>, Error> {
highlight_html(
src.as_bytes(),
language,
property_sheet,
let src = src.as_bytes();
let mut renderer = HtmlRenderer::new();
let mut highlighter = Highlighter::new();
let events = highlighter.highlight(
language_config,
src,
None,
&test_language_for_injection_string,
&|highlight| SCOPE_CLASS_STRINGS[highlight as usize].as_str(),
)
)?;
renderer.set_carriage_return_highlight(
HIGHLIGHT_NAMES
.iter()
.position(|s| s == "carriage-return")
.map(Highlight),
);
renderer
.render(events, src, &|highlight| HTML_ATTRS[highlight.0].as_bytes())
.unwrap();
Ok(renderer.lines().map(|s| s.to_string()).collect())
}
fn to_token_vector<'a>(
src: &'a str,
language: Language,
property_sheet: &'a PropertySheet<Properties>,
) -> Result<Vec<Vec<(&'a str, Vec<Highlight>)>>, Error> {
language_config: &'a HighlightConfiguration,
) -> Result<Vec<Vec<(&'a str, Vec<&'static str>)>>, Error> {
let src = src.as_bytes();
let mut highlighter = Highlighter::new();
let mut lines = Vec::new();
let mut highlights = Vec::new();
let mut line = Vec::new();
for event in highlight(
let events = highlighter.highlight(
language_config,
src,
language,
property_sheet,
None,
&test_language_for_injection_string,
)? {
)?;
for event in events {
match event? {
HighlightEvent::HighlightStart(s) => highlights.push(s),
HighlightEvent::HighlightStart(s) => highlights.push(HIGHLIGHT_NAMES[s.0].as_str()),
HighlightEvent::HighlightEnd => {
highlights.pop();
}

View file

@ -3,5 +3,8 @@ mod helpers;
mod highlight_test;
mod node_test;
mod parser_test;
mod properties_test;
mod pathological_test;
mod query_test;
mod tags_test;
mod test_highlight_test;
mod tree_test;

View file

@ -1,62 +0,0 @@
use super::helpers::fixtures::get_test_language;
use crate::generate::generate_parser_for_grammar;
use tree_sitter::Parser;
#[test]
fn test_basic_node_refs() {
    // Generates a parser for a small grammar in which two sequence members are
    // wrapped in named "REF" rules (`ref_1`, `ref_2`), then verifies that
    // `child_by_ref` resolves to the same nodes as positional `child` lookup.
    let (parser_name, parser_code) = generate_parser_for_grammar(
        r#"
{
    "name": "test_grammar_with_refs",
    "extras": [
        {"type": "PATTERN", "value": "\\s+"}
    ],
    "rules": {
        "rule_a": {
            "type": "SEQ",
            "members": [
                {
                    "type": "REF",
                    "value": "ref_1",
                    "content": {
                        "type": "STRING",
                        "value": "child-1"
                    }
                },
                {
                    "type": "CHOICE",
                    "members": [
                        {
                            "type": "STRING",
                            "value": "child-2"
                        },
                        {
                            "type": "BLANK"
                        }
                    ]
                },
                {
                    "type": "REF",
                    "value": "ref_2",
                    "content": {
                        "type": "STRING",
                        "value": "child-3"
                    }
                }
            ]
        }
    }
}
"#,
    )
    .unwrap();

    let mut parser = Parser::new();
    let language = get_test_language(&parser_name, &parser_code, None);
    parser.set_language(language).unwrap();

    let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
    let root_node = tree.root_node();

    // The optional middle "child-2" choice is present in this input, so the
    // ref-wrapped children sit at positions 0 and 2.
    assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0));
    assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2));
}

View file

@ -167,6 +167,79 @@ fn test_node_child() {
assert_eq!(tree.root_node().parent(), None);
}
#[test]
fn test_node_children() {
    // Walk the children of the top-level array in the shared JSON example.
    let tree = parse_json_example();
    let mut walker = tree.walk();
    let array = tree.root_node().child(0).unwrap();

    // `children` yields everything, including anonymous tokens like the
    // brackets and commas.
    let all_kinds: Vec<_> = array.children(&mut walker).map(|c| c.kind()).collect();
    assert_eq!(all_kinds, &["[", "number", ",", "false", ",", "object", "]"]);

    // `named_children` filters the punctuation tokens out.
    let named_kinds: Vec<_> = array
        .named_children(&mut walker)
        .map(|c| c.kind())
        .collect();
    assert_eq!(named_kinds, &["number", "false", "object"]);

    // The same cursor can be reused to descend into a nested node.
    let object = array
        .named_children(&mut walker)
        .find(|c| c.kind() == "object")
        .unwrap();
    let object_kinds: Vec<_> = object.children(&mut walker).map(|c| c.kind()).collect();
    assert_eq!(object_kinds, &["{", "pair", "}"]);
}
#[test]
fn test_node_children_by_field_name() {
    let mut parser = Parser::new();
    parser.set_language(get_language("python")).unwrap();
    // An `if` statement with three `elif` clauses; each clause hangs off the
    // `if_statement` node under the `alternative` field.
    let source = "
    if one:
        a()
    elif two:
        b()
    elif three:
        c()
    elif four:
        d()
    ";
    let tree = parser.parse(source, None).unwrap();
    let node = tree.root_node().child(0).unwrap();
    assert_eq!(node.kind(), "if_statement");

    // Iterate every child stored under the `alternative` field and pull out
    // the source text of each clause's `condition` child.
    let mut cursor = tree.walk();
    let alternatives = node.children_by_field_name("alternative", &mut cursor);
    let alternative_texts =
        alternatives.map(|n| &source[n.child_by_field_name("condition").unwrap().byte_range()]);
    assert_eq!(
        alternative_texts.collect::<Vec<_>>(),
        &["two", "three", "four",]
    );
}
#[test]
fn test_node_parent_of_child_by_field_name() {
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    let tree = parser.parse("foo(a().b[0].c.d.e())", None).unwrap();

    // Descend to the outer call expression: program -> statement -> call.
    let statement = tree.root_node().named_child(0).unwrap();
    let call = statement.named_child(0).unwrap();
    assert_eq!(call.kind(), "call_expression");

    // Regression test - when a field points to a hidden node (in this case, `_expression`)
    // the hidden node should not be added to the node parent cache.
    let function_child = call.child_by_field_name("function").unwrap();
    assert_eq!(function_child.parent(), Some(call));
}
#[test]
fn test_node_named_child() {
let tree = parse_json_example();
@ -627,6 +700,63 @@ fn test_node_is_named_but_aliased_as_anonymous() {
assert_eq!(root_node.named_child(0).unwrap().kind(), "c");
}
#[test]
fn test_node_numeric_symbols_respect_simple_aliases() {
    // Nodes that are aliased to another node type should report the same
    // numeric `kind_id` as the node type they are aliased to.
    let mut parser = Parser::new();
    parser.set_language(get_language("python")).unwrap();

    // Example 1:
    // Python argument lists can contain "splat" arguments, which are not allowed within
    // other expressions. This includes `parenthesized_list_splat` nodes like `(*b)`. These
    // `parenthesized_list_splat` nodes are aliased as `parenthesized_expression`. Their numeric
    // `symbol`, aka `kind_id` should match that of a normal `parenthesized_expression`.
    let tree = parser.parse("(a((*b)))", None).unwrap();
    let root = tree.root_node();
    assert_eq!(
        root.to_sexp(),
        "(module (expression_statement (parenthesized_expression (call function: (identifier) arguments: (argument_list (parenthesized_expression (list_splat (identifier))))))))",
    );

    let outer_expr_node = root.child(0).unwrap().child(0).unwrap();
    assert_eq!(outer_expr_node.kind(), "parenthesized_expression");

    // Navigate to the aliased `(*b)`: call -> `arguments` field -> first named child.
    let inner_expr_node = outer_expr_node
        .named_child(0)
        .unwrap()
        .child_by_field_name("arguments")
        .unwrap()
        .named_child(0)
        .unwrap();
    assert_eq!(inner_expr_node.kind(), "parenthesized_expression");
    assert_eq!(inner_expr_node.kind_id(), outer_expr_node.kind_id());

    // Example 2:
    // Ruby handles the unary (negative) and binary (minus) `-` operators using two different
    // tokens. One or more of these is an external token that's aliased as `-`. Their numeric
    // kind ids should match.
    parser.set_language(get_language("ruby")).unwrap();
    let tree = parser.parse("-a - b", None).unwrap();
    let root = tree.root_node();
    assert_eq!(
        root.to_sexp(),
        "(program (binary left: (unary operand: (identifier)) right: (identifier)))",
    );

    let binary_node = root.child(0).unwrap();
    assert_eq!(binary_node.kind(), "binary");

    let unary_minus_node = binary_node
        .child_by_field_name("left")
        .unwrap()
        .child(0)
        .unwrap();
    assert_eq!(unary_minus_node.kind(), "-");

    let binary_minus_node = binary_node.child_by_field_name("operator").unwrap();
    assert_eq!(binary_minus_node.kind(), "-");

    assert_eq!(unary_minus_node.kind_id(), binary_minus_node.kind_id());
}
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut result = Vec::new();
let mut visited_children = false;

View file

@ -1,13 +1,14 @@
use super::helpers::allocations;
use super::helpers::edits::ReadRecorder;
use super::helpers::fixtures::{get_language, get_test_language};
use crate::generate::generate_parser_for_grammar;
use crate::parse::{perform_edit, Edit};
use std::sync::atomic::{AtomicUsize, Ordering};
use std::{thread, time};
use tree_sitter::{InputEdit, LogType, Parser, Point, Range};
use tree_sitter::{IncludedRangesError, InputEdit, LogType, Parser, Point, Range};
#[test]
fn test_basic_parsing() {
fn test_parsing_simple_string() {
let mut parser = Parser::new();
parser.set_language(get_language("rust")).unwrap();
@ -26,7 +27,11 @@ fn test_basic_parsing() {
assert_eq!(
root_node.to_sexp(),
"(source_file (struct_item (type_identifier) (field_declaration_list)) (function_item (identifier) (parameters) (block)))"
concat!(
"(source_file ",
"(struct_item name: (type_identifier) body: (field_declaration_list)) ",
"(function_item name: (identifier) parameters: (parameters) body: (block)))"
)
);
let struct_node = root_node.child(0).unwrap();
@ -118,7 +123,17 @@ fn test_parsing_with_custom_utf8_input() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
concat!(
"(source_file ",
"(function_item ",
"(visibility_modifier) ",
"name: (identifier) ",
"parameters: (parameters) ",
"body: (block (integer_literal))))"
)
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
@ -154,7 +169,10 @@ fn test_parsing_with_custom_utf16_input() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
assert_eq!(root.kind(), "source_file");
assert_eq!(root.has_error(), false);
assert_eq!(root.child(0).unwrap().kind(), "function_item");
@ -175,7 +193,10 @@ fn test_parsing_with_callback_returning_owned_strings() {
.unwrap();
let root = tree.root_node();
assert_eq!(root.to_sexp(), "(source_file (function_item (visibility_modifier) (identifier) (parameters) (block (integer_literal))))");
assert_eq!(
root.to_sexp(),
"(source_file (function_item (visibility_modifier) name: (identifier) parameters: (parameters) body: (block (integer_literal))))"
);
}
#[test]
@ -192,7 +213,7 @@ fn test_parsing_text_with_byte_order_mark() {
.unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 2);
@ -200,7 +221,7 @@ fn test_parsing_text_with_byte_order_mark() {
let mut tree = parser.parse("\u{FEFF}fn a() {}", None).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 3);
@ -216,7 +237,7 @@ fn test_parsing_text_with_byte_order_mark() {
let mut tree = parser.parse(" \u{FEFF}fn a() {}", Some(&tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (ERROR (UNEXPECTED 65279)) (function_item (identifier) (parameters) (block)))"
"(source_file (ERROR (UNEXPECTED 65279)) (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 1);
@ -232,11 +253,52 @@ fn test_parsing_text_with_byte_order_mark() {
let tree = parser.parse("\u{FEFF}fn a() {}", Some(&tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
"(source_file (function_item (identifier) (parameters) (block)))"
"(source_file (function_item name: (identifier) parameters: (parameters) body: (block)))"
);
assert_eq!(tree.root_node().start_byte(), 3);
}
#[test]
fn test_parsing_invalid_chars_at_eof() {
    // 0xDF opens a two-byte UTF-8 sequence, so a lone 0xDF at end of input is
    // invalid; the parser should surface it as an unexpected-invalid error.
    let mut json_parser = Parser::new();
    json_parser.set_language(get_language("json")).unwrap();
    let tree = json_parser.parse(b"\xdf", None).unwrap();
    let root = tree.root_node();
    assert_eq!(root.to_sexp(), "(ERROR (UNEXPECTED INVALID))");
}
#[test]
fn test_parsing_unexpected_null_characters_within_source() {
    // A NUL byte in the middle of otherwise valid JavaScript should become an
    // ERROR node without corrupting the parse of the surrounding statement.
    let mut js_parser = Parser::new();
    js_parser.set_language(get_language("javascript")).unwrap();
    let tree = js_parser.parse(b"var \0 something;", None).unwrap();
    let sexp = tree.root_node().to_sexp();
    assert_eq!(
        sexp,
        "(program (variable_declaration (ERROR (UNEXPECTED '\\0')) (variable_declarator name: (identifier))))"
    );
}
#[test]
fn test_parsing_ends_when_input_callback_returns_empty() {
    // Verifies that `parse_with` treats an empty slice from the read callback
    // as end-of-input, even when more source text would have been available.
    // (The original version kept a call counter `i` that was incremented but
    // never read or asserted — dead state, removed.)
    let mut parser = Parser::new();
    parser.set_language(get_language("javascript")).unwrap();
    let source = b"abcdefghijklmnoqrs";
    let tree = parser
        .parse_with(
            // Serve the source in 3-byte chunks, but report EOF (empty slice)
            // for any read at or beyond byte offset 6.
            &mut |offset, _| {
                if offset >= 6 {
                    b""
                } else {
                    &source[offset..usize::min(source.len(), offset + 3)]
                }
            },
            None,
        )
        .unwrap();
    // Parsing must stop exactly where the callback started returning empty input.
    assert_eq!(tree.root_node().end_byte(), 6);
}
// Incremental parsing
#[test]
@ -333,6 +395,18 @@ fn test_parsing_after_editing_end_of_code() {
assert_eq!(recorder.strings_read(), vec![" * ", "abc.d)",]);
}
#[test]
fn test_parsing_empty_file_with_reused_tree() {
    // Regression test: re-parsing empty or whitespace-only input while reusing
    // the previous tree must not crash. The parse results are deliberately
    // unchecked — only the absence of a panic matters here.
    let mut parser = Parser::new();
    parser.set_language(get_language("rust")).unwrap();

    let tree = parser.parse("", None);
    parser.parse("", tree.as_ref());
    let tree = parser.parse("\n ", None);
    parser.parse("\n ", tree.as_ref());
}
// Thread safety
#[test]
@ -388,7 +462,7 @@ fn test_parsing_on_multiple_threads() {
#[test]
fn test_parsing_cancelled_by_another_thread() {
let cancellation_flag = Box::new(AtomicUsize::new(0));
let cancellation_flag = std::sync::Arc::new(AtomicUsize::new(0));
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
@ -409,9 +483,10 @@ fn test_parsing_cancelled_by_another_thread() {
);
assert!(tree.is_some());
let flag = cancellation_flag.clone();
let cancel_thread = thread::spawn(move || {
thread::sleep(time::Duration::from_millis(100));
cancellation_flag.store(1, Ordering::SeqCst);
flag.store(1, Ordering::SeqCst);
});
// Infinite input
@ -547,6 +622,56 @@ fn test_parsing_with_a_timeout_and_a_reset() {
);
}
#[test]
fn test_parsing_with_a_timeout_and_implicit_reset() {
    // NOTE(review): `allocations::record` is a test helper — presumably it
    // checks that allocations made inside the closure are balanced, i.e. that
    // abandoning a timed-out parse does not leak. Confirm against helpers.
    allocations::record(|| {
        let mut parser = Parser::new();
        parser.set_language(get_language("javascript")).unwrap();
        // A 5-microsecond budget is far too small to finish this parse, so the
        // first call is expected to time out and return `None`.
        parser.set_timeout_micros(5);
        let tree = parser.parse(
            "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
            None,
        );
        assert!(tree.is_none());

        // Changing the parser's language implicitly resets, discarding
        // the previous partial parse.
        parser.set_language(get_language("json")).unwrap();
        parser.set_timeout_micros(0);
        let tree = parser.parse(
            "[null, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
            None,
        ).unwrap();
        // The fresh JSON parse succeeded: first element of the array is `null`.
        assert_eq!(
            tree.root_node()
                .named_child(0)
                .unwrap()
                .named_child(0)
                .unwrap()
                .kind(),
            "null"
        );
    });
}
#[test]
fn test_parsing_with_timeout_and_no_completion() {
    // Exercises dropping a parser that still holds an unfinished (timed-out)
    // parse. Run under the allocation recorder so a leak on this path would
    // be detected; there are no other assertions beyond the timeout itself.
    allocations::record(|| {
        let mut parser = Parser::new();
        parser.set_language(get_language("javascript")).unwrap();
        // 5 microseconds is far too little time to finish parsing this input.
        parser.set_timeout_micros(5);
        let tree = parser.parse(
            "[\"ok\", 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32]",
            None,
        );
        assert!(tree.is_none());

        // drop the parser when it has an unfinished parse
    });
}
// Included Ranges
#[test]
@ -559,7 +684,9 @@ fn test_parsing_with_one_included_range() {
let script_content_node = html_tree.root_node().child(1).unwrap().child(1).unwrap();
assert_eq!(script_content_node.kind(), "raw_text");
parser.set_included_ranges(&[script_content_node.range()]);
parser
.set_included_ranges(&[script_content_node.range()])
.unwrap();
parser.set_language(get_language("javascript")).unwrap();
let js_tree = parser.parse(source_code, None).unwrap();
@ -599,26 +726,28 @@ fn test_parsing_with_multiple_included_ranges() {
let close_quote_node = template_string_node.child(3).unwrap();
parser.set_language(get_language("html")).unwrap();
parser.set_included_ranges(&[
Range {
start_byte: open_quote_node.end_byte(),
start_point: open_quote_node.end_position(),
end_byte: interpolation_node1.start_byte(),
end_point: interpolation_node1.start_position(),
},
Range {
start_byte: interpolation_node1.end_byte(),
start_point: interpolation_node1.end_position(),
end_byte: interpolation_node2.start_byte(),
end_point: interpolation_node2.start_position(),
},
Range {
start_byte: interpolation_node2.end_byte(),
start_point: interpolation_node2.end_position(),
end_byte: close_quote_node.start_byte(),
end_point: close_quote_node.start_position(),
},
]);
parser
.set_included_ranges(&[
Range {
start_byte: open_quote_node.end_byte(),
start_point: open_quote_node.end_position(),
end_byte: interpolation_node1.start_byte(),
end_point: interpolation_node1.start_position(),
},
Range {
start_byte: interpolation_node1.end_byte(),
start_point: interpolation_node1.end_position(),
end_byte: interpolation_node2.start_byte(),
end_point: interpolation_node2.start_position(),
},
Range {
start_byte: interpolation_node2.end_byte(),
start_point: interpolation_node2.end_position(),
end_byte: close_quote_node.start_byte(),
end_point: close_quote_node.start_position(),
},
])
.unwrap();
let html_tree = parser.parse(source_code, None).unwrap();
assert_eq!(
@ -667,6 +796,47 @@ fn test_parsing_with_multiple_included_ranges() {
);
}
#[test]
fn test_parsing_error_in_invalid_included_ranges() {
    // Build a single-line byte range `[start, end)` on row 0.
    fn span(start: usize, end: usize) -> Range {
        Range {
            start_byte: start,
            end_byte: end,
            start_point: Point::new(0, start),
            end_point: Point::new(0, end),
        }
    }

    let mut parser = Parser::new();

    // Ranges are not ordered: the offending entry is reported by its index.
    let error = parser
        .set_included_ranges(&[span(23, 29), span(0, 5), span(50, 60)])
        .unwrap_err();
    assert_eq!(error, IncludedRangesError(1));

    // Range ends before it starts.
    let error = parser.set_included_ranges(&[span(10, 5)]).unwrap_err();
    assert_eq!(error, IncludedRangesError(0));
}
#[test]
fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
let source_code = "<script>a.</script>";
@ -677,12 +847,14 @@ fn test_parsing_utf16_code_with_errors_at_the_end_of_an_included_range() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
parser.set_included_ranges(&[Range {
start_byte,
end_byte,
start_point: Point::new(0, start_byte),
end_point: Point::new(0, end_byte),
}]);
parser
.set_included_ranges(&[Range {
start_byte,
end_byte,
start_point: Point::new(0, start_byte),
end_point: Point::new(0, end_byte),
}])
.unwrap();
let tree = parser.parse_utf16(&utf16_source_code, None).unwrap();
assert_eq!(tree.root_node().to_sexp(), "(program (ERROR (identifier)))");
}
@ -697,20 +869,22 @@ fn test_parsing_with_external_scanner_that_uses_included_range_boundaries() {
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
parser.set_included_ranges(&[
Range {
start_byte: range1_start_byte,
end_byte: range1_end_byte,
start_point: Point::new(0, range1_start_byte),
end_point: Point::new(0, range1_end_byte),
},
Range {
start_byte: range2_start_byte,
end_byte: range2_end_byte,
start_point: Point::new(0, range2_start_byte),
end_point: Point::new(0, range2_end_byte),
},
]);
parser
.set_included_ranges(&[
Range {
start_byte: range1_start_byte,
end_byte: range1_end_byte,
start_point: Point::new(0, range1_start_byte),
end_point: Point::new(0, range1_end_byte),
},
Range {
start_byte: range2_start_byte,
end_byte: range2_end_byte,
start_point: Point::new(0, range2_start_byte),
end_point: Point::new(0, range2_end_byte),
},
])
.unwrap();
let tree = parser.parse(source_code, None).unwrap();
let root = tree.root_node();
@ -758,20 +932,22 @@ fn test_parsing_with_a_newly_excluded_range() {
let directive_start = source_code.find("<%=").unwrap();
let directive_end = source_code.find("</span>").unwrap();
let source_code_end = source_code.len();
parser.set_included_ranges(&[
Range {
start_byte: 0,
end_byte: directive_start,
start_point: Point::new(0, 0),
end_point: Point::new(0, directive_start),
},
Range {
start_byte: directive_end,
end_byte: source_code_end,
start_point: Point::new(0, directive_end),
end_point: Point::new(0, source_code_end),
},
]);
parser
.set_included_ranges(&[
Range {
start_byte: 0,
end_byte: directive_start,
start_point: Point::new(0, 0),
end_point: Point::new(0, directive_start),
},
Range {
start_byte: directive_end,
end_byte: source_code_end,
start_point: Point::new(0, directive_end),
end_point: Point::new(0, source_code_end),
},
])
.unwrap();
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
assert_eq!(
@ -809,59 +985,73 @@ fn test_parsing_with_a_newly_excluded_range() {
#[test]
fn test_parsing_with_a_newly_included_range() {
let source_code = "<div><%= foo() %></div><div><%= bar() %>";
let first_code_start_index = source_code.find(" foo").unwrap();
let first_code_end_index = first_code_start_index + 7;
let second_code_start_index = source_code.find(" bar").unwrap();
let second_code_end_index = second_code_start_index + 7;
let ranges = [
Range {
start_byte: first_code_start_index,
end_byte: first_code_end_index,
start_point: Point::new(0, first_code_start_index),
end_point: Point::new(0, first_code_end_index),
},
Range {
start_byte: second_code_start_index,
end_byte: second_code_end_index,
start_point: Point::new(0, second_code_start_index),
end_point: Point::new(0, second_code_end_index),
},
];
let source_code = "<div><%= foo() %></div><span><%= bar() %></span><%= baz() %>";
let range1_start = source_code.find(" foo").unwrap();
let range2_start = source_code.find(" bar").unwrap();
let range3_start = source_code.find(" baz").unwrap();
let range1_end = range1_start + 7;
let range2_end = range2_start + 7;
let range3_end = range3_start + 7;
// Parse only the first code directive as JavaScript
let mut parser = Parser::new();
parser.set_language(get_language("javascript")).unwrap();
parser.set_included_ranges(&ranges[0..1]);
let first_tree = parser.parse(source_code, None).unwrap();
parser
.set_included_ranges(&[simple_range(range1_start, range1_end)])
.unwrap();
let tree = parser.parse(source_code, None).unwrap();
assert_eq!(
first_tree.root_node().to_sexp(),
tree.root_node().to_sexp(),
concat!(
"(program",
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
)
);
// Parse both the code directives as JavaScript, using the old tree as a reference.
parser.set_included_ranges(&ranges);
let tree = parser.parse(&source_code, Some(&first_tree)).unwrap();
// Parse both the first and third code directives as JavaScript, using the old tree as a
// reference.
parser
.set_included_ranges(&[
simple_range(range1_start, range1_end),
simple_range(range3_start, range3_end),
])
.unwrap();
let tree2 = parser.parse(&source_code, Some(&tree)).unwrap();
assert_eq!(
tree.root_node().to_sexp(),
tree2.root_node().to_sexp(),
concat!(
"(program",
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
)
);
assert_eq!(
tree.changed_ranges(&first_tree).collect::<Vec<_>>(),
vec![Range {
start_byte: first_code_end_index + 1,
end_byte: second_code_end_index + 1,
start_point: Point::new(0, first_code_end_index + 1),
end_point: Point::new(0, second_code_end_index + 1),
}]
tree2.changed_ranges(&tree).collect::<Vec<_>>(),
&[simple_range(range1_end, range3_end)]
);
// Parse all three code directives as JavaScript, using the old tree as a
// reference.
parser
.set_included_ranges(&[
simple_range(range1_start, range1_end),
simple_range(range2_start, range2_end),
simple_range(range3_start, range3_end),
])
.unwrap();
let tree3 = parser.parse(&source_code, Some(&tree)).unwrap();
assert_eq!(
tree3.root_node().to_sexp(),
concat!(
"(program",
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
" (expression_statement (call_expression function: (identifier) arguments: (arguments)))",
" (expression_statement (call_expression function: (identifier) arguments: (arguments))))",
)
);
assert_eq!(
tree3.changed_ranges(&tree2).collect::<Vec<_>>(),
&[simple_range(range2_start + 1, range2_end - 1)]
);
}
@ -899,20 +1089,22 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
// There's a missing `a` token at the beginning of the code. It must be inserted
// at the beginning of the first included range, not at {0, 0}.
let source_code = "__bc__bc__";
parser.set_included_ranges(&[
Range {
start_byte: 2,
end_byte: 4,
start_point: Point::new(0, 2),
end_point: Point::new(0, 4),
},
Range {
start_byte: 6,
end_byte: 8,
start_point: Point::new(0, 6),
end_point: Point::new(0, 8),
},
]);
parser
.set_included_ranges(&[
Range {
start_byte: 2,
end_byte: 4,
start_point: Point::new(0, 2),
end_point: Point::new(0, 4),
},
Range {
start_byte: 6,
end_byte: 8,
start_point: Point::new(0, 6),
end_point: Point::new(0, 8),
},
])
.unwrap();
let tree = parser.parse(source_code, None).unwrap();
let root = tree.root_node();
@ -923,3 +1115,12 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
assert_eq!(root.start_byte(), 2);
assert_eq!(root.child(3).unwrap().start_byte(), 4);
}
fn simple_range(start: usize, end: usize) -> Range {
Range {
start_byte: start,
end_byte: end,
start_point: Point::new(0, start),
end_point: Point::new(0, end),
}
}

View file

@ -0,0 +1,15 @@
use super::helpers::allocations;
use super::helpers::fixtures::get_language;
use tree_sitter::Parser;
#[test]
fn test_pathological_example_1() {
let language = "cpp";
let source = r#"*ss<s"ss<sqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<qssqXqss<s._<s<sq<(qqX<sqss<s.ss<sqsssq<(qss<sqss<sqss<s._<s<sq>(qqX<sqss<s.ss<sqsssq<(qss<sq&=ss<s<sqss<s._<s<sq<(qqX<sqss<s.ss<sqs"#;
allocations::record(|| {
let mut parser = Parser::new();
parser.set_language(get_language(language)).unwrap();
parser.parse(source, None).unwrap();
});
}

View file

@ -1,265 +0,0 @@
use super::helpers::fixtures::get_language;
use crate::generate::properties;
use serde_derive::Deserialize;
use serde_json;
use std::collections::HashSet;
use tree_sitter::{Parser, PropertySheet};
#[derive(Debug, Default, Deserialize, PartialEq, Eq)]
struct Properties {
a: Option<String>,
b: Option<String>,
}
#[test]
fn test_walk_with_properties_with_nth_child() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
binary_expression > identifier:nth-child(2) {
a: x;
}
binary_expression > identifier {
a: y;
}
identifier {
a: z;
}
",
),
)
.unwrap();
let source_code = "a = b || c;";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert_eq!(cursor.node().kind(), "program");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "expression_statement");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "assignment_expression");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("z".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "=");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "binary_expression");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "||");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None
}
);
}
#[test]
fn test_walk_with_properties_with_regexes() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
identifier {
&[text='^[A-Z]'] {
a: y;
}
&[text='^[A-Z_]+$'] {
a: z;
}
a: x;
}
",
),
)
.unwrap();
let source_code = "const ABC = Def(ghi);";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert_eq!(cursor.node().kind(), "program");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "lexical_declaration");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "const");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "variable_declarator");
// The later selector with a text regex overrides the earlier one.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("z".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "=");
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "call_expression");
// The selectors with text regexes override the selector without one.
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "arguments");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "(");
// This node doesn't match either of the regexes.
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None
}
);
}
#[test]
fn test_walk_with_properties_based_on_fields() {
let language = get_language("javascript");
let property_sheet = PropertySheet::<Properties>::new(
language,
&generate_property_sheet_string(
"/some/path.css",
"
arrow_function > .parameter {
a: x;
}
function_declaration {
& > .parameters > identifier {
a: y;
}
& > .name {
b: z;
}
}
identifier {
a: w;
}
",
),
)
.unwrap();
let source_code = "function a(b) { return c => c + b; }";
let mut parser = Parser::new();
parser.set_language(language).unwrap();
let tree = parser.parse(source_code, None).unwrap();
let mut cursor = tree.walk_with_properties(&property_sheet, source_code.as_bytes());
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "function_declaration");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "function");
assert_eq!(*cursor.node_properties(), Properties::default());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("w".to_string()),
b: Some("z".to_string())
}
);
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "formal_parameters");
assert_eq!(*cursor.node_properties(), Properties::default());
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "(");
assert_eq!(*cursor.node_properties(), Properties::default());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("y".to_string()),
b: None,
}
);
assert!(cursor.goto_parent());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "statement_block");
assert!(cursor.goto_first_child());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "return_statement");
assert!(cursor.goto_first_child());
assert!(cursor.goto_next_sibling());
assert_eq!(cursor.node().kind(), "arrow_function");
assert!(cursor.goto_first_child());
assert_eq!(cursor.node().kind(), "identifier");
assert_eq!(
*cursor.node_properties(),
Properties {
a: Some("x".to_string()),
b: None,
}
);
}
fn generate_property_sheet_string(path: &str, css: &str) -> String {
serde_json::to_string(&properties::generate_property_sheet(path, css, &HashSet::new()).unwrap())
.unwrap()
}

3027
cli/src/tests/query_test.rs Normal file

File diff suppressed because it is too large Load diff

437
cli/src/tests/tags_test.rs Normal file
View file

@ -0,0 +1,437 @@
use super::helpers::allocations;
use super::helpers::fixtures::{get_language, get_language_queries_path};
use std::ffi::CStr;
use std::ffi::CString;
use std::{fs, ptr, slice, str};
use tree_sitter::Point;
use tree_sitter_tags::c_lib as c;
use tree_sitter_tags::{Error, TagsConfiguration, TagsContext};
const PYTHON_TAG_QUERY: &'static str = r#"
(
(function_definition
name: (identifier) @name
body: (block . (expression_statement (string) @doc))) @definition.function
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(function_definition
name: (identifier) @name) @definition.function
(
(class_definition
name: (identifier) @name
body: (block
. (expression_statement (string) @doc))) @definition.class
(#strip! @doc "(^['\"\\s]*)|(['\"\\s]*$)")
)
(class_definition
name: (identifier) @name) @definition.class
(call
function: (identifier) @name) @reference.call
(call
function: (attribute
attribute: (identifier) @name)) @reference.call
"#;
const JS_TAG_QUERY: &'static str = r#"
(
(comment)* @doc .
(class_declaration
name: (identifier) @name) @definition.class
(#select-adjacent! @doc @definition.class)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(method_definition
name: (property_identifier) @name) @definition.method
(#select-adjacent! @doc @definition.method)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(
(comment)* @doc .
(function_declaration
name: (identifier) @name) @definition.function
(#select-adjacent! @doc @definition.function)
(#strip! @doc "(^[/\\*\\s]*)|([/\\*\\s]*$)")
)
(call_expression
function: (identifier) @name) @reference.call
"#;
const RUBY_TAG_QUERY: &'static str = r#"
(method
name: (_) @name) @definition.method
(method_call
method: (identifier) @name) @reference.call
(setter (identifier) @ignore)
((identifier) @name @reference.call
(#is-not? local))
"#;
#[test]
fn test_tags_python() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br#"
class Customer:
"""
Data about a customer
"""
def age(self):
'''
Get the customer's age
'''
compute_age(self.id)
}
"#;
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", "class"),
("age", "function"),
("compute_age", "call"),
]
);
assert_eq!(substr(source, &tags[0].line_range), "class Customer:");
assert_eq!(substr(source, &tags[1].line_range), "def age(self):");
assert_eq!(tags[0].docs.as_ref().unwrap(), "Data about a customer");
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
}
#[test]
fn test_tags_javascript() {
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let source = br#"
// hi
// Data about a customer.
// bla bla bla
class Customer {
/*
* Get the customer's age
*/
getAge() {
}
}
// ok
class Agent {
}
"#;
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source, &t.name_range),
t.span.clone(),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Customer", Point::new(5, 10)..Point::new(5, 18), "class",),
("getAge", Point::new(9, 8)..Point::new(9, 14), "method",),
("Agent", Point::new(15, 10)..Point::new(15, 15), "class",)
]
);
assert_eq!(
tags[0].docs.as_ref().unwrap(),
"Data about a customer.\nbla bla bla"
);
assert_eq!(tags[1].docs.as_ref().unwrap(), "Get the customer's age");
assert_eq!(tags[2].docs, None);
}
#[test]
fn test_tags_columns_measured_in_utf16_code_units() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = r#""❤️❤️❤️".hello_α_ω()"#.as_bytes();
let tag = tag_context
.generate_tags(&tags_config, source, None)
.unwrap()
.0
.next()
.unwrap()
.unwrap();
assert_eq!(substr(source, &tag.name_range), "hello_α");
assert_eq!(tag.span, Point::new(0, 21)..Point::new(0, 32));
assert_eq!(tag.utf16_column_range, 9..18);
}
#[test]
fn test_tags_ruby() {
let language = get_language("ruby");
let locals_query =
fs::read_to_string(get_language_queries_path("ruby").join("locals.scm")).unwrap();
let tags_config = TagsConfiguration::new(language, RUBY_TAG_QUERY, &locals_query).unwrap();
let source = strip_whitespace(
8,
"
b = 1
def foo=()
c = 1
# a is a method because it is not in scope
# b is a method because `b` doesn't capture variables from its containing scope
bar a, b, c
[1, 2, 3].each do |a|
# a is a parameter
# b is a method
# c is a variable, because the block captures variables from its containing scope.
baz a, b, c
end
end",
);
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), None)
.unwrap()
.0
.collect::<Result<Vec<_>, _>>()
.unwrap();
assert_eq!(
tags.iter()
.map(|t| (
substr(source.as_bytes(), &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id),
(t.span.start.row, t.span.start.column),
))
.collect::<Vec<_>>(),
&[
("foo=", "method", (2, 4)),
("bar", "call", (7, 4)),
("a", "call", (7, 8)),
("b", "call", (7, 11)),
("each", "call", (9, 14)),
("baz", "call", (13, 8)),
("b", "call", (13, 15),),
]
);
}
#[test]
fn test_tags_cancellation() {
use std::sync::atomic::{AtomicUsize, Ordering};
allocations::record(|| {
// Large javascript document
let source = (0..500)
.map(|_| "/* hi */ class A { /* ok */ b() {} }\n")
.collect::<String>();
let cancellation_flag = AtomicUsize::new(0);
let language = get_language("javascript");
let tags_config = TagsConfiguration::new(language, JS_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let tags = tag_context
.generate_tags(&tags_config, source.as_bytes(), Some(&cancellation_flag))
.unwrap();
for (i, tag) in tags.0.enumerate() {
if i == 150 {
cancellation_flag.store(1, Ordering::SeqCst);
}
if let Err(e) = tag {
assert_eq!(e, Error::Cancelled);
return;
}
}
panic!("Expected to halt tagging with an error");
});
}
#[test]
fn test_invalid_capture() {
let language = get_language("python");
let e = TagsConfiguration::new(language, "(identifier) @method", "")
.expect_err("expected InvalidCapture error");
assert_eq!(e, Error::InvalidCapture("method".to_string()));
}
#[test]
fn test_tags_with_parse_error() {
let language = get_language("python");
let tags_config = TagsConfiguration::new(language, PYTHON_TAG_QUERY, "").unwrap();
let mut tag_context = TagsContext::new();
let source = br#"
class Fine: pass
class Bad
"#;
let (tags, failed) = tag_context
.generate_tags(&tags_config, source, None)
.unwrap();
let newtags = tags.collect::<Result<Vec<_>, _>>().unwrap();
assert!(failed, "syntax error should have been detected");
assert_eq!(
newtags.iter()
.map(|t| (
substr(source, &t.name_range),
tags_config.syntax_type_name(t.syntax_type_id)
))
.collect::<Vec<_>>(),
&[
("Fine", "class"),
]
);
}
#[test]
fn test_tags_via_c_api() {
allocations::record(|| {
let tagger = c::ts_tagger_new();
let buffer = c::ts_tags_buffer_new();
let scope_name = "source.js";
let language = get_language("javascript");
let source_code = strip_whitespace(
12,
"
var a = 1;
// one
// two
// three
function b() {
}
// four
// five
class C extends D {
}
b(a);",
);
let c_scope_name = CString::new(scope_name).unwrap();
let result = c::ts_tagger_add_language(
tagger,
c_scope_name.as_ptr(),
language,
JS_TAG_QUERY.as_ptr(),
ptr::null(),
JS_TAG_QUERY.len() as u32,
0,
);
assert_eq!(result, c::TSTagsError::Ok);
let result = c::ts_tagger_tag(
tagger,
c_scope_name.as_ptr(),
source_code.as_ptr(),
source_code.len() as u32,
buffer,
ptr::null(),
);
assert_eq!(result, c::TSTagsError::Ok);
let tags = unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_tags(buffer),
c::ts_tags_buffer_tags_len(buffer) as usize,
)
};
let docs = str::from_utf8(unsafe {
slice::from_raw_parts(
c::ts_tags_buffer_docs(buffer) as *const u8,
c::ts_tags_buffer_docs_len(buffer) as usize,
)
})
.unwrap();
let syntax_types: Vec<&str> = unsafe {
let mut len: u32 = 0;
let ptr =
c::ts_tagger_syntax_kinds_for_scope_name(tagger, c_scope_name.as_ptr(), &mut len);
slice::from_raw_parts(ptr, len as usize)
.iter()
.map(|i| CStr::from_ptr(*i).to_str().unwrap())
.collect()
};
assert_eq!(
tags.iter()
.map(|tag| (
syntax_types[tag.syntax_type_id as usize],
&source_code[tag.name_start_byte as usize..tag.name_end_byte as usize],
&source_code[tag.line_start_byte as usize..tag.line_end_byte as usize],
&docs[tag.docs_start_byte as usize..tag.docs_end_byte as usize],
))
.collect::<Vec<_>>(),
&[
("function", "b", "function b() {", "one\ntwo\nthree"),
("class", "C", "class C extends D {", "four\nfive"),
("call", "b", "b(a);", "")
]
);
c::ts_tags_buffer_delete(buffer);
c::ts_tagger_delete(tagger);
});
}
fn substr<'a>(source: &'a [u8], range: &std::ops::Range<usize>) -> &'a str {
std::str::from_utf8(&source[range.clone()]).unwrap()
}
fn strip_whitespace(indent: usize, s: &str) -> String {
s.lines()
.skip(1)
.map(|line| &line[line.len().min(indent)..])
.collect::<Vec<_>>()
.join("\n")
}

View file

@ -0,0 +1,64 @@
use super::helpers::fixtures::{get_highlight_config, get_language, test_loader};
use crate::query_testing::{parse_position_comments, Assertion};
use crate::test_highlight::get_highlight_positions;
use tree_sitter::{Parser, Point};
use tree_sitter_highlight::{Highlight, Highlighter};
#[test]
fn test_highlight_test_with_basic_test() {
let language = get_language("javascript");
let config = get_highlight_config(
"javascript",
Some("injections.scm"),
&[
"function".to_string(),
"variable.parameter".to_string(),
"keyword".to_string(),
],
);
let source = [
"var abc = function(d) {",
" // ^ function",
" // ^ keyword",
" return d + e;",
" // ^ variable.parameter",
"};",
]
.join("\n");
let assertions =
parse_position_comments(&mut Parser::new(), language, source.as_bytes()).unwrap();
assert_eq!(
assertions,
&[
Assertion {
position: Point::new(0, 5),
expected_capture_name: "function".to_string()
},
Assertion {
position: Point::new(0, 11),
expected_capture_name: "keyword".to_string()
},
Assertion {
position: Point::new(3, 9),
expected_capture_name: "variable.parameter".to_string()
},
]
);
let mut highlighter = Highlighter::new();
let highlight_positions =
get_highlight_positions(test_loader(), &mut highlighter, &config, source.as_bytes())
.unwrap();
assert_eq!(
highlight_positions,
&[
(Point::new(0, 0), Point::new(0, 3), Highlight(2)), // "var"
(Point::new(0, 4), Point::new(0, 7), Highlight(0)), // "abc"
(Point::new(0, 10), Point::new(0, 18), Highlight(2)), // "function"
(Point::new(0, 19), Point::new(0, 20), Highlight(1)), // "d"
(Point::new(3, 2), Point::new(3, 8), Highlight(2)), // "return"
(Point::new(3, 9), Point::new(3, 10), Highlight(1)), // "d"
]
);
}

View file

@ -1,12 +1,32 @@
use super::error::{Error, Result};
use std::io;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::Arc;
use std::thread;
use tree_sitter::Parser;
#[cfg(unix)]
use std::path::PathBuf;
#[cfg(unix)]
use std::process::{Child, ChildStdin, Command, Stdio};
use tree_sitter::Parser;
#[cfg(unix)]
const HTML_HEADER: &[u8] = b"<!DOCTYPE html>\n<style>svg { width: 100%; }</style>\n\n";
pub fn cancel_on_stdin() -> Arc<AtomicUsize> {
let result = Arc::new(AtomicUsize::new(0));
if atty::is(atty::Stream::Stdin) {
thread::spawn({
let flag = result.clone();
move || {
let mut line = String::new();
io::stdin().read_line(&mut line).unwrap();
flag.store(1, Ordering::Relaxed);
}
});
}
result
}
#[cfg(windows)]
pub struct LogSession();
@ -14,12 +34,12 @@ pub struct LogSession();
pub struct LogSession(PathBuf, Option<Child>, Option<ChildStdin>);
#[cfg(windows)]
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(_parser: &mut Parser, _path: &str) -> Result<LogSession> {
Ok(LogSession())
}
#[cfg(unix)]
pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession> {
pub fn log_graphs(parser: &mut Parser, path: &str) -> Result<LogSession> {
use std::io::Write;
let mut dot_file = std::fs::File::create(path)?;
@ -29,11 +49,13 @@ pub fn log_graphs(parser: &mut Parser, path: &str) -> std::io::Result<LogSession
.stdin(Stdio::piped())
.stdout(dot_file)
.spawn()
.expect("Failed to run Dot");
.map_err(Error::wrap(|| {
"Failed to run the `dot` command. Check that graphviz is installed."
}))?;
let dot_stdin = dot_process
.stdin
.take()
.expect("Failed to open stdin for Dot");
.ok_or_else(|| Error::new("Failed to open stdin for `dot` process.".to_string()))?;
parser.print_dot_graphs(&dot_stdin);
Ok(LogSession(
PathBuf::from(path),

View file

@ -57,9 +57,11 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
}
// Run `emcc` in a container using the `emscripten-slim` image
command.args(&["trzeci/emscripten-slim", "emcc"]);
command.args(&["emscripten/emsdk", "emcc"]);
} else {
return Error::err("You must have either emcc or docker on your PATH to run this command".to_string());
return Error::err(
"You must have either emcc or docker on your PATH to run this command".to_string(),
);
}
command.args(&[
@ -81,31 +83,22 @@ pub fn compile_language_to_wasm(language_dir: &Path, force_docker: bool) -> Resu
"src",
]);
// Find source files to pass to emscripten
let src_entries = fs::read_dir(&src_dir).map_err(Error::wrap(|| {
format!("Failed to read source directory {:?}", src_dir)
}))?;
let src = Path::new("src");
let parser_c_path = src.join("parser.c");
let scanner_c_path = src.join("scanner.c");
let scanner_cc_path = src.join("scanner.cc");
let scanner_cpp_path = src.join("scanner.cpp");
for entry in src_entries {
let entry = entry?;
let file_name = entry.file_name();
// Do not compile the node.js binding file.
if file_name
.to_str()
.map_or(false, |s| s.starts_with("binding"))
{
continue;
}
// Compile any .c, .cc, or .cpp files
if let Some(extension) = Path::new(&file_name).extension().and_then(|s| s.to_str()) {
if extension == "c" || extension == "cc" || extension == "cpp" {
command.arg(Path::new("src").join(entry.file_name()));
}
}
if language_dir.join(&scanner_cc_path).exists() {
command.arg("-xc++").arg(&scanner_cc_path);
} else if language_dir.join(&scanner_cpp_path).exists() {
command.arg("-xc++").arg(&scanner_cpp_path);
} else if language_dir.join(&scanner_c_path).exists() {
command.arg(&scanner_c_path);
}
command.arg(&parser_c_path);
let output = command
.output()
.map_err(Error::wrap(|| "Failed to run emcc command"))?;

View file

@ -1,4 +1,5 @@
<head>
<meta charset="utf-8">
<title>tree-sitter THE_LANGUAGE_NAME</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/codemirror/5.45.0/codemirror.min.css">
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/clusterize.js/0.18.0/clusterize.min.css">
@ -7,7 +8,7 @@
</head>
<body>
<div id="playground-container">
<div id="playground-container" style="visibility: hidden;">
<header>
<div class=header-item>
<bold>THE_LANGUAGE_NAME</bold>
@ -18,18 +19,31 @@
<input id="logging-checkbox" type="checkbox"></input>
</div>
<div class=header-item>
<label for="query-checkbox">query</label>
<input id="query-checkbox" type="checkbox"></input>
</div>
<div class=header-item>
<label for="update-time">parse time: </label>
<span id="update-time"></span>
</div>
</header>
<main>
<select id="language-select" style="display: none;">
<option value="parser">Parser</option>
</select>
</header>
<textarea id="code-input"></textarea>
<main>
<div id="input-pane">
<div id="code-container">
<textarea id="code-input"></textarea>
</div>
<div id="query-container" style="visibility: hidden; position: absolute;">
<textarea id="query-input"></textarea>
</div>
</div>
<div id="output-container-scroll">
<pre id="output-container" class="highlight"></pre>
@ -51,15 +65,13 @@
<style>
body {
font: Sans Serif;
margin: 0;
padding: 0;
}
#playground-container {
position: absolute;
top: 0;
bottom: 0;
left: 0;
right: 0;
width: 100%;
height: 100%;
display: flex;
flex-direction: column;
}
@ -73,24 +85,51 @@
}
main {
flex: 1;
position: relative;
}
#input-pane {
position: absolute;
top: 0;
left: 0;
bottom: 0;
right: 50%;
display: flex;
height: 100%;
flex-direction: row;
flex-direction: column;
}
#code-container, #query-container {
flex: 1;
position: relative;
overflow: hidden;
border-right: 1px solid #aaa;
border-bottom: 1px solid #aaa;
}
#output-container-scroll {
position: absolute;
top: 0;
left: 50%;
bottom: 0;
right: 0;
}
.header-item {
margin-right: 30px;
}
.CodeMirror {
width: 50%;
#playground-container .CodeMirror {
position: absolute;
top: 0;
bottom: 0;
left: 0;
right: 0;
height: 100%;
border-right: 1px solid #aaa;
}
#output-container-scroll {
width: 50%;
height: 100%;
flex: 1;
padding: 0;
overflow: auto;
}
@ -124,5 +163,9 @@
border-radius: 3px;
text-decoration: underline;
}
.query-error {
text-decoration: underline red dashed;
}
</style>
</body>

View file

@ -1,26 +1,63 @@
use super::error::Error;
use super::wasm;
use std::env;
use std::fs;
use std::net::TcpListener;
use std::path::Path;
use std::path::{Path, PathBuf};
use std::str::FromStr;
use tiny_http::{Header, Response, Server};
use webbrowser;
const HTML: &'static str = include_str!("./web_ui.html");
const PLAYGROUND_JS: &'static [u8] = include_bytes!("../../docs/assets/js/playground.js");
macro_rules! resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
#[cfg(unix)]
const LIB_JS: &'static [u8] = include_bytes!("../../lib/binding_web/tree-sitter.js");
#[cfg(unix)]
const LIB_WASM: &'static [u8] = include_bytes!("../../lib/binding_web/tree-sitter.wasm");
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
};
}
#[cfg(windows)]
const LIB_JS: &'static [u8] = &[];
#[cfg(windows)]
const LIB_WASM: &'static [u8] = &[];
macro_rules! optional_resource {
($name: tt, $path: tt) => {
#[cfg(TREE_SITTER_EMBED_WASM_BINDING)]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
include_bytes!(concat!("../../", $path)).to_vec()
}
}
pub fn serve(grammar_path: &Path) {
#[cfg(not(TREE_SITTER_EMBED_WASM_BINDING))]
fn $name(tree_sitter_dir: &Option<PathBuf>) -> Vec<u8> {
if let Some(tree_sitter_dir) = tree_sitter_dir {
fs::read(tree_sitter_dir.join($path)).unwrap()
} else {
Vec::new()
}
}
};
}
resource!(get_main_html, "cli/src/web_ui.html");
resource!(get_playground_js, "docs/assets/js/playground.js");
optional_resource!(get_lib_js, "lib/binding_web/tree-sitter.js");
optional_resource!(get_lib_wasm, "lib/binding_web/tree-sitter.wasm");
pub fn serve(grammar_path: &Path, open_in_browser: bool) {
let port = get_available_port().expect("Couldn't find an available port");
let url = format!("127.0.0.1:{}", port);
let server = Server::http(&url).expect("Failed to start web server");
@ -36,37 +73,42 @@ pub fn serve(grammar_path: &Path) {
)
}))
.unwrap();
if open_in_browser {
if let Err(_) = webbrowser::open(&format!("http://127.0.0.1:{}", port)) {
eprintln!("Failed to open '{}' in a web browser", url);
}
}
webbrowser::open(&format!("http://127.0.0.1:{}", port))
.map_err(Error::wrap(|| {
format!("Failed to open '{}' in a web browser", url)
}))
.unwrap();
let html = HTML
let tree_sitter_dir = env::var("TREE_SITTER_BASE_DIR").map(PathBuf::from).ok();
let main_html = String::from_utf8(get_main_html(&tree_sitter_dir))
.unwrap()
.replace("THE_LANGUAGE_NAME", &grammar_name)
.into_bytes();
let playground_js = get_playground_js(&tree_sitter_dir);
let lib_js = get_lib_js(&tree_sitter_dir);
let lib_wasm = get_lib_wasm(&tree_sitter_dir);
let html_header = Header::from_str("Content-Type: text/html").unwrap();
let js_header = Header::from_str("Content-Type: application/javascript").unwrap();
let wasm_header = Header::from_str("Content-Type: application/wasm").unwrap();
for request in server.incoming_requests() {
let res = match request.url() {
"/" => response(&html, &html_header),
"/playground.js" => response(PLAYGROUND_JS, &js_header),
"/" => response(&main_html, &html_header),
"/playground.js" => response(&playground_js, &js_header),
"/tree-sitter-parser.wasm" => response(&language_wasm, &wasm_header),
"/tree-sitter.js" => {
if cfg!(windows) {
redirect("https://tree-sitter.github.io/tree-sitter.js")
} else {
response(LIB_JS, &js_header)
response(&lib_js, &js_header)
}
}
"/tree-sitter.wasm" => {
if cfg!(windows) {
redirect("https://tree-sitter.github.io/tree-sitter.wasm")
} else {
response(LIB_WASM, &wasm_header)
response(&lib_wasm, &wasm_header)
}
}
_ => response(b"Not found", &html_header).with_status_code(404),