Merge pull request #271 from tree-sitter/node-fields
Add an API for associating field names with child nodes
This commit is contained in:
commit
77636e8fe6
52 changed files with 2600 additions and 436 deletions
|
|
@ -32,7 +32,7 @@ serde = "1.0"
|
|||
serde_derive = "1.0"
|
||||
regex-syntax = "0.6.4"
|
||||
regex = "1"
|
||||
rsass = "0.9.8"
|
||||
rsass = "^0.9.8"
|
||||
|
||||
[dependencies.tree-sitter]
|
||||
version = ">= 0.3.7"
|
||||
|
|
|
|||
|
|
@ -161,7 +161,8 @@ fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> us
|
|||
}
|
||||
|
||||
fn get_language(name: &str) -> Language {
|
||||
let src_dir = GRAMMARS_DIR.join(name).join("src");
|
||||
TEST_LOADER
|
||||
.load_language_at_path(&GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR)
|
||||
.load_language_at_path(&src_dir, &src_dir)
|
||||
.unwrap()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,19 +4,20 @@ use crate::error::{Error, Result};
|
|||
use crate::generate::grammars::{
|
||||
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
|
||||
};
|
||||
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{Associativity, Symbol, SymbolType};
|
||||
use crate::generate::tables::{
|
||||
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
|
||||
ProductionInfo, ProductionInfoId,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::hash_map::Entry;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::collections::hash_map::DefaultHasher;
|
||||
use std::collections::VecDeque;
|
||||
use std::u32;
|
||||
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::fmt::Write;
|
||||
use std::hash::Hasher;
|
||||
use std::u32;
|
||||
|
||||
#[derive(Clone)]
|
||||
struct AuxiliarySymbolInfo {
|
||||
|
|
@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> {
|
|||
item_set_builder: ParseItemSetBuilder<'a>,
|
||||
syntax_grammar: &'a SyntaxGrammar,
|
||||
lexical_grammar: &'a LexicalGrammar,
|
||||
variable_info: &'a Vec<VariableInfo>,
|
||||
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
|
||||
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
|
||||
parse_state_queue: VecDeque<ParseStateQueueEntry>,
|
||||
|
|
@ -47,7 +49,9 @@ struct ParseTableBuilder<'a> {
|
|||
impl<'a> ParseTableBuilder<'a> {
|
||||
fn build(mut self) -> Result<ParseTable> {
|
||||
// Ensure that the empty alias sequence has index 0.
|
||||
self.parse_table.alias_sequences.push(Vec::new());
|
||||
self.parse_table
|
||||
.production_infos
|
||||
.push(ProductionInfo::default());
|
||||
|
||||
// Add the error state at index 0.
|
||||
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
|
||||
|
|
@ -176,7 +180,7 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
precedence: item.precedence(),
|
||||
associativity: item.associativity(),
|
||||
dynamic_precedence: item.production.dynamic_precedence,
|
||||
alias_sequence_id: self.get_alias_sequence_id(item),
|
||||
production_id: self.get_production_id(item),
|
||||
}
|
||||
};
|
||||
|
||||
|
|
@ -441,13 +445,10 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.unwrap();
|
||||
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
|
||||
|
||||
let interpretions = conflicting_items
|
||||
let mut interpretions = conflicting_items
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(i, item)| {
|
||||
.map(|item| {
|
||||
let mut line = String::new();
|
||||
write!(&mut line, " {}:", i + 1).unwrap();
|
||||
|
||||
for preceding_symbol in preceding_symbols
|
||||
.iter()
|
||||
.take(preceding_symbols.len() - item.step_index as usize)
|
||||
|
|
@ -503,8 +504,9 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
.map(|i| i.0.chars().count())
|
||||
.max()
|
||||
.unwrap();
|
||||
|
||||
for (line, prec_suffix) in interpretions {
|
||||
interpretions.sort_unstable();
|
||||
for (i, (line, prec_suffix)) in interpretions.into_iter().enumerate() {
|
||||
write!(&mut msg, " {}:", i + 1).unwrap();
|
||||
msg += &line;
|
||||
if let Some(prec_suffix) = prec_suffix {
|
||||
for _ in line.chars().count()..max_interpretation_length {
|
||||
|
|
@ -518,11 +520,12 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
|
||||
let mut resolution_count = 0;
|
||||
write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
|
||||
let shift_items = conflicting_items
|
||||
let mut shift_items = conflicting_items
|
||||
.iter()
|
||||
.filter(|i| !i.is_done())
|
||||
.cloned()
|
||||
.collect::<Vec<_>>();
|
||||
shift_items.sort_unstable();
|
||||
if actual_conflict.len() > 1 {
|
||||
if shift_items.len() > 0 {
|
||||
resolution_count += 1;
|
||||
|
|
@ -645,29 +648,62 @@ impl<'a> ParseTableBuilder<'a> {
|
|||
}
|
||||
}
|
||||
|
||||
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
|
||||
let mut alias_sequence: Vec<Option<Alias>> = item
|
||||
.production
|
||||
.steps
|
||||
.iter()
|
||||
.map(|s| s.alias.clone())
|
||||
.collect();
|
||||
while alias_sequence.last() == Some(&None) {
|
||||
alias_sequence.pop();
|
||||
fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
|
||||
let mut production_info = ProductionInfo {
|
||||
alias_sequence: Vec::new(),
|
||||
field_map: BTreeMap::new(),
|
||||
};
|
||||
|
||||
for (i, step) in item.production.steps.iter().enumerate() {
|
||||
production_info.alias_sequence.push(step.alias.clone());
|
||||
if let Some(field_name) = &step.field_name {
|
||||
production_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: false,
|
||||
});
|
||||
}
|
||||
|
||||
if step.symbol.kind == SymbolType::NonTerminal
|
||||
&& !self.syntax_grammar.variables[step.symbol.index]
|
||||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let info = &self.variable_info[step.symbol.index];
|
||||
for (field_name, _) in &info.fields {
|
||||
production_info
|
||||
.field_map
|
||||
.entry(field_name.clone())
|
||||
.or_insert(Vec::new())
|
||||
.push(FieldLocation {
|
||||
index: i,
|
||||
inherited: true,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
while production_info.alias_sequence.last() == Some(&None) {
|
||||
production_info.alias_sequence.pop();
|
||||
}
|
||||
|
||||
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
|
||||
self.parse_table.max_aliased_production_length = item.production.steps.len()
|
||||
}
|
||||
|
||||
if let Some(index) = self
|
||||
.parse_table
|
||||
.alias_sequences
|
||||
.production_infos
|
||||
.iter()
|
||||
.position(|seq| *seq == alias_sequence)
|
||||
.position(|seq| *seq == production_info)
|
||||
{
|
||||
index
|
||||
} else {
|
||||
self.parse_table.alias_sequences.push(alias_sequence);
|
||||
self.parse_table.alias_sequences.len() - 1
|
||||
self.parse_table.production_infos.push(production_info);
|
||||
self.parse_table.production_infos.len() - 1
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -718,6 +754,7 @@ pub(crate) fn build_parse_table(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
inlines: &InlinedProductionMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(ParseTable, Vec<TokenSet>)> {
|
||||
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
|
||||
|
|
@ -734,13 +771,14 @@ pub(crate) fn build_parse_table(
|
|||
lexical_grammar,
|
||||
state_ids_to_log,
|
||||
item_set_builder,
|
||||
variable_info,
|
||||
state_ids_by_item_set: HashMap::new(),
|
||||
item_sets_by_state_id: Vec::new(),
|
||||
parse_state_queue: VecDeque::new(),
|
||||
parse_table: ParseTable {
|
||||
states: Vec::new(),
|
||||
symbols: Vec::new(),
|
||||
alias_sequences: Vec::new(),
|
||||
production_infos: Vec::new(),
|
||||
max_aliased_production_length: 0,
|
||||
},
|
||||
}
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ lazy_static! {
|
|||
precedence: 0,
|
||||
associativity: None,
|
||||
alias: None,
|
||||
field_name: None,
|
||||
}],
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> {
|
|||
ParseAction::ShiftExtra => continue,
|
||||
ParseAction::Reduce {
|
||||
child_count: 1,
|
||||
alias_sequence_id: 0,
|
||||
production_id: 0,
|
||||
symbol,
|
||||
..
|
||||
} => {
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
mod build_lex_table;
|
||||
mod build_parse_table;
|
||||
pub(crate) mod build_lex_table;
|
||||
pub(crate) mod build_parse_table;
|
||||
mod coincident_tokens;
|
||||
mod item;
|
||||
mod item_set_builder;
|
||||
|
|
@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap;
|
|||
use crate::error::Result;
|
||||
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
|
||||
use crate::generate::nfa::{CharacterSet, NfaCursor};
|
||||
use crate::generate::node_types::VariableInfo;
|
||||
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
|
||||
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use log::info;
|
||||
|
|
@ -23,12 +24,18 @@ pub(crate) fn build_tables(
|
|||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
inlines: &InlinedProductionMap,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
|
||||
let (mut parse_table, following_tokens) =
|
||||
build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
|
||||
let (mut parse_table, following_tokens) = build_parse_table(
|
||||
syntax_grammar,
|
||||
lexical_grammar,
|
||||
inlines,
|
||||
variable_info,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
|
||||
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
|
||||
let keywords = identify_keywords(
|
||||
|
|
|
|||
|
|
@ -32,6 +32,14 @@ function blank() {
|
|||
};
|
||||
}
|
||||
|
||||
/**
 * Builds a FIELD rule node that attaches a field name to a rule.
 *
 * @param name - The field name stored on the node's `name` property.
 * @param rule - The rule to wrap; it is passed through `normalize` before
 *   being stored as the node's `content`.
 * @returns An object of the form `{type: "FIELD", name, content}`.
 */
function field(name, rule) {
  return {
    type: "FIELD",
    name: name,
    content: normalize(rule)
  };
}
|
||||
|
||||
function choice(...elements) {
|
||||
return {
|
||||
type: "CHOICE",
|
||||
|
|
@ -204,137 +212,154 @@ function RuleBuilder(ruleMap) {
|
|||
}
|
||||
|
||||
function grammar(baseGrammar, options) {
|
||||
if (!options) {
|
||||
options = baseGrammar;
|
||||
baseGrammar = {
|
||||
name: null,
|
||||
rules: {},
|
||||
extras: [normalize(/\s/)],
|
||||
conflicts: [],
|
||||
externals: [],
|
||||
inline: []
|
||||
};
|
||||
if (!options) {
|
||||
options = baseGrammar;
|
||||
baseGrammar = {
|
||||
name: null,
|
||||
rules: {},
|
||||
extras: [normalize(/\s/)],
|
||||
conflicts: [],
|
||||
externals: [],
|
||||
inline: [],
|
||||
supertypes: []
|
||||
};
|
||||
}
|
||||
|
||||
let externals = baseGrammar.externals;
|
||||
if (options.externals) {
|
||||
if (typeof options.externals !== "function") {
|
||||
throw new Error("Grammar's 'externals' property must be a function.");
|
||||
}
|
||||
|
||||
let externals = baseGrammar.externals;
|
||||
if (options.externals) {
|
||||
if (typeof options.externals !== "function") {
|
||||
throw new Error("Grammar's 'externals' property must be a function.");
|
||||
const externalsRuleBuilder = RuleBuilder(null)
|
||||
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
|
||||
|
||||
if (!Array.isArray(externalRules)) {
|
||||
throw new Error("Grammar's 'externals' property must return an array of rules.");
|
||||
}
|
||||
|
||||
externals = externalRules.map(normalize);
|
||||
}
|
||||
|
||||
const ruleMap = {};
|
||||
for (const key in options.rules) {
|
||||
ruleMap[key] = true;
|
||||
}
|
||||
for (const key in baseGrammar.rules) {
|
||||
ruleMap[key] = true;
|
||||
}
|
||||
for (const external of externals) {
|
||||
if (typeof external.name === 'string') {
|
||||
ruleMap[external.name] = true;
|
||||
}
|
||||
}
|
||||
|
||||
const ruleBuilder = RuleBuilder(ruleMap);
|
||||
|
||||
const name = options.name;
|
||||
if (typeof name !== "string") {
|
||||
throw new Error("Grammar's 'name' property must be a string.");
|
||||
}
|
||||
|
||||
if (!/^[a-zA-Z_]\w*$/.test(name)) {
|
||||
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
|
||||
}
|
||||
|
||||
let rules = Object.assign({}, baseGrammar.rules);
|
||||
if (options.rules) {
|
||||
if (typeof options.rules !== "object") {
|
||||
throw new Error("Grammar's 'rules' property must be an object.");
|
||||
}
|
||||
|
||||
for (const ruleName in options.rules) {
|
||||
const ruleFn = options.rules[ruleName];
|
||||
if (typeof ruleFn !== "function") {
|
||||
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
|
||||
}
|
||||
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
|
||||
}
|
||||
}
|
||||
|
||||
const externalsRuleBuilder = RuleBuilder(null)
|
||||
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
|
||||
|
||||
if (!Array.isArray(externalRules)) {
|
||||
throw new Error("Grammar's 'externals' property must return an array of rules.");
|
||||
}
|
||||
|
||||
externals = externalRules.map(normalize);
|
||||
let extras = baseGrammar.extras.slice();
|
||||
if (options.extras) {
|
||||
if (typeof options.extras !== "function") {
|
||||
throw new Error("Grammar's 'extras' property must be a function.");
|
||||
}
|
||||
|
||||
const ruleMap = {};
|
||||
for (const key in options.rules) {
|
||||
ruleMap[key] = true;
|
||||
extras = options.extras
|
||||
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
|
||||
.map(normalize);
|
||||
}
|
||||
|
||||
let word = baseGrammar.word;
|
||||
if (options.word) {
|
||||
word = options.word.call(ruleBuilder, ruleBuilder).name;
|
||||
if (typeof word != 'string') {
|
||||
throw new Error("Grammar's 'word' property must be a named rule.");
|
||||
}
|
||||
for (const key in baseGrammar.rules) {
|
||||
ruleMap[key] = true;
|
||||
}
|
||||
for (const external of externals) {
|
||||
if (typeof external.name === 'string') {
|
||||
ruleMap[external.name] = true;
|
||||
}
|
||||
}
|
||||
|
||||
let conflicts = baseGrammar.conflicts;
|
||||
if (options.conflicts) {
|
||||
if (typeof options.conflicts !== "function") {
|
||||
throw new Error("Grammar's 'conflicts' property must be a function.");
|
||||
}
|
||||
|
||||
const ruleBuilder = RuleBuilder(ruleMap);
|
||||
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
|
||||
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
|
||||
|
||||
const name = options.name;
|
||||
if (typeof name !== "string") {
|
||||
throw new Error("Grammar's 'name' property must be a string.");
|
||||
if (!Array.isArray(conflictRules)) {
|
||||
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
|
||||
}
|
||||
|
||||
if (!/^[a-zA-Z_]\w*$/.test(name)) {
|
||||
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
|
||||
}
|
||||
|
||||
let rules = Object.assign({}, baseGrammar.rules);
|
||||
if (options.rules) {
|
||||
if (typeof options.rules !== "object") {
|
||||
throw new Error("Grammar's 'rules' property must be an object.");
|
||||
}
|
||||
|
||||
for (const ruleName in options.rules) {
|
||||
const ruleFn = options.rules[ruleName];
|
||||
if (typeof ruleFn !== "function") {
|
||||
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
|
||||
}
|
||||
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
|
||||
}
|
||||
}
|
||||
|
||||
let extras = baseGrammar.extras.slice();
|
||||
if (options.extras) {
|
||||
if (typeof options.extras !== "function") {
|
||||
throw new Error("Grammar's 'extras' property must be a function.");
|
||||
}
|
||||
|
||||
extras = options.extras
|
||||
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
|
||||
.map(normalize);
|
||||
}
|
||||
|
||||
let word = baseGrammar.word;
|
||||
if (options.word) {
|
||||
word = options.word.call(ruleBuilder, ruleBuilder).name;
|
||||
if (typeof word != 'string') {
|
||||
throw new Error("Grammar's 'word' property must be a named rule.");
|
||||
}
|
||||
}
|
||||
|
||||
let conflicts = baseGrammar.conflicts;
|
||||
if (options.conflicts) {
|
||||
if (typeof options.conflicts !== "function") {
|
||||
throw new Error("Grammar's 'conflicts' property must be a function.");
|
||||
}
|
||||
|
||||
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
|
||||
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
|
||||
|
||||
if (!Array.isArray(conflictRules)) {
|
||||
conflicts = conflictRules.map(conflictSet => {
|
||||
if (!Array.isArray(conflictSet)) {
|
||||
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
|
||||
}
|
||||
|
||||
conflicts = conflictRules.map(conflictSet => {
|
||||
if (!Array.isArray(conflictSet)) {
|
||||
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
|
||||
}
|
||||
|
||||
return conflictSet.map(symbol => normalize(symbol).name);
|
||||
});
|
||||
}
|
||||
|
||||
let inline = baseGrammar.inline;
|
||||
if (options.inline) {
|
||||
if (typeof options.inline !== "function") {
|
||||
throw new Error("Grammar's 'inline' property must be a function.");
|
||||
}
|
||||
|
||||
const baseInlineRules = baseGrammar.inline.map(sym);
|
||||
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
|
||||
|
||||
if (!Array.isArray(inlineRules)) {
|
||||
throw new Error("Grammar's inline must be an array of rules.");
|
||||
}
|
||||
|
||||
inline = inlineRules.map(symbol => symbol.name);
|
||||
}
|
||||
|
||||
if (Object.keys(rules).length == 0) {
|
||||
throw new Error("Grammar must have at least one rule.");
|
||||
}
|
||||
|
||||
return {name, word, rules, extras, conflicts, externals, inline};
|
||||
return conflictSet.map(symbol => normalize(symbol).name);
|
||||
});
|
||||
}
|
||||
|
||||
let inline = baseGrammar.inline;
|
||||
if (options.inline) {
|
||||
if (typeof options.inline !== "function") {
|
||||
throw new Error("Grammar's 'inline' property must be a function.");
|
||||
}
|
||||
|
||||
const baseInlineRules = baseGrammar.inline.map(sym);
|
||||
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
|
||||
|
||||
if (!Array.isArray(inlineRules)) {
|
||||
throw new Error("Grammar's inline must be an array of rules.");
|
||||
}
|
||||
|
||||
inline = inlineRules.map(symbol => symbol.name);
|
||||
}
|
||||
|
||||
let supertypes = baseGrammar.supertypes;
|
||||
if (options.supertypes) {
|
||||
if (typeof options.supertypes !== "function") {
|
||||
throw new Error("Grammar's 'supertypes' property must be a function.");
|
||||
}
|
||||
|
||||
const baseSupertypeRules = baseGrammar.supertypes.map(sym);
|
||||
const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);
|
||||
|
||||
if (!Array.isArray(supertypeRules)) {
|
||||
throw new Error("Grammar's supertypes must be an array of rules.");
|
||||
}
|
||||
|
||||
supertypes = supertypeRules.map(symbol => symbol.name);
|
||||
}
|
||||
|
||||
if (Object.keys(rules).length == 0) {
|
||||
throw new Error("Grammar must have at least one rule.");
|
||||
}
|
||||
|
||||
return {name, word, rules, extras, conflicts, externals, inline, supertypes};
|
||||
}
|
||||
|
||||
function checkArguments(ruleCount, caller, callerName, suffix = '') {
|
||||
if (ruleCount > 1) {
|
||||
const error = new Error([
|
||||
|
|
@ -357,6 +382,7 @@ global.seq = seq;
|
|||
global.sym = sym;
|
||||
global.token = token;
|
||||
global.grammar = grammar;
|
||||
global.field = field;
|
||||
|
||||
const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
|
||||
console.log(JSON.stringify(result, null, 2));
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ pub(crate) struct InputGrammar {
|
|||
pub expected_conflicts: Vec<Vec<String>>,
|
||||
pub external_tokens: Vec<Rule>,
|
||||
pub variables_to_inline: Vec<String>,
|
||||
pub supertype_symbols: Vec<String>,
|
||||
pub word_token: Option<String>,
|
||||
}
|
||||
|
||||
|
|
@ -54,6 +55,7 @@ pub(crate) struct ProductionStep {
|
|||
pub precedence: i32,
|
||||
pub associativity: Option<Associativity>,
|
||||
pub alias: Option<Alias>,
|
||||
pub field_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq)]
|
||||
|
|
@ -87,6 +89,7 @@ pub(crate) struct SyntaxGrammar {
|
|||
pub extra_tokens: Vec<Symbol>,
|
||||
pub expected_conflicts: Vec<Vec<Symbol>>,
|
||||
pub external_tokens: Vec<ExternalToken>,
|
||||
pub supertype_symbols: Vec<Symbol>,
|
||||
pub variables_to_inline: Vec<Symbol>,
|
||||
pub word_token: Option<Symbol>,
|
||||
}
|
||||
|
|
@ -99,6 +102,7 @@ impl ProductionStep {
|
|||
precedence: 0,
|
||||
associativity: None,
|
||||
alias: None,
|
||||
field_name: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -108,6 +112,7 @@ impl ProductionStep {
|
|||
precedence,
|
||||
associativity,
|
||||
alias: self.alias,
|
||||
field_name: self.field_name,
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -120,6 +125,16 @@ impl ProductionStep {
|
|||
value: value.to_string(),
|
||||
is_named,
|
||||
}),
|
||||
field_name: self.field_name,
|
||||
}
|
||||
}
|
||||
pub(crate) fn with_field_name(self, name: &str) -> Self {
|
||||
Self {
|
||||
symbol: self.symbol,
|
||||
precedence: self.precedence,
|
||||
associativity: self.associativity,
|
||||
alias: self.alias,
|
||||
field_name: Some(name.to_string()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -174,6 +189,12 @@ impl Variable {
|
|||
}
|
||||
}
|
||||
|
||||
impl VariableType {
|
||||
pub fn is_visible(&self) -> bool {
|
||||
*self == VariableType::Named || *self == VariableType::Anonymous
|
||||
}
|
||||
}
|
||||
|
||||
impl LexicalGrammar {
|
||||
pub fn variable_indices_for_nfa_states<'a>(
|
||||
&'a self,
|
||||
|
|
|
|||
|
|
@ -13,6 +13,7 @@ use std::process::{Command, Stdio};
|
|||
mod build_tables;
|
||||
mod grammars;
|
||||
mod nfa;
|
||||
mod node_types;
|
||||
mod npm_files;
|
||||
mod parse_grammar;
|
||||
mod prepare_grammar;
|
||||
|
|
@ -27,6 +28,12 @@ lazy_static! {
|
|||
.unwrap();
|
||||
}
|
||||
|
||||
/// Everything produced by one parser-generation run for a single grammar.
struct GeneratedParser {
    /// The name taken from the input grammar.
    name: String,
    /// The rendered C source of the parser (written out as `parser.c`).
    c_code: String,
    /// Pretty-printed JSON describing the grammar's node types
    /// (written out as `node-types.json`).
    node_types_json: String,
}
|
||||
|
||||
pub fn generate_parser_in_directory(
|
||||
repo_path: &PathBuf,
|
||||
grammar_path: Option<&str>,
|
||||
|
|
@ -51,10 +58,16 @@ pub fn generate_parser_in_directory(
|
|||
}
|
||||
}
|
||||
|
||||
let (language_name, c_code) =
|
||||
generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
|
||||
let GeneratedParser {
|
||||
name: language_name,
|
||||
c_code,
|
||||
node_types_json,
|
||||
} = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
|
||||
|
||||
fs::write(&repo_src_path.join("parser.c"), c_code)
|
||||
.map_err(|e| format!("Failed to write parser.c: {}", e))?;
|
||||
fs::write(&repo_src_path.join("node-types.json"), node_types_json)
|
||||
.map_err(|e| format!("Failed to write parser.c: {}", e))?;
|
||||
fs::write(
|
||||
&repo_header_path.join("parser.h"),
|
||||
tree_sitter::PARSER_HEADER,
|
||||
|
|
@ -74,27 +87,37 @@ pub fn generate_parser_in_directory(
|
|||
|
||||
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
|
||||
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
|
||||
generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
|
||||
let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())?;
|
||||
Ok((parser.name, parser.c_code))
|
||||
}
|
||||
|
||||
fn generate_parser_for_grammar_with_opts(
|
||||
grammar_json: &str,
|
||||
minimize: bool,
|
||||
state_ids_to_log: Vec<usize>,
|
||||
) -> Result<(String, String)> {
|
||||
) -> Result<GeneratedParser> {
|
||||
let input_grammar = parse_grammar(grammar_json)?;
|
||||
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
|
||||
prepare_grammar(&input_grammar)?;
|
||||
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?;
|
||||
let node_types_json = node_types::generate_node_types_json(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
);
|
||||
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
|
||||
&syntax_grammar,
|
||||
&lexical_grammar,
|
||||
&simple_aliases,
|
||||
&variable_info,
|
||||
&inlines,
|
||||
minimize,
|
||||
state_ids_to_log,
|
||||
)?;
|
||||
let name = input_grammar.name;
|
||||
let c_code = render_c_code(
|
||||
&input_grammar.name,
|
||||
&name,
|
||||
parse_table,
|
||||
main_lex_table,
|
||||
keyword_lex_table,
|
||||
|
|
@ -103,7 +126,11 @@ fn generate_parser_for_grammar_with_opts(
|
|||
lexical_grammar,
|
||||
simple_aliases,
|
||||
);
|
||||
Ok((input_grammar.name, c_code))
|
||||
Ok(GeneratedParser {
|
||||
name,
|
||||
c_code,
|
||||
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
|
||||
})
|
||||
}
|
||||
|
||||
fn load_grammar_file(grammar_path: &Path) -> Result<String> {
|
||||
|
|
|
|||
842
cli/src/generate/node_types.rs
Normal file
842
cli/src/generate/node_types.rs
Normal file
|
|
@ -0,0 +1,842 @@
|
|||
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use crate::error::{Error, Result};
|
||||
use hashbrown::HashMap;
|
||||
use serde_derive::Serialize;
|
||||
use std::collections::BTreeMap;
|
||||
use std::mem;
|
||||
|
||||
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub(crate) enum ChildType {
|
||||
Normal(Symbol),
|
||||
Aliased(Alias),
|
||||
}
|
||||
|
||||
#[derive(Clone, Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct FieldInfo {
|
||||
pub required: bool,
|
||||
pub multiple: bool,
|
||||
pub types: Vec<ChildType>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct VariableInfo {
|
||||
pub fields: HashMap<String, FieldInfo>,
|
||||
pub child_types: Vec<ChildType>,
|
||||
pub has_multi_step_production: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, PartialEq, Eq, Default)]
|
||||
pub(crate) struct NodeInfoJSON {
|
||||
#[serde(rename = "type")]
|
||||
kind: String,
|
||||
named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
fields: Option<BTreeMap<String, FieldInfoJSON>>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
subtypes: Option<Vec<NodeTypeJSON>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
|
||||
pub(crate) struct NodeTypeJSON {
|
||||
#[serde(rename = "type")]
|
||||
kind: String,
|
||||
named: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug, Serialize, PartialEq, Eq)]
|
||||
pub(crate) struct FieldInfoJSON {
|
||||
multiple: bool,
|
||||
required: bool,
|
||||
types: Vec<NodeTypeJSON>,
|
||||
}
|
||||
|
||||
pub(crate) fn get_variable_info(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
) -> Result<Vec<VariableInfo>> {
|
||||
let mut result = Vec::new();
|
||||
|
||||
// Determine which field names and child node types can appear directly
|
||||
// within each type of node.
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
let mut info = VariableInfo {
|
||||
fields: HashMap::new(),
|
||||
child_types: Vec::new(),
|
||||
has_multi_step_production: false,
|
||||
};
|
||||
let is_recursive = variable
|
||||
.productions
|
||||
.iter()
|
||||
.any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
|
||||
|
||||
for production in &variable.productions {
|
||||
if production.steps.len() > 1 {
|
||||
info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
for step in &production.steps {
|
||||
let child_type = if let Some(alias) = &step.alias {
|
||||
ChildType::Aliased(alias.clone())
|
||||
} else {
|
||||
ChildType::Normal(step.symbol)
|
||||
};
|
||||
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
});
|
||||
field_info.multiple |= is_recursive;
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if let Err(i) = info.child_types.binary_search(&child_type) {
|
||||
info.child_types.insert(i, child_type.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for production in &variable.productions {
|
||||
let production_fields: Vec<&String> = production
|
||||
.steps
|
||||
.iter()
|
||||
.filter_map(|s| s.field_name.as_ref())
|
||||
.collect();
|
||||
for (field_name, field_info) in info.fields.iter_mut() {
|
||||
if !production_fields.contains(&field_name) {
|
||||
field_info.required = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result.push(info);
|
||||
}
|
||||
|
||||
// Expand each node type's information recursively to inherit the properties of
|
||||
// hidden children.
|
||||
let mut done = false;
|
||||
while !done {
|
||||
done = true;
|
||||
for (i, variable) in syntax_grammar.variables.iter().enumerate() {
|
||||
// Move this variable's info out of the vector so it can be modified
|
||||
// while reading from other entries of the vector.
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
for production in &variable.productions {
|
||||
for step in &production.steps {
|
||||
let child_symbol = step.symbol;
|
||||
if step.alias.is_none()
|
||||
&& child_symbol.kind == SymbolType::NonTerminal
|
||||
&& !syntax_grammar.variables[child_symbol.index]
|
||||
.kind
|
||||
.is_visible()
|
||||
{
|
||||
let child_variable_info = &result[child_symbol.index];
|
||||
|
||||
// If a hidden child can have multiple children, then this
|
||||
// node can appear to have multiple children.
|
||||
if child_variable_info.has_multi_step_production {
|
||||
variable_info.has_multi_step_production = true;
|
||||
}
|
||||
|
||||
// Inherit fields from this hidden child
|
||||
for (field_name, child_field_info) in &child_variable_info.fields {
|
||||
let field_info = variable_info
|
||||
.fields
|
||||
.entry(field_name.clone())
|
||||
.or_insert_with(|| {
|
||||
done = false;
|
||||
child_field_info.clone()
|
||||
});
|
||||
if child_field_info.multiple && !field_info.multiple {
|
||||
field_info.multiple = child_field_info.multiple;
|
||||
done = false;
|
||||
}
|
||||
if !child_field_info.required && field_info.required {
|
||||
field_info.required = child_field_info.required;
|
||||
done = false;
|
||||
}
|
||||
for child_type in &child_field_info.types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !syntax_grammar.supertype_symbols.contains(&child_symbol) {
|
||||
// Inherit child types from this hidden child
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = variable_info.child_types.binary_search(&child_type)
|
||||
{
|
||||
variable_info.child_types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
|
||||
// If any field points to this hidden child, inherit child types
|
||||
// for the field.
|
||||
if let Some(field_name) = &step.field_name {
|
||||
let field_info = variable_info.fields.get_mut(field_name).unwrap();
|
||||
for child_type in &child_variable_info.child_types {
|
||||
if let Err(i) = field_info.types.binary_search(&child_type) {
|
||||
field_info.types.insert(i, child_type.clone());
|
||||
done = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Move this variable's info back into the vector.
|
||||
result[i] = variable_info;
|
||||
}
|
||||
}
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
let variable = &syntax_grammar.variables[supertype_symbol.index];
|
||||
if variable.kind != VariableType::Hidden {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must be hidden, but `{}` is not",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
|
||||
if result[supertype_symbol.index].has_multi_step_production {
|
||||
return Err(Error::grammar(&format!(
|
||||
"Supertype symbols must always have a single visible child, but `{}` can have multiple",
|
||||
variable.name
|
||||
)));
|
||||
}
|
||||
}
|
||||
|
||||
let child_type_is_visible = |child_type: &ChildType| match child_type {
|
||||
ChildType::Aliased(_) => true,
|
||||
ChildType::Normal(symbol) => {
|
||||
if syntax_grammar.supertype_symbols.contains(&symbol) {
|
||||
return true;
|
||||
}
|
||||
let variable_kind = match symbol.kind {
|
||||
SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
|
||||
SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
|
||||
SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
|
||||
_ => VariableType::Hidden,
|
||||
};
|
||||
variable_kind.is_visible()
|
||||
}
|
||||
};
|
||||
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
result[supertype_symbol.index]
|
||||
.child_types
|
||||
.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
for i in 0..result.len() {
|
||||
let mut variable_info = VariableInfo::default();
|
||||
mem::swap(&mut variable_info, &mut result[i]);
|
||||
|
||||
// For each field, make the `types` list more concise by replacing sets of
|
||||
// subtypes with a single supertype.
|
||||
for (_, field_info) in variable_info.fields.iter_mut() {
|
||||
for supertype_symbol in &syntax_grammar.supertype_symbols {
|
||||
if sorted_vec_replace(
|
||||
&mut field_info.types,
|
||||
&result[supertype_symbol.index].child_types,
|
||||
ChildType::Normal(*supertype_symbol),
|
||||
) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
field_info.types.retain(child_type_is_visible);
|
||||
}
|
||||
|
||||
result[i] = variable_info;
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
||||
/// Replaces every element of `right` found in `left` with the single element `value`.
///
/// Both `left` and `right` are expected to be sorted in ascending order; the
/// function walks them in lock-step and uses a binary search for the final
/// insertion.
///
/// Returns `false`, leaving `left` untouched, unless every element of `right`
/// is present in `left`. Otherwise the elements of `right` are removed from
/// `left`, `value` is inserted at its sorted position (if not already
/// present), and `true` is returned.
///
/// An empty `left` combined with a non-empty `right` returns `false` rather
/// than panicking on an out-of-bounds index.
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &Vec<T>, value: T) -> bool
where
    T: Eq + Ord,
{
    // First pass: verify that `right` is contained in `left`. The cursor is
    // bounds-checked *before* indexing so that an empty (or exhausted) `left`
    // is reported as "not a superset" instead of panicking.
    let mut left_index = 0;
    for right_elem in right.iter() {
        while left_index < left.len() && left[left_index] < *right_elem {
            left_index += 1;
        }
        if left_index == left.len() || left[left_index] != *right_elem {
            return false;
        }
    }

    // Second pass: drop from `left` everything that also appears in `right`.
    // `retain` visits the elements in order, so a single forward-moving
    // cursor into `right` is sufficient.
    let mut right_index = 0;
    left.retain(|left_elem| {
        while right_index < right.len() && right[right_index] < *left_elem {
            right_index += 1;
        }
        right_index == right.len() || right[right_index] != *left_elem
    });

    // Finally, put the replacement value in its sorted position.
    if let Err(insert_at) = left.binary_search(&value) {
        left.insert(insert_at, value);
    }

    true
}
|
||||
|
||||
pub(crate) fn generate_node_types_json(
|
||||
syntax_grammar: &SyntaxGrammar,
|
||||
lexical_grammar: &LexicalGrammar,
|
||||
simple_aliases: &AliasMap,
|
||||
variable_info: &Vec<VariableInfo>,
|
||||
) -> Vec<NodeInfoJSON> {
|
||||
let mut node_types_json = BTreeMap::new();
|
||||
|
||||
let child_type_to_node_type = |child_type: &ChildType| match child_type {
|
||||
ChildType::Aliased(alias) => NodeTypeJSON {
|
||||
kind: alias.value.clone(),
|
||||
named: alias.is_named,
|
||||
},
|
||||
ChildType::Normal(symbol) => {
|
||||
if let Some(alias) = simple_aliases.get(&symbol) {
|
||||
NodeTypeJSON {
|
||||
kind: alias.value.clone(),
|
||||
named: alias.is_named,
|
||||
}
|
||||
} else {
|
||||
match symbol.kind {
|
||||
SymbolType::NonTerminal => {
|
||||
let variable = &syntax_grammar.variables[symbol.index];
|
||||
NodeTypeJSON {
|
||||
kind: variable.name.clone(),
|
||||
named: variable.kind != VariableType::Anonymous,
|
||||
}
|
||||
}
|
||||
SymbolType::Terminal => {
|
||||
let variable = &lexical_grammar.variables[symbol.index];
|
||||
NodeTypeJSON {
|
||||
kind: variable.name.clone(),
|
||||
named: variable.kind != VariableType::Anonymous,
|
||||
}
|
||||
}
|
||||
SymbolType::External => {
|
||||
let variable = &syntax_grammar.external_tokens[symbol.index];
|
||||
NodeTypeJSON {
|
||||
kind: variable.name.clone(),
|
||||
named: variable.kind != VariableType::Anonymous,
|
||||
}
|
||||
}
|
||||
_ => panic!("Unexpected symbol type"),
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
for (i, info) in variable_info.iter().enumerate() {
|
||||
let symbol = Symbol::non_terminal(i);
|
||||
let variable = &syntax_grammar.variables[i];
|
||||
let name = simple_aliases
|
||||
.get(&Symbol::non_terminal(i))
|
||||
.map_or(&variable.name, |alias| &alias.value);
|
||||
|
||||
if syntax_grammar.supertype_symbols.contains(&symbol) {
|
||||
let node_type_json =
|
||||
node_types_json
|
||||
.entry(name.clone())
|
||||
.or_insert_with(|| NodeInfoJSON {
|
||||
kind: name.clone(),
|
||||
named: true,
|
||||
fields: None,
|
||||
subtypes: None,
|
||||
});
|
||||
let mut subtypes = info
|
||||
.child_types
|
||||
.iter()
|
||||
.map(child_type_to_node_type)
|
||||
.collect::<Vec<_>>();
|
||||
subtypes.sort_unstable();
|
||||
subtypes.dedup();
|
||||
node_type_json.subtypes = Some(subtypes);
|
||||
} else if variable.kind.is_visible() {
|
||||
let node_type_json =
|
||||
node_types_json
|
||||
.entry(name.clone())
|
||||
.or_insert_with(|| NodeInfoJSON {
|
||||
kind: name.clone(),
|
||||
named: true,
|
||||
fields: None,
|
||||
subtypes: None,
|
||||
});
|
||||
let mut fields_json = BTreeMap::new();
|
||||
for (field, field_info) in info.fields.iter() {
|
||||
let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON {
|
||||
multiple: false,
|
||||
required: true,
|
||||
types: Vec::new(),
|
||||
});
|
||||
|
||||
field_info_json.multiple |= field_info.multiple;
|
||||
field_info_json.required &= field_info.required;
|
||||
field_info_json
|
||||
.types
|
||||
.extend(field_info.types.iter().map(child_type_to_node_type));
|
||||
field_info_json.types.sort_unstable();
|
||||
field_info_json.types.dedup();
|
||||
}
|
||||
node_type_json.fields = Some(fields_json);
|
||||
}
|
||||
}
|
||||
|
||||
let mut result = node_types_json.into_iter().map(|e| e.1).collect::<Vec<_>>();
|
||||
|
||||
for variable in &lexical_grammar.variables {
|
||||
if variable.kind == VariableType::Named {
|
||||
result.push(NodeInfoJSON {
|
||||
kind: variable.name.clone(),
|
||||
named: true,
|
||||
fields: None,
|
||||
subtypes: None,
|
||||
});
|
||||
} else if variable.kind == VariableType::Anonymous {
|
||||
result.push(NodeInfoJSON {
|
||||
kind: variable.name.clone(),
|
||||
named: false,
|
||||
fields: None,
|
||||
subtypes: None,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;
    use crate::generate::grammars::{
        InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
    };
    use crate::generate::prepare_grammar::prepare_grammar;
    use crate::generate::rules::Rule;

    #[test]
    fn test_node_types_simple() {
        let node_types = get_node_types(build_input_grammar(
            vec![],
            vec![
                build_variable(
                    "v1",
                    VariableType::Named,
                    Rule::seq(vec![
                        Rule::field("f1".to_string(), Rule::named("v2")),
                        Rule::field("f2".to_string(), Rule::string(";")),
                    ]),
                ),
                build_variable("v2", VariableType::Named, Rule::string("x")),
            ],
        ));

        let v1_fields = vec![
            (
                "f1".to_string(),
                required_single_field_json(node_type_json("v2", true)),
            ),
            (
                "f2".to_string(),
                required_single_field_json(node_type_json(";", false)),
            ),
        ]
        .into_iter()
        .collect();

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
                subtypes: None,
                fields: Some(v1_fields),
            }
        );
        assert_eq!(node_types[1], leaf_node_json(";", false));
        assert_eq!(node_types[2], leaf_node_json("v2", true));
    }

    #[test]
    fn test_node_types_with_supertypes() {
        let node_types = get_node_types(build_input_grammar(
            vec!["_v2".to_string()],
            vec![
                build_variable(
                    "v1",
                    VariableType::Named,
                    Rule::field("f1".to_string(), Rule::named("_v2")),
                ),
                build_variable(
                    "_v2",
                    VariableType::Hidden,
                    Rule::choice(vec![
                        Rule::named("v3"),
                        Rule::named("v4"),
                        Rule::string("*"),
                    ]),
                ),
                build_variable("v3", VariableType::Named, Rule::string("x")),
                build_variable("v4", VariableType::Named, Rule::string("y")),
            ],
        ));

        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "_v2".to_string(),
                named: true,
                fields: None,
                subtypes: Some(vec![
                    node_type_json("*", false),
                    node_type_json("v3", true),
                    node_type_json("v4", true),
                ]),
            }
        );

        let v1_fields = vec![(
            "f1".to_string(),
            required_single_field_json(node_type_json("_v2", true)),
        )]
        .into_iter()
        .collect();

        assert_eq!(
            node_types[1],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
                subtypes: None,
                fields: Some(v1_fields),
            }
        );
    }

    #[test]
    fn test_get_variable_info() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    // Required field `field1` has only one node type.
                    build_syntax_variable(
                        "rule0",
                        VariableType::Named,
                        vec![build_production(vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::non_terminal(1))
                                .with_field_name("field1"),
                        ])],
                    ),
                    // Hidden node
                    build_syntax_variable(
                        "_rule1",
                        VariableType::Hidden,
                        vec![build_production(vec![ProductionStep::new(
                            Symbol::terminal(1),
                        )])],
                    ),
                    // Optional field `field2` can have two possible node types.
                    build_syntax_variable(
                        "rule2",
                        VariableType::Named,
                        vec![
                            build_production(vec![ProductionStep::new(Symbol::terminal(0))]),
                            build_production(vec![
                                ProductionStep::new(Symbol::terminal(0)),
                                ProductionStep::new(Symbol::terminal(2))
                                    .with_field_name("field2"),
                            ]),
                            build_production(vec![
                                ProductionStep::new(Symbol::terminal(0)),
                                ProductionStep::new(Symbol::terminal(3))
                                    .with_field_name("field2"),
                            ]),
                        ],
                    ),
                ],
                vec![],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();

        assert_eq!(
            variable_info[0].fields,
            single_field(
                "field1",
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::terminal(1))],
                }
            )
        );

        assert_eq!(
            variable_info[2].fields,
            single_field(
                "field2",
                FieldInfo {
                    required: false,
                    multiple: false,
                    types: vec![
                        ChildType::Normal(Symbol::terminal(2)),
                        ChildType::Normal(Symbol::terminal(3)),
                    ],
                }
            )
        );
    }

    #[test]
    fn test_get_variable_info_with_inherited_fields() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    build_syntax_variable(
                        "rule0",
                        VariableType::Named,
                        vec![build_production(vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::non_terminal(1)),
                            ProductionStep::new(Symbol::terminal(1)),
                        ])],
                    ),
                    // Hidden node with fields
                    build_syntax_variable(
                        "_rule1",
                        VariableType::Hidden,
                        vec![build_production(vec![
                            ProductionStep::new(Symbol::terminal(2)),
                            ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
                        ])],
                    ),
                ],
                vec![],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();

        assert_eq!(
            variable_info[0].fields,
            single_field(
                "field1",
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::terminal(3))],
                }
            )
        );
    }

    #[test]
    fn test_get_variable_info_with_supertypes() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    build_syntax_variable(
                        "rule0",
                        VariableType::Named,
                        vec![build_production(vec![
                            ProductionStep::new(Symbol::terminal(0)),
                            ProductionStep::new(Symbol::non_terminal(1))
                                .with_field_name("field1"),
                            ProductionStep::new(Symbol::terminal(1)),
                        ])],
                    ),
                    build_syntax_variable(
                        "_rule1",
                        VariableType::Hidden,
                        vec![
                            build_production(vec![ProductionStep::new(Symbol::terminal(2))]),
                            build_production(vec![ProductionStep::new(Symbol::terminal(3))]),
                        ],
                    ),
                ],
                // _rule1 is a supertype
                vec![Symbol::non_terminal(1)],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();

        assert_eq!(
            variable_info[0].fields,
            single_field(
                "field1",
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::non_terminal(1))],
                }
            )
        );
    }

    // Runs the whole pipeline: prepare the grammar, compute the variable
    // info, and render the node types.
    fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
        let (syntax_grammar, lexical_grammar, _, simple_aliases) =
            prepare_grammar(&grammar).unwrap();
        let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();
        generate_node_types_json(
            &syntax_grammar,
            &lexical_grammar,
            &simple_aliases,
            &variable_info,
        )
    }

    // A default syntax grammar with only its variables and supertypes set.
    fn build_syntax_grammar(
        variables: Vec<SyntaxVariable>,
        supertype_symbols: Vec<Symbol>,
    ) -> SyntaxGrammar {
        let mut grammar = SyntaxGrammar::default();
        grammar.variables = variables;
        grammar.supertype_symbols = supertype_symbols;
        grammar
    }

    // A default lexical grammar holding ten named tokens, `token_0`..`token_9`.
    fn build_lexical_grammar() -> LexicalGrammar {
        let mut grammar = LexicalGrammar::default();
        grammar.variables.extend((0..10).map(|i| LexicalVariable {
            name: format!("token_{}", i),
            kind: VariableType::Named,
            implicit_precedence: 0,
            start_state: 0,
        }));
        grammar
    }

    // An input grammar that is empty apart from its supertypes and variables.
    fn build_input_grammar(
        supertype_symbols: Vec<String>,
        variables: Vec<Variable>,
    ) -> InputGrammar {
        InputGrammar {
            name: String::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
            supertype_symbols,
            variables,
        }
    }

    fn build_variable(name: &str, kind: VariableType, rule: Rule) -> Variable {
        Variable {
            name: name.to_string(),
            kind,
            rule,
        }
    }

    fn build_syntax_variable(
        name: &str,
        kind: VariableType,
        productions: Vec<Production>,
    ) -> SyntaxVariable {
        SyntaxVariable {
            name: name.to_string(),
            kind,
            productions,
        }
    }

    // A production with no dynamic precedence.
    fn build_production(steps: Vec<ProductionStep>) -> Production {
        Production {
            dynamic_precedence: 0,
            steps,
        }
    }

    // A field map containing exactly one entry.
    fn single_field(name: &str, info: FieldInfo) -> HashMap<String, FieldInfo> {
        let mut fields = HashMap::new();
        fields.insert(name.to_string(), info);
        fields
    }

    fn node_type_json(kind: &str, named: bool) -> NodeTypeJSON {
        NodeTypeJSON {
            kind: kind.to_string(),
            named,
        }
    }

    // A required, non-repeated field that accepts a single node type.
    fn required_single_field_json(node_type: NodeTypeJSON) -> FieldInfoJSON {
        FieldInfoJSON {
            multiple: false,
            required: true,
            types: vec![node_type],
        }
    }

    // A node entry that has neither fields nor subtypes.
    fn leaf_node_json(kind: &str, named: bool) -> NodeInfoJSON {
        NodeInfoJSON {
            kind: kind.to_string(),
            named,
            subtypes: None,
            fields: None,
        }
    }
}
|
||||
|
|
@ -26,6 +26,10 @@ enum RuleJSON {
|
|||
CHOICE {
|
||||
members: Vec<RuleJSON>,
|
||||
},
|
||||
FIELD {
|
||||
name: String,
|
||||
content: Box<RuleJSON>,
|
||||
},
|
||||
SEQ {
|
||||
members: Vec<RuleJSON>,
|
||||
},
|
||||
|
|
@ -67,6 +71,7 @@ struct GrammarJSON {
|
|||
externals: Option<Vec<RuleJSON>>,
|
||||
extras: Option<Vec<RuleJSON>>,
|
||||
inline: Option<Vec<String>>,
|
||||
supertypes: Option<Vec<String>>,
|
||||
word: Option<String>,
|
||||
}
|
||||
|
||||
|
|
@ -96,6 +101,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
.collect();
|
||||
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
|
||||
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
|
||||
let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new());
|
||||
|
||||
Ok(InputGrammar {
|
||||
name: grammar_json.name,
|
||||
|
|
@ -104,6 +110,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
|
|||
extra_tokens,
|
||||
expected_conflicts,
|
||||
external_tokens,
|
||||
supertype_symbols,
|
||||
variables_to_inline,
|
||||
})
|
||||
}
|
||||
|
|
@ -120,6 +127,7 @@ fn parse_rule(json: RuleJSON) -> Rule {
|
|||
RuleJSON::PATTERN { value } => Rule::Pattern(value),
|
||||
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
|
||||
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
|
||||
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
|
||||
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
|
||||
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
|
||||
RuleJSON::REPEAT { content } => {
|
||||
|
|
|
|||
|
|
@ -235,6 +235,7 @@ mod tests {
|
|||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -149,6 +149,7 @@ mod tests {
|
|||
extra_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
|
|
|||
|
|
@ -77,6 +77,12 @@ pub(super) fn extract_tokens(
|
|||
})
|
||||
.collect();
|
||||
|
||||
let supertype_symbols = grammar
|
||||
.supertype_symbols
|
||||
.into_iter()
|
||||
.map(|symbol| symbol_replacer.replace_symbol(symbol))
|
||||
.collect();
|
||||
|
||||
let variables_to_inline = grammar
|
||||
.variables_to_inline
|
||||
.into_iter()
|
||||
|
|
@ -154,6 +160,7 @@ pub(super) fn extract_tokens(
|
|||
expected_conflicts,
|
||||
extra_tokens,
|
||||
variables_to_inline,
|
||||
supertype_symbols,
|
||||
external_tokens,
|
||||
word_token,
|
||||
},
|
||||
|
|
@ -519,6 +526,7 @@ mod test {
|
|||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -11,6 +11,7 @@ struct RuleFlattener {
|
|||
precedence_stack: Vec<i32>,
|
||||
associativity_stack: Vec<Associativity>,
|
||||
alias_stack: Vec<Alias>,
|
||||
field_name_stack: Vec<String>,
|
||||
}
|
||||
|
||||
impl RuleFlattener {
|
||||
|
|
@ -23,6 +24,7 @@ impl RuleFlattener {
|
|||
precedence_stack: Vec::new(),
|
||||
associativity_stack: Vec::new(),
|
||||
alias_stack: Vec::new(),
|
||||
field_name_stack: Vec::new(),
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -60,6 +62,12 @@ impl RuleFlattener {
|
|||
self.alias_stack.push(alias);
|
||||
}
|
||||
|
||||
let mut has_field_name = false;
|
||||
if let Some(field_name) = params.field_name {
|
||||
has_field_name = true;
|
||||
self.field_name_stack.push(field_name);
|
||||
}
|
||||
|
||||
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
|
||||
self.production.dynamic_precedence = params.dynamic_precedence;
|
||||
}
|
||||
|
|
@ -86,6 +94,10 @@ impl RuleFlattener {
|
|||
self.alias_stack.pop();
|
||||
}
|
||||
|
||||
if has_field_name {
|
||||
self.field_name_stack.pop();
|
||||
}
|
||||
|
||||
did_push
|
||||
}
|
||||
Rule::Symbol(symbol) => {
|
||||
|
|
@ -94,6 +106,7 @@ impl RuleFlattener {
|
|||
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
|
||||
associativity: self.associativity_stack.last().cloned(),
|
||||
alias: self.alias_stack.last().cloned(),
|
||||
field_name: self.field_name_stack.last().cloned(),
|
||||
});
|
||||
true
|
||||
}
|
||||
|
|
@ -190,6 +203,7 @@ unless they are used only as the grammar's start rule.
|
|||
expected_conflicts: grammar.expected_conflicts,
|
||||
variables_to_inline: grammar.variables_to_inline,
|
||||
external_tokens: grammar.external_tokens,
|
||||
supertype_symbols: grammar.supertype_symbols,
|
||||
word_token: grammar.word_token,
|
||||
variables,
|
||||
})
|
||||
|
|
@ -355,4 +369,42 @@ mod tests {
|
|||
}]
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_flatten_grammar_with_field_names() {
    // A required field, a plain token, and an optional trailing field.
    let rule = Rule::seq(vec![
        Rule::field("first-thing".to_string(), Rule::terminal(1)),
        Rule::terminal(2),
        Rule::choice(vec![
            Rule::Blank,
            Rule::field("second-thing".to_string(), Rule::terminal(3)),
        ]),
    ]);

    let flattened = flatten_variable(Variable {
        name: "test".to_string(),
        kind: VariableType::Named,
        rule,
    })
    .unwrap();

    // The optional field yields two productions: one without it, one with it.
    let without_second_thing = Production {
        dynamic_precedence: 0,
        steps: vec![
            ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
            ProductionStep::new(Symbol::terminal(2)),
        ],
    };
    let with_second_thing = Production {
        dynamic_precedence: 0,
        steps: vec![
            ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
            ProductionStep::new(Symbol::terminal(2)),
            ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"),
        ],
    };

    assert_eq!(
        flattened.productions,
        vec![without_second_thing, with_second_thing]
    );
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -35,6 +35,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
extra_tokens.push(interner.intern_rule(extra_token)?);
|
||||
}
|
||||
|
||||
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
|
||||
for supertype_symbol_name in grammar.supertype_symbols.iter() {
|
||||
supertype_symbols.push(
|
||||
interner
|
||||
.intern_name(supertype_symbol_name)
|
||||
.ok_or_else(|| Error::undefined_symbol(supertype_symbol_name))?,
|
||||
);
|
||||
}
|
||||
|
||||
let mut expected_conflicts = Vec::new();
|
||||
for conflict in grammar.expected_conflicts.iter() {
|
||||
let mut interned_conflict = Vec::with_capacity(conflict.len());
|
||||
|
|
@ -70,6 +79,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
|
|||
extra_tokens,
|
||||
expected_conflicts,
|
||||
variables_to_inline,
|
||||
supertype_symbols,
|
||||
word_token,
|
||||
})
|
||||
}
|
||||
|
|
@ -230,6 +240,7 @@ mod tests {
|
|||
external_tokens: Vec::new(),
|
||||
expected_conflicts: Vec::new(),
|
||||
variables_to_inline: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -25,6 +25,7 @@ pub(crate) struct IntermediateGrammar<T, U> {
|
|||
expected_conflicts: Vec<Vec<Symbol>>,
|
||||
external_tokens: Vec<U>,
|
||||
variables_to_inline: Vec<Symbol>,
|
||||
supertype_symbols: Vec<Symbol>,
|
||||
word_token: Option<Symbol>,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -114,6 +114,11 @@ impl InlinedProductionMapBuilder {
|
|||
inserted_step.alias = Some(alias.clone());
|
||||
}
|
||||
}
|
||||
if let Some(field_name) = removed_step.field_name {
|
||||
for inserted_step in inserted_steps.iter_mut() {
|
||||
inserted_step.field_name = Some(field_name.clone());
|
||||
}
|
||||
}
|
||||
if let Some(last_inserted_step) = inserted_steps.last_mut() {
|
||||
if last_inserted_step.precedence == 0 {
|
||||
last_inserted_step.precedence = removed_step.precedence;
|
||||
|
|
@ -193,6 +198,7 @@ mod tests {
|
|||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
variables_to_inline: vec![Symbol::non_terminal(1)],
|
||||
variables: vec![
|
||||
|
|
@ -323,6 +329,7 @@ mod tests {
|
|||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
let inline_map = process_inlines(&grammar);
|
||||
|
|
@ -424,6 +431,7 @@ mod tests {
|
|||
expected_conflicts: Vec::new(),
|
||||
extra_tokens: Vec::new(),
|
||||
external_tokens: Vec::new(),
|
||||
supertype_symbols: Vec::new(),
|
||||
word_token: None,
|
||||
};
|
||||
|
||||
|
|
|
|||
|
|
@ -1,11 +1,14 @@
|
|||
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
|
||||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
|
||||
use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
|
||||
use super::tables::{
|
||||
AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry,
|
||||
};
|
||||
use core::ops::Range;
|
||||
use hashbrown::{HashMap, HashSet};
|
||||
use std::fmt::Write;
|
||||
use std::mem::swap;
|
||||
use tree_sitter::LANGUAGE_VERSION;
|
||||
|
||||
macro_rules! add {
|
||||
($this: tt, $($arg: tt)*) => {{
|
||||
|
|
@ -56,10 +59,12 @@ struct Generator {
|
|||
alias_ids: HashMap<Alias, String>,
|
||||
external_scanner_states: Vec<HashSet<usize>>,
|
||||
alias_map: HashMap<Alias, Option<Symbol>>,
|
||||
field_names: Vec<String>,
|
||||
}
|
||||
|
||||
impl Generator {
|
||||
fn generate(mut self) -> String {
|
||||
self.init();
|
||||
self.add_includes();
|
||||
self.add_pragmas();
|
||||
self.add_stats();
|
||||
|
|
@ -67,7 +72,13 @@ impl Generator {
|
|||
self.add_symbol_names_list();
|
||||
self.add_symbol_metadata_list();
|
||||
|
||||
if self.parse_table.alias_sequences.len() > 1 {
|
||||
if !self.field_names.is_empty() {
|
||||
self.add_field_name_enum();
|
||||
self.add_field_name_names_list();
|
||||
self.add_field_sequences();
|
||||
}
|
||||
|
||||
if !self.alias_ids.is_empty() {
|
||||
self.add_alias_sequences();
|
||||
}
|
||||
|
||||
|
|
@ -95,6 +106,49 @@ impl Generator {
|
|||
self.buffer
|
||||
}
|
||||
|
||||
fn init(&mut self) {
|
||||
let mut symbol_identifiers = HashSet::new();
|
||||
for i in 0..self.parse_table.symbols.len() {
|
||||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
|
||||
}
|
||||
|
||||
let mut field_names = Vec::new();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
for field_name in production_info.field_map.keys() {
|
||||
field_names.push(field_name);
|
||||
}
|
||||
|
||||
for alias in &production_info.alias_sequence {
|
||||
if let Some(alias) = &alias {
|
||||
let alias_kind = if alias.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
VariableType::Anonymous
|
||||
};
|
||||
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias_kind
|
||||
});
|
||||
let alias_id = if let Some(symbol) = matching_symbol {
|
||||
self.symbol_ids[&symbol].clone()
|
||||
} else if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
self.alias_map
|
||||
.entry(alias.clone())
|
||||
.or_insert(matching_symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
field_names.sort_unstable();
|
||||
field_names.dedup();
|
||||
self.field_names = field_names.into_iter().cloned().collect();
|
||||
}
|
||||
|
||||
fn add_includes(&mut self) {
|
||||
add_line!(self, "#include <tree_sitter/parser.h>");
|
||||
add_line!(self, "");
|
||||
|
|
@ -143,39 +197,7 @@ impl Generator {
|
|||
})
|
||||
.count();
|
||||
|
||||
let mut symbol_identifiers = HashSet::new();
|
||||
for i in 0..self.parse_table.symbols.len() {
|
||||
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
|
||||
}
|
||||
|
||||
for alias_sequence in &self.parse_table.alias_sequences {
|
||||
for entry in alias_sequence {
|
||||
if let Some(alias) = entry {
|
||||
let alias_kind = if alias.is_named {
|
||||
VariableType::Named
|
||||
} else {
|
||||
VariableType::Anonymous
|
||||
};
|
||||
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
|
||||
let (name, kind) = self.metadata_for_symbol(*symbol);
|
||||
name == alias.value && kind == alias_kind
|
||||
});
|
||||
let alias_id = if let Some(symbol) = matching_symbol {
|
||||
self.symbol_ids[&symbol].clone()
|
||||
} else if alias.is_named {
|
||||
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
} else {
|
||||
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
|
||||
};
|
||||
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
|
||||
self.alias_map
|
||||
.entry(alias.clone())
|
||||
.or_insert(matching_symbol);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
add_line!(self, "#define LANGUAGE_VERSION {}", 9);
|
||||
add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION);
|
||||
add_line!(
|
||||
self,
|
||||
"#define STATE_COUNT {}",
|
||||
|
|
@ -197,6 +219,7 @@ impl Generator {
|
|||
"#define EXTERNAL_TOKEN_COUNT {}",
|
||||
self.syntax_grammar.external_tokens.len()
|
||||
);
|
||||
add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
|
||||
add_line!(
|
||||
self,
|
||||
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
|
||||
|
|
@ -253,6 +276,34 @@ impl Generator {
|
|||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_field_name_enum(&mut self) {
|
||||
add_line!(self, "enum {{");
|
||||
indent!(self);
|
||||
for (i, field_name) in self.field_names.iter().enumerate() {
|
||||
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_field_name_names_list(&mut self) {
|
||||
add_line!(self, "static const char *ts_field_names[] = {{");
|
||||
indent!(self);
|
||||
add_line!(self, "[0] = NULL,");
|
||||
for field_name in &self.field_names {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = \"{}\",",
|
||||
self.field_id(field_name),
|
||||
field_name
|
||||
);
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_symbol_metadata_list(&mut self) {
|
||||
add_line!(
|
||||
self,
|
||||
|
|
@ -307,13 +358,17 @@ impl Generator {
|
|||
add_line!(
|
||||
self,
|
||||
"static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
|
||||
self.parse_table.alias_sequences.len()
|
||||
self.parse_table.production_infos.len()
|
||||
);
|
||||
indent!(self);
|
||||
for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) {
|
||||
for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
|
||||
if production_info.alias_sequence.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
add_line!(self, "[{}] = {{", i);
|
||||
indent!(self);
|
||||
for (j, alias) in sequence.iter().enumerate() {
|
||||
for (j, alias) in production_info.alias_sequence.iter().enumerate() {
|
||||
if let Some(alias) = alias {
|
||||
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
|
||||
}
|
||||
|
|
@ -326,6 +381,81 @@ impl Generator {
|
|||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_field_sequences(&mut self) {
|
||||
let mut flat_field_maps = vec![];
|
||||
let mut next_flat_field_map_index = 0;
|
||||
self.get_field_map_id(
|
||||
&Vec::new(),
|
||||
&mut flat_field_maps,
|
||||
&mut next_flat_field_map_index,
|
||||
);
|
||||
|
||||
let mut field_map_ids = Vec::new();
|
||||
for production_info in &self.parse_table.production_infos {
|
||||
if !production_info.field_map.is_empty() {
|
||||
let mut flat_field_map = Vec::new();
|
||||
for (field_name, locations) in &production_info.field_map {
|
||||
for location in locations {
|
||||
flat_field_map.push((field_name.clone(), *location));
|
||||
}
|
||||
}
|
||||
field_map_ids.push((
|
||||
self.get_field_map_id(
|
||||
&flat_field_map,
|
||||
&mut flat_field_maps,
|
||||
&mut next_flat_field_map_index,
|
||||
),
|
||||
flat_field_map.len(),
|
||||
));
|
||||
} else {
|
||||
field_map_ids.push((0, 0));
|
||||
}
|
||||
}
|
||||
|
||||
add_line!(
|
||||
self,
|
||||
"static const TSFieldMapSlice ts_field_map_slices[] = {{",
|
||||
);
|
||||
indent!(self);
|
||||
for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
|
||||
if length > 0 {
|
||||
add_line!(
|
||||
self,
|
||||
"[{}] = {{.index = {}, .length = {}}},",
|
||||
production_id,
|
||||
row_id,
|
||||
length
|
||||
);
|
||||
}
|
||||
}
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
|
||||
add_line!(
|
||||
self,
|
||||
"static const TSFieldMapEntry ts_field_map_entries[] = {{",
|
||||
);
|
||||
indent!(self);
|
||||
for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
|
||||
add_line!(self, "[{}] =", row_index);
|
||||
indent!(self);
|
||||
for (field_name, location) in field_pairs {
|
||||
add_whitespace!(self);
|
||||
add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
|
||||
if location.inherited {
|
||||
add!(self, ", .inherited = true");
|
||||
}
|
||||
add!(self, "}},\n");
|
||||
}
|
||||
dedent!(self);
|
||||
}
|
||||
|
||||
dedent!(self);
|
||||
add_line!(self, "}};");
|
||||
add_line!(self, "");
|
||||
}
|
||||
|
||||
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
|
||||
add_line!(
|
||||
self,
|
||||
|
|
@ -686,15 +816,15 @@ impl Generator {
|
|||
symbol,
|
||||
child_count,
|
||||
dynamic_precedence,
|
||||
alias_sequence_id,
|
||||
production_id,
|
||||
..
|
||||
} => {
|
||||
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
|
||||
if dynamic_precedence != 0 {
|
||||
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
|
||||
}
|
||||
if alias_sequence_id != 0 {
|
||||
add!(self, ", .alias_sequence_id = {}", alias_sequence_id);
|
||||
if production_id != 0 {
|
||||
add!(self, ", .production_id = {}", production_id);
|
||||
}
|
||||
add!(self, ")");
|
||||
}
|
||||
|
|
@ -759,13 +889,27 @@ impl Generator {
|
|||
add_line!(self, ".lex_modes = ts_lex_modes,");
|
||||
add_line!(self, ".symbol_names = ts_symbol_names,");
|
||||
|
||||
if self.parse_table.alias_sequences.len() > 1 {
|
||||
if !self.alias_ids.is_empty() {
|
||||
add_line!(
|
||||
self,
|
||||
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
|
||||
);
|
||||
}
|
||||
|
||||
add_line!(self, ".field_count = FIELD_COUNT,");
|
||||
|
||||
if !self.field_names.is_empty() {
|
||||
add_line!(self, ".field_names = ts_field_names,");
|
||||
add_line!(
|
||||
self,
|
||||
".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices,"
|
||||
);
|
||||
add_line!(
|
||||
self,
|
||||
".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries,"
|
||||
);
|
||||
}
|
||||
|
||||
add_line!(
|
||||
self,
|
||||
".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
|
||||
|
|
@ -820,6 +964,22 @@ impl Generator {
|
|||
result
|
||||
}
|
||||
|
||||
fn get_field_map_id(
|
||||
&self,
|
||||
flat_field_map: &Vec<(String, FieldLocation)>,
|
||||
flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
|
||||
next_flat_field_map_index: &mut usize,
|
||||
) -> usize {
|
||||
if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
|
||||
return *index;
|
||||
}
|
||||
|
||||
let result = *next_flat_field_map_index;
|
||||
flat_field_maps.push((result, flat_field_map.clone()));
|
||||
*next_flat_field_map_index += flat_field_map.len();
|
||||
result
|
||||
}
|
||||
|
||||
fn get_external_scanner_state_id(&mut self, external_tokens: HashSet<usize>) -> usize {
|
||||
self.external_scanner_states
|
||||
.iter()
|
||||
|
|
@ -865,6 +1025,10 @@ impl Generator {
|
|||
self.symbol_ids.insert(symbol, id);
|
||||
}
|
||||
|
||||
fn field_id(&self, field_name: &String) -> String {
|
||||
format!("field_{}", field_name)
|
||||
}
|
||||
|
||||
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
|
||||
match symbol.kind {
|
||||
SymbolType::End => ("end", VariableType::Hidden),
|
||||
|
|
@ -996,6 +1160,7 @@ pub(crate) fn render_c_code(
|
|||
alias_ids: HashMap::new(),
|
||||
external_scanner_states: Vec::new(),
|
||||
alias_map: HashMap::new(),
|
||||
field_names: Vec::new(),
|
||||
}
|
||||
.generate()
|
||||
}
|
||||
|
|
|
|||
|
|
@ -32,6 +32,7 @@ pub(crate) struct MetadataParams {
|
|||
pub is_active: bool,
|
||||
pub is_main_token: bool,
|
||||
pub alias: Option<Alias>,
|
||||
pub field_name: Option<String>,
|
||||
}
|
||||
|
||||
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
|
||||
|
|
@ -57,6 +58,12 @@ pub(crate) enum Rule {
|
|||
}
|
||||
|
||||
impl Rule {
|
||||
pub fn field(name: String, content: Rule) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.field_name = Some(name);
|
||||
})
|
||||
}
|
||||
|
||||
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
|
||||
add_metadata(content, move |params| {
|
||||
params.alias = Some(Alias { is_named, value });
|
||||
|
|
|
|||
|
|
@ -1,8 +1,9 @@
|
|||
use super::nfa::CharacterSet;
|
||||
use super::rules::{Alias, Associativity, Symbol};
|
||||
use hashbrown::HashMap;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
pub(crate) type AliasSequenceId = usize;
|
||||
pub(crate) type ProductionInfoId = usize;
|
||||
pub(crate) type ParseStateId = usize;
|
||||
pub(crate) type LexStateId = usize;
|
||||
|
||||
|
|
@ -21,7 +22,7 @@ pub(crate) enum ParseAction {
|
|||
precedence: i32,
|
||||
dynamic_precedence: i32,
|
||||
associativity: Option<Associativity>,
|
||||
alias_sequence_id: AliasSequenceId,
|
||||
production_id: ProductionInfoId,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -39,11 +40,23 @@ pub(crate) struct ParseState {
|
|||
pub unfinished_item_signature: u64,
|
||||
}
|
||||
|
||||
/// One location at which a field occurs within a production.
///
/// The code generator writes these out as `TSFieldMapEntry` initializers.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
    /// Index written as the second member of the generated entry
    /// (presumably the child position within the production — confirm).
    pub index: usize,
    /// When set, the generated entry carries `.inherited = true`.
    pub inherited: bool,
}
|
||||
|
||||
#[derive(Debug, Default, PartialEq, Eq)]
|
||||
pub(crate) struct ProductionInfo {
|
||||
pub alias_sequence: Vec<Option<Alias>>,
|
||||
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
pub(crate) struct ParseTable {
|
||||
pub states: Vec<ParseState>,
|
||||
pub symbols: Vec<Symbol>,
|
||||
pub alias_sequences: Vec<Vec<Option<Alias>>>,
|
||||
pub production_infos: Vec<ProductionInfo>,
|
||||
pub max_aliased_production_length: usize,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -95,6 +95,9 @@ pub fn parse_file_at_path(
|
|||
}
|
||||
let start = node.start_position();
|
||||
let end = node.end_position();
|
||||
if let Some(field_name) = cursor.field_name() {
|
||||
write!(&mut stdout, "{}: ", field_name)?;
|
||||
}
|
||||
write!(
|
||||
&mut stdout,
|
||||
"({} [{}, {}] - [{}, {}]",
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ use crate::error::{Error, Result};
|
|||
use log::info;
|
||||
use rsass;
|
||||
use rsass::sass::Value;
|
||||
use rsass::selectors::SelectorPart;
|
||||
use serde_derive::Serialize;
|
||||
use std::collections::hash_map::Entry;
|
||||
use std::collections::{btree_map, BTreeMap, HashMap, VecDeque};
|
||||
|
|
@ -27,11 +28,12 @@ type PropertySetId = usize;
|
|||
|
||||
#[derive(Clone, PartialEq, Eq)]
|
||||
struct SelectorStep {
|
||||
kind: String,
|
||||
is_named: bool,
|
||||
is_immediate: bool,
|
||||
kind: Option<String>,
|
||||
field: Option<String>,
|
||||
child_index: Option<usize>,
|
||||
text_pattern: Option<String>,
|
||||
is_named: Option<bool>,
|
||||
is_immediate: bool,
|
||||
}
|
||||
|
||||
#[derive(PartialEq, Eq)]
|
||||
|
|
@ -175,6 +177,7 @@ impl Builder {
|
|||
transitions
|
||||
.entry(PropertyTransitionJSON {
|
||||
kind: step.kind.clone(),
|
||||
field: step.field.clone(),
|
||||
named: step.is_named,
|
||||
index: step.child_index,
|
||||
text: step.text_pattern.clone(),
|
||||
|
|
@ -203,11 +206,11 @@ impl Builder {
|
|||
let mut transition_list: Vec<(PropertyTransitionJSON, u32)> =
|
||||
transitions.into_iter().collect();
|
||||
transition_list.sort_by(|a, b| {
|
||||
a.0.kind
|
||||
.cmp(&b.0.kind)
|
||||
.then_with(|| a.0.named.cmp(&b.0.named))
|
||||
.then_with(|| transition_specificity(&b.0).cmp(&transition_specificity(&a.0)))
|
||||
(transition_specificity(&b.0).cmp(&transition_specificity(&a.0)))
|
||||
.then_with(|| b.1.cmp(&a.1))
|
||||
.then_with(|| a.0.kind.cmp(&b.0.kind))
|
||||
.then_with(|| a.0.named.cmp(&b.0.named))
|
||||
.then_with(|| a.0.field.cmp(&b.0.field))
|
||||
});
|
||||
|
||||
// For each possible state transition, compute the set of items in that transition's
|
||||
|
|
@ -249,9 +252,7 @@ impl Builder {
|
|||
// rules will override less specific selectors and earlier rules.
|
||||
let mut properties = PropertySet::new();
|
||||
selector_matches.sort_unstable_by(|a, b| {
|
||||
a.specificity
|
||||
.cmp(&b.specificity)
|
||||
.then_with(|| a.rule_id.cmp(&b.rule_id))
|
||||
(a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id))
|
||||
});
|
||||
selector_matches.dedup();
|
||||
for selector_match in selector_matches {
|
||||
|
|
@ -313,6 +314,7 @@ impl Builder {
|
|||
transition.state_id = *replacement;
|
||||
}
|
||||
}
|
||||
state.transitions.dedup();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -347,8 +349,14 @@ impl Builder {
|
|||
}
|
||||
|
||||
fn selector_specificity(selector: &Selector) -> u32 {
|
||||
let mut result = selector.0.len() as u32;
|
||||
let mut result = 0;
|
||||
for step in &selector.0 {
|
||||
if step.kind.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if step.field.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if step.child_index.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
|
|
@ -361,6 +369,12 @@ fn selector_specificity(selector: &Selector) -> u32 {
|
|||
|
||||
fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 {
|
||||
let mut result = 0;
|
||||
if transition.kind.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if transition.field.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
if transition.index.is_some() {
|
||||
result += 1;
|
||||
}
|
||||
|
|
@ -371,19 +385,37 @@ fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 {
|
|||
}
|
||||
|
||||
fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool {
|
||||
step.kind == transition.kind
|
||||
&& step.is_named == transition.named
|
||||
&& (step.child_index == transition.index || step.child_index.is_none())
|
||||
&& (step.text_pattern == transition.text || step.text_pattern.is_none())
|
||||
step.kind
|
||||
.as_ref()
|
||||
.map_or(true, |kind| transition.kind.as_ref() == Some(kind))
|
||||
&& step
|
||||
.is_named
|
||||
.map_or(true, |named| transition.named == Some(named))
|
||||
&& step
|
||||
.field
|
||||
.as_ref()
|
||||
.map_or(true, |field| transition.field.as_ref() == Some(field))
|
||||
&& step
|
||||
.child_index
|
||||
.map_or(true, |index| transition.index == Some(index))
|
||||
&& step
|
||||
.text_pattern
|
||||
.as_ref()
|
||||
.map_or(true, |text| transition.text.as_ref() == Some(text))
|
||||
}
|
||||
|
||||
impl fmt::Debug for SelectorStep {
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
write!(f, "(")?;
|
||||
if self.is_named {
|
||||
write!(f, "{}", self.kind)?;
|
||||
} else {
|
||||
write!(f, "\"{}\"", self.kind)?;
|
||||
if let Some(kind) = &self.kind {
|
||||
if self.is_named.unwrap() {
|
||||
write!(f, "{}", kind)?;
|
||||
} else {
|
||||
write!(f, "[token='{}']", kind)?;
|
||||
}
|
||||
}
|
||||
if let Some(field) = &self.field {
|
||||
write!(f, ".{}", field)?;
|
||||
}
|
||||
if let Some(n) = self.child_index {
|
||||
write!(f, ":nth-child({})", n)?;
|
||||
|
|
@ -407,7 +439,7 @@ impl fmt::Debug for Selector {
|
|||
}
|
||||
write!(f, "{:?}", step)?;
|
||||
}
|
||||
write!(f, "]")?;
|
||||
write!(f, " (specificity: {})]", selector_specificity(self))?;
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
|
@ -484,52 +516,134 @@ fn parse_sass_items(
|
|||
rsass::Item::Rule(selectors, items) => {
|
||||
let mut full_selectors = Vec::new();
|
||||
for prefix in selector_prefixes {
|
||||
let mut part_string = String::new();
|
||||
let mut next_step_is_immediate = false;
|
||||
for selector in &selectors.s {
|
||||
let mut prefix = prefix.clone();
|
||||
let mut operator_was_immediate: Option<bool> = Some(false);
|
||||
for part in &selector.0 {
|
||||
part_string.clear();
|
||||
write!(&mut part_string, "{}", part).unwrap();
|
||||
let part_string = part_string.trim();
|
||||
if !part_string.is_empty() {
|
||||
if part_string == "&" {
|
||||
continue;
|
||||
} else if part_string.starts_with(":nth-child(") {
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
if let Ok(index) = usize::from_str_radix(
|
||||
&part_string[11..(part_string.len() - 1)],
|
||||
10,
|
||||
) {
|
||||
last_step.child_index = Some(index);
|
||||
match part {
|
||||
SelectorPart::BackRef => {
|
||||
operator_was_immediate = None;
|
||||
}
|
||||
SelectorPart::Simple(value) => {
|
||||
if let Some(value) = value.single_raw() {
|
||||
for (i, value) in value.split('.').enumerate() {
|
||||
if value.is_empty() {
|
||||
continue;
|
||||
}
|
||||
let value = value.to_string();
|
||||
check_node_kind(&value)?;
|
||||
if i > 0 {
|
||||
if let Some(immediate) = operator_was_immediate {
|
||||
prefix.push(SelectorStep {
|
||||
kind: None,
|
||||
field: Some(value),
|
||||
is_named: None,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: immediate,
|
||||
})
|
||||
} else {
|
||||
prefix.last_mut().unwrap().field = Some(value);
|
||||
}
|
||||
} else {
|
||||
if let Some(immediate) = operator_was_immediate {
|
||||
prefix.push(SelectorStep {
|
||||
kind: Some(value.to_string()),
|
||||
field: None,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_named: Some(true),
|
||||
is_immediate: immediate,
|
||||
});
|
||||
} else {
|
||||
return Err(Error(format!("Node type {} must be separated by whitespace or the `>` operator", value)));
|
||||
}
|
||||
}
|
||||
operator_was_immediate = None;
|
||||
}
|
||||
} else {
|
||||
return Err(interpolation_error());
|
||||
}
|
||||
operator_was_immediate = None;
|
||||
}
|
||||
SelectorPart::Attribute { name, val, .. } => {
|
||||
match name.single_raw() {
|
||||
None => return Err(interpolation_error()),
|
||||
Some("text") => {
|
||||
if operator_was_immediate.is_some() {
|
||||
return Err(Error("The `text` attribute must be used in combination with a node type or field".to_string()));
|
||||
}
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
last_step.text_pattern =
|
||||
Some(get_string_value(val.to_string())?)
|
||||
}
|
||||
}
|
||||
Some("token") => {
|
||||
if let Some(immediate) = operator_was_immediate {
|
||||
prefix.push(SelectorStep {
|
||||
kind: Some(get_string_value(val.to_string())?),
|
||||
field: None,
|
||||
is_named: Some(false),
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: immediate,
|
||||
});
|
||||
operator_was_immediate = None;
|
||||
} else {
|
||||
return Err(Error("The `token` attribute canot be used in combination with a node type".to_string()));
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(Error(format!(
|
||||
"Unsupported attribute {}",
|
||||
part
|
||||
)));
|
||||
}
|
||||
}
|
||||
} else if part_string.starts_with("[text=") {
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
last_step.text_pattern = Some(
|
||||
part_string[7..(part_string.len() - 2)].to_string(),
|
||||
)
|
||||
}
|
||||
SelectorPart::PseudoElement { .. } => {
|
||||
return Err(Error(
|
||||
"Pseudo elements are not supported".to_string(),
|
||||
));
|
||||
}
|
||||
SelectorPart::Pseudo { name, arg } => match name.single_raw() {
|
||||
None => return Err(interpolation_error()),
|
||||
Some("nth-child") => {
|
||||
if let Some(arg) = arg {
|
||||
let mut arg_str = String::new();
|
||||
write!(&mut arg_str, "{}", arg).unwrap();
|
||||
if let Some(last_step) = prefix.last_mut() {
|
||||
if let Ok(i) = usize::from_str_radix(&arg_str, 10) {
|
||||
last_step.child_index = Some(i);
|
||||
} else {
|
||||
return Err(Error(format!(
|
||||
"Invalid child index {}",
|
||||
arg
|
||||
)));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
_ => {
|
||||
return Err(Error(format!(
|
||||
"Unsupported pseudo-class {}",
|
||||
part
|
||||
)));
|
||||
}
|
||||
},
|
||||
SelectorPart::Descendant => {
|
||||
operator_was_immediate = Some(false);
|
||||
}
|
||||
SelectorPart::RelOp(operator) => {
|
||||
let operator = *operator as char;
|
||||
if operator == '>' {
|
||||
operator_was_immediate = Some(true);
|
||||
} else {
|
||||
return Err(Error(format!(
|
||||
"Unsupported operator {}",
|
||||
operator
|
||||
)));
|
||||
}
|
||||
} else if part_string == ">" {
|
||||
next_step_is_immediate = true;
|
||||
} else if part_string.starts_with("[token=") {
|
||||
prefix.push(SelectorStep {
|
||||
kind: part_string[8..(part_string.len() - 2)].to_string(),
|
||||
is_named: false,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: next_step_is_immediate,
|
||||
});
|
||||
next_step_is_immediate = false;
|
||||
} else {
|
||||
prefix.push(SelectorStep {
|
||||
kind: part_string.to_string(),
|
||||
is_named: true,
|
||||
child_index: None,
|
||||
text_pattern: None,
|
||||
is_immediate: next_step_is_immediate,
|
||||
});
|
||||
next_step_is_immediate = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -597,7 +711,7 @@ fn parse_sass_value(value: &Value) -> Result<PropertyValue> {
|
|||
if let Some(s) = s.single_raw() {
|
||||
Ok(PropertyValue::String(s.to_string()))
|
||||
} else {
|
||||
Err(Error("String interpolation is not supported".to_string()))
|
||||
Err(interpolation_error())
|
||||
}
|
||||
}
|
||||
Value::Call(name, raw_args) => {
|
||||
|
|
@ -665,6 +779,29 @@ fn resolve_path(base: &Path, p: &str) -> Result<PathBuf> {
|
|||
Err(Error(format!("Could not resolve import path `{}`", p)))
|
||||
}
|
||||
|
||||
fn check_node_kind(name: &String) -> Result<()> {
|
||||
for c in name.chars() {
|
||||
if !c.is_alphanumeric() && c != '_' {
|
||||
return Err(Error(format!("Invalid identifier '{}'", name)));
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn get_string_value(mut s: String) -> Result<String> {
|
||||
if s.starts_with("'") && s.ends_with("'") || s.starts_with('"') && s.ends_with('"') {
|
||||
s.pop();
|
||||
s.remove(0);
|
||||
Ok(s)
|
||||
} else {
|
||||
Err(Error(format!("Unsupported string literal {}", s)))
|
||||
}
|
||||
}
|
||||
|
||||
fn interpolation_error() -> Error {
|
||||
Error("String interpolation is not supported".to_string())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
|
@ -801,24 +938,91 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "abc"),
|
||||
*query(&sheet, vec![("f1", None, true, 0)], "abc"),
|
||||
props(&[("color", string("red"))])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "Abc"),
|
||||
*query(&sheet, vec![("f1", None, true, 0)], "Abc"),
|
||||
props(&[("color", string("green"))])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0)], "AB_CD"),
|
||||
*query(&sheet, vec![("f1", None, true, 0)], "AB_CD"),
|
||||
props(&[("color", string("blue"))])
|
||||
);
|
||||
assert_eq!(*query(&sheet, vec![("f2", true, 0)], "Abc"), props(&[]));
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f2", true, 0)], "ABC"),
|
||||
*query(&sheet, vec![("f2", None, true, 0)], "Abc"),
|
||||
props(&[])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f2", None, true, 0)], "ABC"),
|
||||
props(&[("color", string("purple"))])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
fn test_property_sheet_with_fields() {
    let sheet = generate_property_sheet(
        "foo.css",
        "
            a {
                color: red;
                &.x {
                    color: green;
                    b {
                        color: blue;
                        &.y { color: yellow; }
                    }
                }
                b { color: orange; }
                b.y { color: indigo; }
            }
            .x { color: violet; }
        ",
    )
    .unwrap();

    // Each case is a node stack of (kind, field, is_named, child_index)
    // tuples together with the color expected for the innermost node.
    let cases: Vec<(
        Vec<(&'static str, Option<&'static str>, bool, usize)>,
        &'static str,
    )> = vec![
        (vec![("a", None, true, 0)], "red"),
        (vec![("a", Some("x"), true, 0)], "green"),
        (
            vec![("a", Some("x"), true, 0), ("b", None, true, 0)],
            "blue",
        ),
        (
            vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)],
            "yellow",
        ),
        (vec![("b", Some("x"), true, 0)], "violet"),
        (vec![("a", None, true, 0), ("b", None, true, 0)], "orange"),
        (
            vec![("a", None, true, 0), ("b", Some("y"), true, 0)],
            "indigo",
        ),
    ];

    for (node_stack, expected_color) in cases {
        assert_eq!(
            *query(&sheet, node_stack, ""),
            props(&[("color", string(expected_color))])
        );
    }
}
|
||||
|
||||
#[test]
|
||||
fn test_property_sheet_with_cascade_ordering_as_tie_breaker() {
|
||||
let sheet = generate_property_sheet(
|
||||
|
|
@ -833,29 +1037,49 @@ mod tests {
|
|||
.unwrap();
|
||||
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0), ("f2", true, 0)], "x"),
|
||||
*query(
|
||||
&sheet,
|
||||
vec![("f1", None, true, 0), ("f2", None, true, 0)],
|
||||
"x"
|
||||
),
|
||||
props(&[])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 0), ("f2", true, 1)], "x"),
|
||||
*query(
|
||||
&sheet,
|
||||
vec![("f1", None, true, 0), ("f2", None, true, 1)],
|
||||
"x"
|
||||
),
|
||||
props(&[("color", string("red"))])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "x"),
|
||||
*query(
|
||||
&sheet,
|
||||
vec![("f1", None, true, 1), ("f2", None, true, 1)],
|
||||
"x"
|
||||
),
|
||||
props(&[("color", string("green"))])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "a"),
|
||||
*query(
|
||||
&sheet,
|
||||
vec![("f1", None, true, 1), ("f2", None, true, 1)],
|
||||
"a"
|
||||
),
|
||||
props(&[("color", string("blue"))])
|
||||
);
|
||||
assert_eq!(
|
||||
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "ab"),
|
||||
*query(
|
||||
&sheet,
|
||||
vec![("f1", None, true, 1), ("f2", None, true, 1)],
|
||||
"ab"
|
||||
),
|
||||
props(&[("color", string("violet"))])
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_property_sheet_with_function_calls() {
|
||||
fn test_property_sheet_with_css_function_calls() {
|
||||
let sheet = generate_property_sheet(
|
||||
"foo.css",
|
||||
"
|
||||
|
|
@ -1016,25 +1240,26 @@ mod tests {
|
|||
) -> &'a PropertySet {
|
||||
query(
|
||||
sheet,
|
||||
node_stack.into_iter().map(|s| (s, true, 0)).collect(),
|
||||
node_stack.into_iter().map(|s| (s, None, true, 0)).collect(),
|
||||
"",
|
||||
)
|
||||
}
|
||||
|
||||
fn query<'a>(
|
||||
sheet: &'a PropertySheetJSON,
|
||||
node_stack: Vec<(&'static str, bool, usize)>,
|
||||
node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>,
|
||||
leaf_text: &str,
|
||||
) -> &'a PropertySet {
|
||||
let mut state_id = 0;
|
||||
for (kind, is_named, child_index) in node_stack {
|
||||
for (kind, field, is_named, child_index) in node_stack {
|
||||
let state = &sheet.states[state_id];
|
||||
state_id = state
|
||||
.transitions
|
||||
.iter()
|
||||
.find(|transition| {
|
||||
transition.kind == kind
|
||||
&& transition.named == is_named
|
||||
transition.kind.as_ref().map_or(true, |k| k == kind)
|
||||
&& transition.named.map_or(true, |n| n == is_named)
|
||||
&& transition.field.as_ref().map_or(true, |f| field == Some(f))
|
||||
&& transition.index.map_or(true, |index| index == child_index)
|
||||
&& (transition
|
||||
.text
|
||||
|
|
|
|||
|
|
@ -22,6 +22,7 @@ lazy_static! {
|
|||
.build()
|
||||
.unwrap();
|
||||
static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
|
||||
static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap();
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq, Eq)]
|
||||
|
|
@ -34,6 +35,7 @@ pub enum TestEntry {
|
|||
name: String,
|
||||
input: Vec<u8>,
|
||||
output: String,
|
||||
has_fields: bool,
|
||||
},
|
||||
}
|
||||
|
||||
|
|
@ -135,6 +137,7 @@ fn run_tests(
|
|||
name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
} => {
|
||||
if let Some(filter) = filter {
|
||||
if !name.contains(filter) {
|
||||
|
|
@ -142,7 +145,10 @@ fn run_tests(
|
|||
}
|
||||
}
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let actual = tree.root_node().to_sexp();
|
||||
let mut actual = tree.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
actual = strip_sexp_fields(actual);
|
||||
}
|
||||
for _ in 0..indent_level {
|
||||
print!(" ");
|
||||
}
|
||||
|
|
@ -186,6 +192,10 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
|
|||
}
|
||||
}
|
||||
|
||||
pub fn strip_sexp_fields(sexp: String) -> String {
|
||||
SEXP_FIELD_REGEX.replace_all(&sexp, " (").to_string()
|
||||
}
|
||||
|
||||
fn parse_test_content(name: String, content: String) -> TestEntry {
|
||||
let mut children = Vec::new();
|
||||
let bytes = content.as_bytes();
|
||||
|
|
@ -209,10 +219,12 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
|
|||
let input = bytes[previous_header_end..divider_start].to_vec();
|
||||
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
|
||||
let output = output.replace(" )", ")");
|
||||
let has_fields = SEXP_FIELD_REGEX.is_match(&output);
|
||||
children.push(TestEntry::Example {
|
||||
name: previous_name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
});
|
||||
}
|
||||
}
|
||||
|
|
@ -265,11 +277,13 @@ d
|
|||
name: "The first test".to_string(),
|
||||
input: "\na b c\n".as_bytes().to_vec(),
|
||||
output: "(a (b c))".to_string(),
|
||||
has_fields: false,
|
||||
},
|
||||
TestEntry::Example {
|
||||
name: "The second test".to_string(),
|
||||
input: "d".as_bytes().to_vec(),
|
||||
output: "(d)".to_string(),
|
||||
has_fields: false,
|
||||
},
|
||||
]
|
||||
}
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language};
|
|||
use super::helpers::random::Rand;
|
||||
use super::helpers::scope_sequence::ScopeSequence;
|
||||
use crate::generate;
|
||||
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
|
||||
use crate::test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry};
|
||||
use crate::util;
|
||||
use lazy_static::lazy_static;
|
||||
use std::{env, fs, time, usize};
|
||||
|
|
@ -67,7 +67,7 @@ fn test_real_language_corpus_files() {
|
|||
eprintln!("language: {:?}", language_name);
|
||||
}
|
||||
|
||||
for (example_name, input, expected_output) in tests {
|
||||
for (example_name, input, expected_output, has_fields) in tests {
|
||||
eprintln!(" example: {:?}", example_name);
|
||||
|
||||
if TRIAL_FILTER.map_or(true, |t| t == 0) {
|
||||
|
|
@ -76,7 +76,10 @@ fn test_real_language_corpus_files() {
|
|||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let actual_output = tree.root_node().to_sexp();
|
||||
let mut actual_output = tree.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
drop(tree);
|
||||
drop(parser);
|
||||
if actual_output != expected_output {
|
||||
|
|
@ -144,7 +147,11 @@ fn test_real_language_corpus_files() {
|
|||
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
|
||||
|
||||
// Verify that the final tree matches the expectation from the corpus.
|
||||
let actual_output = tree3.root_node().to_sexp();
|
||||
let mut actual_output = tree3.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
|
||||
if actual_output != expected_output {
|
||||
println!(
|
||||
"Incorrect parse for {} - {} - trial {}",
|
||||
|
|
@ -241,7 +248,7 @@ fn test_feature_corpus_files() {
|
|||
eprintln!("test language: {:?}", language_name);
|
||||
}
|
||||
|
||||
for (name, input, expected_output) in tests {
|
||||
for (name, input, expected_output, has_fields) in tests {
|
||||
eprintln!(" example: {:?}", name);
|
||||
|
||||
allocations::start_recording();
|
||||
|
|
@ -249,7 +256,11 @@ fn test_feature_corpus_files() {
|
|||
let mut parser = get_parser(&mut log_session, "log.html");
|
||||
parser.set_language(language).unwrap();
|
||||
let tree = parser.parse(&input, None).unwrap();
|
||||
let actual_output = tree.root_node().to_sexp();
|
||||
let mut actual_output = tree.root_node().to_sexp();
|
||||
if !has_fields {
|
||||
actual_output = strip_sexp_fields(actual_output);
|
||||
}
|
||||
|
||||
drop(tree);
|
||||
drop(parser);
|
||||
if actual_output != expected_output {
|
||||
|
|
@ -348,13 +359,14 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
|
|||
parser
|
||||
}
|
||||
|
||||
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
|
||||
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String)>) {
|
||||
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
|
||||
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String, bool)>) {
|
||||
match test {
|
||||
TestEntry::Example {
|
||||
mut name,
|
||||
input,
|
||||
output,
|
||||
has_fields,
|
||||
} => {
|
||||
if !prefix.is_empty() {
|
||||
name.insert_str(0, " - ");
|
||||
|
|
@ -365,7 +377,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
|
|||
return;
|
||||
}
|
||||
}
|
||||
result.push((name, input, output));
|
||||
result.push((name, input, output, has_fields));
|
||||
}
|
||||
TestEntry::Group { mut name, children } => {
|
||||
if !prefix.is_empty() {
|
||||
|
|
|
|||
62
cli/src/tests/node_refs.rs
Normal file
62
cli/src/tests/node_refs.rs
Normal file
|
|
@ -0,0 +1,62 @@
|
|||
use super::helpers::fixtures::get_test_language;
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use tree_sitter::Parser;
|
||||
|
||||
#[test]
|
||||
fn test_basic_node_refs() {
|
||||
let (parser_name, parser_code) = generate_parser_for_grammar(
|
||||
r#"
|
||||
{
|
||||
"name": "test_grammar_with_refs",
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s+"}
|
||||
],
|
||||
"rules": {
|
||||
"rule_a": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "REF",
|
||||
"value": "ref_1",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-1"
|
||||
}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "child-2"
|
||||
},
|
||||
{
|
||||
"type": "BLANK"
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "REF",
|
||||
"value": "ref_2",
|
||||
"content": {
|
||||
"type": "STRING",
|
||||
"value": "child-3"
|
||||
}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let language = get_test_language(&parser_name, &parser_code, None);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
|
||||
let root_node = tree.root_node();
|
||||
assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0));
|
||||
assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2));
|
||||
}
|
||||
|
|
@ -1,6 +1,6 @@
|
|||
use super::helpers::edits::{get_random_edit, perform_edit};
|
||||
use super::helpers::fixtures::{get_language, get_test_language};
|
||||
use super::helpers::random::Rand;
|
||||
use super::helpers::edits::{get_random_edit, perform_edit};
|
||||
use crate::generate::generate_parser_for_grammar;
|
||||
use tree_sitter::{Node, Parser, Point, Tree};
|
||||
|
||||
|
|
@ -321,11 +321,7 @@ fn test_node_edit() {
|
|||
let nodes_after = get_all_nodes(&tree2);
|
||||
for (i, node) in nodes_before.into_iter().enumerate() {
|
||||
assert_eq!(
|
||||
(
|
||||
node.kind(),
|
||||
node.start_byte(),
|
||||
node.start_position()
|
||||
),
|
||||
(node.kind(), node.start_byte(), node.start_position()),
|
||||
(
|
||||
nodes_after[i].kind(),
|
||||
nodes_after[i].start_byte(),
|
||||
|
|
@ -338,6 +334,165 @@ fn test_node_edit() {
|
|||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_field_names() {
|
||||
let (parser_name, parser_code) = generate_parser_for_grammar(
|
||||
r#"
|
||||
{
|
||||
"name": "test_grammar_with_fields",
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s+"}
|
||||
],
|
||||
"rules": {
|
||||
"rule_a": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_1",
|
||||
"content": {"type": "STRING", "value": "child-0"}
|
||||
},
|
||||
{
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-1"},
|
||||
{"type": "BLANK"},
|
||||
|
||||
// This isn't used in the test, but prevents `_hidden_rule1`
|
||||
// from being eliminated as a unit reduction.
|
||||
{
|
||||
"type": "ALIAS",
|
||||
"value": "x",
|
||||
"named": true,
|
||||
"content": {
|
||||
"type": "SYMBOL",
|
||||
"name": "_hidden_rule1"
|
||||
}
|
||||
}
|
||||
]
|
||||
},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_2",
|
||||
"content": {"type": "SYMBOL", "name": "_hidden_rule1"}
|
||||
},
|
||||
{"type": "SYMBOL", "name": "_hidden_rule2"}
|
||||
]
|
||||
},
|
||||
|
||||
// Fields pointing to hidden nodes with a single child resolve to the child.
|
||||
"_hidden_rule1": {
|
||||
"type": "CHOICE",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-2"},
|
||||
{"type": "STRING", "value": "child-2.5"}
|
||||
]
|
||||
},
|
||||
|
||||
// Fields within hidden nodes can be referenced through the parent node.
|
||||
"_hidden_rule2": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{"type": "STRING", "value": "child-3"},
|
||||
{
|
||||
"type": "FIELD",
|
||||
"name": "field_3",
|
||||
"content": {"type": "STRING", "value": "child-4"}
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let language = get_test_language(&parser_name, &parser_code, None);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser
|
||||
.parse("child-0 child-1 child-2 child-3 child-4", None)
|
||||
.unwrap();
|
||||
let root_node = tree.root_node();
|
||||
|
||||
assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0));
|
||||
assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2));
|
||||
assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4));
|
||||
assert_eq!(
|
||||
root_node.child(0).unwrap().child_by_field_name("field_1"),
|
||||
None
|
||||
);
|
||||
assert_eq!(root_node.child_by_field_name("not_a_real_field"), None);
|
||||
|
||||
let mut cursor = root_node.walk();
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_first_child();
|
||||
assert_eq!(cursor.node().kind(), "child-0");
|
||||
assert_eq!(cursor.field_name(), Some("field_1"));
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-1");
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-2");
|
||||
assert_eq!(cursor.field_name(), Some("field_2"));
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-3");
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
cursor.goto_next_sibling();
|
||||
assert_eq!(cursor.node().kind(), "child-4");
|
||||
assert_eq!(cursor.field_name(), Some("field_3"));
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_node_field_calls_in_language_without_fields() {
|
||||
let (parser_name, parser_code) = generate_parser_for_grammar(
|
||||
r#"
|
||||
{
|
||||
"name": "test_grammar_with_no_fields",
|
||||
"extras": [
|
||||
{"type": "PATTERN", "value": "\\s+"}
|
||||
],
|
||||
"rules": {
|
||||
"a": {
|
||||
"type": "SEQ",
|
||||
"members": [
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "b"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "c"
|
||||
},
|
||||
{
|
||||
"type": "STRING",
|
||||
"value": "d"
|
||||
}
|
||||
]
|
||||
}
|
||||
}
|
||||
}
|
||||
"#,
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
let mut parser = Parser::new();
|
||||
let language = get_test_language(&parser_name, &parser_code, None);
|
||||
parser.set_language(language).unwrap();
|
||||
|
||||
let tree = parser.parse("b c d", None).unwrap();
|
||||
|
||||
let root_node = tree.root_node();
|
||||
assert_eq!(root_node.kind(), "a");
|
||||
assert_eq!(root_node.child_by_field_name("something"), None);
|
||||
|
||||
let mut cursor = root_node.walk();
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
assert_eq!(cursor.goto_first_child(), true);
|
||||
assert_eq!(cursor.field_name(), None);
|
||||
}
|
||||
|
||||
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
|
||||
let mut result = Vec::new();
|
||||
let mut visited_children = false;
|
||||
|
|
|
|||
|
|
@ -833,7 +833,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
|
|||
let root = tree.root_node();
|
||||
assert_eq!(
|
||||
root.to_sexp(),
|
||||
"(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))"
|
||||
"(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))"
|
||||
);
|
||||
assert_eq!(root.start_byte(), 2);
|
||||
assert_eq!(root.child(3).unwrap().start_byte(), 4);
|
||||
|
|
|
|||
|
|
@ -600,8 +600,6 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
|
|||
}
|
||||
```
|
||||
|
||||
|
||||
|
||||
[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
|
||||
[antlr]: http://www.antlr.org/
|
||||
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html
|
||||
|
|
|
|||
|
|
@ -605,8 +605,9 @@ where
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>> Iterator
|
||||
for Highlighter<'a, T>
|
||||
impl<'a, T> Iterator for Highlighter<'a, T>
|
||||
where
|
||||
T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
|
||||
{
|
||||
type Item = HighlightEvent<'a>;
|
||||
|
||||
|
|
@ -703,6 +704,31 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>> Itera
|
|||
}
|
||||
}
|
||||
|
||||
impl<'a, T> fmt::Debug for Highlighter<'a, T>
|
||||
where
|
||||
T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
|
||||
{
|
||||
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
|
||||
if let Some(layer) = self.layers.first() {
|
||||
let node = layer.cursor.node();
|
||||
let position = if layer.at_node_end {
|
||||
node.end_position()
|
||||
} else {
|
||||
node.start_position()
|
||||
};
|
||||
write!(
|
||||
f,
|
||||
"{{Highlighter position: {:?}, kind: {}, at_end: {}, props: {:?}}}",
|
||||
position,
|
||||
node.kind(),
|
||||
layer.at_node_end,
|
||||
layer.cursor.node_properties()
|
||||
)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Layer<'a> {
|
||||
fn new(
|
||||
source: &'a [u8],
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@
|
|||
pub type __darwin_size_t = ::std::os::raw::c_ulong;
|
||||
pub type FILE = [u64; 19usize];
|
||||
pub type TSSymbol = u16;
|
||||
pub type TSFieldId = u16;
|
||||
#[repr(C)]
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
pub struct TSLanguage {
|
||||
|
|
@ -227,6 +228,16 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_child_by_field_name(
|
||||
arg1: TSNode,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
arg3: u32,
|
||||
) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode;
|
||||
}
|
||||
|
|
@ -286,6 +297,14 @@ extern "C" {
|
|||
extern "C" {
|
||||
pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_current_field_name(
|
||||
arg1: *const TSTreeCursor,
|
||||
) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool;
|
||||
}
|
||||
|
|
@ -313,6 +332,22 @@ extern "C" {
|
|||
arg2: *const ::std::os::raw::c_char,
|
||||
) -> TSSymbol;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_field_name_for_id(
|
||||
arg1: *const TSLanguage,
|
||||
arg2: TSFieldId,
|
||||
) -> *const ::std::os::raw::c_char;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_field_id_for_name(
|
||||
arg1: *const TSLanguage,
|
||||
arg2: *const ::std::os::raw::c_char,
|
||||
arg3: u32,
|
||||
) -> TSFieldId;
|
||||
}
|
||||
extern "C" {
|
||||
pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType;
|
||||
}
|
||||
|
|
@ -320,4 +355,5 @@ extern "C" {
|
|||
pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
|
||||
}
|
||||
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9;
|
||||
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 10;
|
||||
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9;
|
||||
|
|
|
|||
|
|
@ -18,6 +18,7 @@ use std::os::raw::{c_char, c_void};
|
|||
use std::sync::atomic::AtomicUsize;
|
||||
use std::{fmt, ptr, slice, str, u16};
|
||||
|
||||
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
|
||||
pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h");
|
||||
|
||||
#[derive(Clone, Copy)]
|
||||
|
|
@ -57,13 +58,15 @@ pub struct InputEdit {
|
|||
}
|
||||
|
||||
struct PropertyTransition {
|
||||
state_id: usize,
|
||||
child_index: Option<usize>,
|
||||
text_regex_index: Option<usize>,
|
||||
state_id: u16,
|
||||
child_index: Option<u16>,
|
||||
text_regex_index: Option<u16>,
|
||||
node_kind_id: Option<u16>,
|
||||
}
|
||||
|
||||
struct PropertyState {
|
||||
transitions: HashMap<u16, Vec<PropertyTransition>>,
|
||||
field_transitions: HashMap<u16, Vec<PropertyTransition>>,
|
||||
kind_transitions: HashMap<u16, Vec<PropertyTransition>>,
|
||||
property_set_id: usize,
|
||||
default_next_state_id: usize,
|
||||
}
|
||||
|
|
@ -83,11 +86,15 @@ pub struct PropertySheet<P = HashMap<String, String>> {
|
|||
#[derive(Debug, Deserialize, Serialize, Hash, PartialEq, Eq)]
|
||||
pub struct PropertyTransitionJSON {
|
||||
#[serde(rename = "type")]
|
||||
pub kind: String,
|
||||
pub named: bool,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub kind: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub named: Option<bool>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub index: Option<usize>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub field: Option<String>,
|
||||
#[serde(skip_serializing_if = "Option::is_none")]
|
||||
pub text: Option<String>,
|
||||
pub state_id: usize,
|
||||
}
|
||||
|
|
@ -137,6 +144,22 @@ impl Language {
|
|||
pub fn node_kind_is_named(&self, id: u16) -> bool {
|
||||
unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular }
|
||||
}
|
||||
|
||||
pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option<u16> {
|
||||
let field_name = field_name.as_ref();
|
||||
let id = unsafe {
|
||||
ffi::ts_language_field_id_for_name(
|
||||
self.0,
|
||||
field_name.as_ptr() as *const c_char,
|
||||
field_name.len() as u32,
|
||||
)
|
||||
};
|
||||
if id == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
unsafe impl Send for Language {}
|
||||
|
|
@ -154,15 +177,21 @@ impl Parser {
|
|||
pub fn set_language(&mut self, language: Language) -> Result<(), String> {
|
||||
unsafe {
|
||||
let version = ffi::ts_language_version(language.0) as usize;
|
||||
if version == ffi::TREE_SITTER_LANGUAGE_VERSION {
|
||||
ffi::ts_parser_set_language(self.0, language.0);
|
||||
Ok(())
|
||||
} else {
|
||||
if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION {
|
||||
Err(format!(
|
||||
"Incompatible language version {}. Expected {} or greater.",
|
||||
version,
|
||||
ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
|
||||
))
|
||||
} else if version > ffi::TREE_SITTER_LANGUAGE_VERSION {
|
||||
Err(format!(
|
||||
"Incompatible language version {}. Expected {}.",
|
||||
version,
|
||||
ffi::TREE_SITTER_LANGUAGE_VERSION
|
||||
))
|
||||
} else {
|
||||
ffi::ts_parser_set_language(self.0, language.0);
|
||||
Ok(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -484,6 +513,17 @@ impl<'tree> Node<'tree> {
|
|||
Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
|
||||
}
|
||||
|
||||
pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option<Self> {
|
||||
let field_name = field_name.as_ref();
|
||||
Self::new(unsafe {
|
||||
ffi::ts_node_child_by_field_name(
|
||||
self.0,
|
||||
field_name.as_ptr() as *const c_char,
|
||||
field_name.len() as u32,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
pub fn child_count(&self) -> usize {
|
||||
unsafe { ffi::ts_node_child_count(self.0) as usize }
|
||||
}
|
||||
|
|
@ -601,6 +641,28 @@ impl<'a> TreeCursor<'a> {
|
|||
)
|
||||
}
|
||||
|
||||
pub fn field_id(&self) -> Option<u16> {
|
||||
unsafe {
|
||||
let id = ffi::ts_tree_cursor_current_field_id(&self.0);
|
||||
if id == 0 {
|
||||
None
|
||||
} else {
|
||||
Some(id)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn field_name(&self) -> Option<&str> {
|
||||
unsafe {
|
||||
let ptr = ffi::ts_tree_cursor_current_field_name(&self.0);
|
||||
if ptr.is_null() {
|
||||
None
|
||||
} else {
|
||||
Some(CStr::from_ptr(ptr).to_str().unwrap())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn goto_first_child(&mut self) -> bool {
|
||||
return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) };
|
||||
}
|
||||
|
|
@ -643,7 +705,9 @@ impl<'a, P> TreePropertyCursor<'a, P> {
|
|||
property_sheet,
|
||||
source,
|
||||
};
|
||||
let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0);
|
||||
let kind_id = result.cursor.node().kind_id();
|
||||
let field_id = result.cursor.field_id();
|
||||
let state = result.next_state(&result.current_state(), kind_id, field_id, 0);
|
||||
result.state_stack.push(state);
|
||||
result
|
||||
}
|
||||
|
|
@ -662,7 +726,8 @@ impl<'a, P> TreePropertyCursor<'a, P> {
|
|||
let next_state_id = {
|
||||
let state = &self.current_state();
|
||||
let kind_id = self.cursor.node().kind_id();
|
||||
self.next_state(state, kind_id, child_index)
|
||||
let field_id = self.cursor.field_id();
|
||||
self.next_state(state, kind_id, field_id, child_index)
|
||||
};
|
||||
self.state_stack.push(next_state_id);
|
||||
self.child_index_stack.push(child_index);
|
||||
|
|
@ -679,7 +744,8 @@ impl<'a, P> TreePropertyCursor<'a, P> {
|
|||
let next_state_id = {
|
||||
let state = &self.current_state();
|
||||
let kind_id = self.cursor.node().kind_id();
|
||||
self.next_state(state, kind_id, child_index)
|
||||
let field_id = self.cursor.field_id();
|
||||
self.next_state(state, kind_id, field_id, child_index)
|
||||
};
|
||||
self.state_stack.push(next_state_id);
|
||||
self.child_index_stack.push(child_index);
|
||||
|
|
@ -703,34 +769,47 @@ impl<'a, P> TreePropertyCursor<'a, P> {
|
|||
&self,
|
||||
state: &PropertyState,
|
||||
node_kind_id: u16,
|
||||
node_field_id: Option<u16>,
|
||||
node_child_index: usize,
|
||||
) -> usize {
|
||||
state
|
||||
.transitions
|
||||
.get(&node_kind_id)
|
||||
.and_then(|transitions| {
|
||||
for transition in transitions.iter() {
|
||||
if let Some(text_regex_index) = transition.text_regex_index {
|
||||
let node = self.cursor.node();
|
||||
let text = &self.source[node.start_byte()..node.end_byte()];
|
||||
if let Ok(text) = str::from_utf8(text) {
|
||||
if !self.property_sheet.text_regexes[text_regex_index].is_match(text) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
let transitions = if let Some(field_id) = node_field_id {
|
||||
state.field_transitions.get(&field_id)
|
||||
} else {
|
||||
state.kind_transitions.get(&node_kind_id)
|
||||
};
|
||||
|
||||
if let Some(child_index) = transition.child_index {
|
||||
if child_index != node_child_index {
|
||||
if let Some(transitions) = transitions {
|
||||
for transition in transitions.iter() {
|
||||
if transition
|
||||
.node_kind_id
|
||||
.map_or(false, |id| id != node_kind_id)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(text_regex_index) = transition.text_regex_index {
|
||||
let node = self.cursor.node();
|
||||
let text = &self.source[node.start_byte()..node.end_byte()];
|
||||
if let Ok(text) = str::from_utf8(text) {
|
||||
if !self.property_sheet.text_regexes[text_regex_index as usize]
|
||||
.is_match(text)
|
||||
{
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return Some(transition.state_id);
|
||||
}
|
||||
None
|
||||
})
|
||||
.unwrap_or(state.default_next_state_id)
|
||||
|
||||
if let Some(child_index) = transition.child_index {
|
||||
if child_index != node_child_index as u16 {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
||||
return transition.state_id as usize;
|
||||
}
|
||||
}
|
||||
|
||||
state.default_next_state_id
|
||||
}
|
||||
|
||||
fn current_state(&self) -> &PropertyState {
|
||||
|
|
@ -815,40 +894,97 @@ impl<P> PropertySheet<P> {
|
|||
let mut text_regex_patterns = Vec::new();
|
||||
|
||||
for state in input.states.iter() {
|
||||
let mut transitions = HashMap::new();
|
||||
let node_kind_count = language.node_kind_count();
|
||||
let mut kind_transitions = HashMap::new();
|
||||
let mut field_transitions = HashMap::new();
|
||||
|
||||
for transition in state.transitions.iter() {
|
||||
let field_id = transition
|
||||
.field
|
||||
.as_ref()
|
||||
.and_then(|field| language.field_id_for_name(&field));
|
||||
if let Some(field_id) = field_id {
|
||||
field_transitions.entry(field_id).or_insert(Vec::new());
|
||||
}
|
||||
}
|
||||
|
||||
for transition in state.transitions.iter() {
|
||||
let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() {
|
||||
if let Some(index) =
|
||||
text_regex_patterns.iter().position(|r| *r == regex_pattern)
|
||||
{
|
||||
Some(index)
|
||||
Some(index as u16)
|
||||
} else {
|
||||
text_regex_patterns.push(regex_pattern);
|
||||
text_regexes.push(
|
||||
Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?,
|
||||
);
|
||||
Some(text_regexes.len() - 1)
|
||||
Some(text_regexes.len() as u16 - 1)
|
||||
}
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
for i in 0..(node_kind_count as u16) {
|
||||
if transition.kind == language.node_kind_for_id(i)
|
||||
&& transition.named == language.node_kind_is_named(i)
|
||||
{
|
||||
let entry = transitions.entry(i).or_insert(Vec::new());
|
||||
entry.push(PropertyTransition {
|
||||
child_index: transition.index,
|
||||
state_id: transition.state_id,
|
||||
let state_id = transition.state_id as u16;
|
||||
let child_index = transition.index.map(|i| i as u16);
|
||||
let field_id = transition
|
||||
.field
|
||||
.as_ref()
|
||||
.and_then(|field| language.field_id_for_name(&field));
|
||||
|
||||
if let Some(kind) = transition.kind.as_ref() {
|
||||
for kind_id in 0..(node_kind_count as u16) {
|
||||
if kind != language.node_kind_for_id(kind_id)
|
||||
|| transition.named != Some(language.node_kind_is_named(kind_id))
|
||||
{
|
||||
continue;
|
||||
}
|
||||
|
||||
if let Some(field_id) = field_id {
|
||||
field_transitions
|
||||
.entry(field_id)
|
||||
.or_insert(Vec::new())
|
||||
.push(PropertyTransition {
|
||||
node_kind_id: Some(kind_id),
|
||||
state_id,
|
||||
child_index,
|
||||
text_regex_index,
|
||||
});
|
||||
} else {
|
||||
for (_, entries) in field_transitions.iter_mut() {
|
||||
entries.push(PropertyTransition {
|
||||
node_kind_id: Some(kind_id),
|
||||
state_id,
|
||||
child_index,
|
||||
text_regex_index,
|
||||
});
|
||||
}
|
||||
|
||||
kind_transitions.entry(kind_id).or_insert(Vec::new()).push(
|
||||
PropertyTransition {
|
||||
node_kind_id: None,
|
||||
state_id,
|
||||
child_index,
|
||||
text_regex_index,
|
||||
},
|
||||
);
|
||||
}
|
||||
}
|
||||
} else if let Some(field_id) = field_id {
|
||||
field_transitions
|
||||
.entry(field_id)
|
||||
.or_insert(Vec::new())
|
||||
.push(PropertyTransition {
|
||||
node_kind_id: None,
|
||||
state_id,
|
||||
child_index,
|
||||
text_regex_index,
|
||||
});
|
||||
}
|
||||
}
|
||||
}
|
||||
states.push(PropertyState {
|
||||
transitions,
|
||||
field_transitions,
|
||||
kind_transitions,
|
||||
default_next_state_id: state.default_next_state_id,
|
||||
property_set_id: state.property_set_id,
|
||||
});
|
||||
|
|
|
|||
|
|
@ -10,9 +10,11 @@ extern "C" {
|
|||
#include <stdint.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 9
|
||||
#define TREE_SITTER_LANGUAGE_VERSION 10
|
||||
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
|
||||
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
typedef struct TSParser TSParser;
|
||||
typedef struct TSTree TSTree;
|
||||
|
|
@ -119,6 +121,8 @@ bool ts_node_has_changes(TSNode);
|
|||
bool ts_node_has_error(TSNode);
|
||||
TSNode ts_node_parent(TSNode);
|
||||
TSNode ts_node_child(TSNode, uint32_t);
|
||||
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
|
||||
TSNode ts_node_child_by_field_name(TSNode, const char *, uint32_t);
|
||||
TSNode ts_node_named_child(TSNode, uint32_t);
|
||||
uint32_t ts_node_child_count(TSNode);
|
||||
uint32_t ts_node_named_child_count(TSNode);
|
||||
|
|
@ -138,6 +142,8 @@ TSTreeCursor ts_tree_cursor_new(TSNode);
|
|||
void ts_tree_cursor_delete(TSTreeCursor *);
|
||||
void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
|
||||
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
|
||||
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
|
||||
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
|
||||
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
|
||||
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
|
||||
|
|
@ -146,6 +152,9 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
|
|||
uint32_t ts_language_symbol_count(const TSLanguage *);
|
||||
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
|
||||
TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *);
|
||||
uint32_t ts_language_field_count(const TSLanguage *);
|
||||
const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
|
||||
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
|
||||
TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
|
||||
uint32_t ts_language_version(const TSLanguage *);
|
||||
|
||||
|
|
|
|||
|
|
@ -15,9 +15,21 @@ extern "C" {
|
|||
|
||||
#ifndef TREE_SITTER_API_H_
|
||||
typedef uint16_t TSSymbol;
|
||||
typedef uint16_t TSFieldId;
|
||||
typedef struct TSLanguage TSLanguage;
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
TSFieldId field_id;
|
||||
uint8_t child_index;
|
||||
bool inherited;
|
||||
} TSFieldMapEntry;
|
||||
|
||||
typedef struct {
|
||||
uint16_t index;
|
||||
uint16_t length;
|
||||
} TSFieldMapSlice;
|
||||
|
||||
typedef uint16_t TSStateId;
|
||||
|
||||
typedef struct {
|
||||
|
|
@ -54,7 +66,7 @@ typedef struct {
|
|||
TSSymbol symbol;
|
||||
int16_t dynamic_precedence;
|
||||
uint8_t child_count;
|
||||
uint8_t alias_sequence_id;
|
||||
uint8_t production_id;
|
||||
};
|
||||
} params;
|
||||
TSParseActionType type : 4;
|
||||
|
|
@ -98,6 +110,10 @@ struct TSLanguage {
|
|||
unsigned (*serialize)(void *, char *);
|
||||
void (*deserialize)(void *, const char *, unsigned);
|
||||
} external_scanner;
|
||||
uint32_t field_count;
|
||||
const TSFieldMapSlice *field_map_slices;
|
||||
const TSFieldMapEntry *field_map_entries;
|
||||
const char **field_names;
|
||||
};
|
||||
|
||||
/*
|
||||
|
|
|
|||
|
|
@ -148,7 +148,7 @@ static bool iterator_tree_is_visible(const Iterator *self) {
|
|||
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->language,
|
||||
parent.ptr->alias_sequence_id
|
||||
parent.ptr->production_id
|
||||
);
|
||||
return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
|
||||
}
|
||||
|
|
@ -171,7 +171,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
|
|||
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->language,
|
||||
parent->ptr->alias_sequence_id
|
||||
parent->ptr->production_id
|
||||
);
|
||||
if (alias_sequence) {
|
||||
*alias_symbol = alias_sequence[entry.structural_child_index];
|
||||
|
|
|
|||
|
|
@ -69,3 +69,39 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol
|
|||
return TSSymbolTypeAuxiliary;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t ts_language_field_count(const TSLanguage *self) {
|
||||
if (self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) {
|
||||
return self->field_count;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the name of the field with the given id, or NULL when the
// language has no fields or the id is larger than the number of fields.
//
// Field ids handed out by `ts_language_field_id_for_name` run from 1 to the
// field count inclusive, so any id above the count is rejected here instead
// of being used to index past the end of `field_names`.
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
  uint32_t count = ts_language_field_count(self);
  if (count && id <= count) {
    return self->field_names[id];
  } else {
    return NULL;
  }
}
|
||||
|
||||
TSFieldId ts_language_field_id_for_name(
|
||||
const TSLanguage *self,
|
||||
const char *name,
|
||||
uint32_t name_length
|
||||
) {
|
||||
uint32_t count = ts_language_field_count(self);
|
||||
for (TSSymbol i = 1; i < count + 1; i++) {
|
||||
switch (strncmp(name, self->field_names[i], name_length)) {
|
||||
case 0:
|
||||
return i;
|
||||
case -1:
|
||||
return 0;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ extern "C" {
|
|||
#include "tree_sitter/parser.h"
|
||||
|
||||
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
|
||||
#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
|
||||
|
||||
typedef struct {
|
||||
const TSParseAction *actions;
|
||||
|
|
@ -81,12 +82,29 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
|
|||
}
|
||||
|
||||
static inline const TSSymbol *
|
||||
ts_language_alias_sequence(const TSLanguage *self, unsigned id) {
|
||||
return id > 0 ?
|
||||
self->alias_sequences + id * self->max_alias_sequence_length :
|
||||
ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
|
||||
return production_id > 0 ?
|
||||
self->alias_sequences + production_id * self->max_alias_sequence_length :
|
||||
NULL;
|
||||
}
|
||||
|
||||
static inline void ts_language_field_map(
|
||||
const TSLanguage *self,
|
||||
uint32_t production_id,
|
||||
const TSFieldMapEntry **start,
|
||||
const TSFieldMapEntry **end
|
||||
) {
|
||||
if (self->version < TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS || self->field_count == 0) {
|
||||
*start = NULL;
|
||||
*end = NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
TSFieldMapSlice slice = self->field_map_slices[production_id];
|
||||
*start = &self->field_map_entries[slice.index];
|
||||
*end = &self->field_map_entries[slice.index] + slice.length;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
|
|||
}
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
node->tree->language,
|
||||
subtree.ptr->alias_sequence_id
|
||||
subtree.ptr->production_id
|
||||
);
|
||||
return (NodeChildIterator) {
|
||||
.tree = node->tree,
|
||||
|
|
@ -65,8 +65,12 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
|
|||
};
|
||||
}
|
||||
|
||||
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
|
||||
return self->child_index == self->parent.ptr->child_count;
|
||||
}
|
||||
|
||||
static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) {
|
||||
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
|
||||
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
|
||||
const Subtree *child = &self->parent.ptr->children[self->child_index];
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (!ts_subtree_extra(*child)) {
|
||||
|
|
@ -453,6 +457,85 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
|
|||
return ts_node__child(self, child_index, false);
|
||||
}
|
||||
|
||||
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
|
||||
recur:
|
||||
if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();
|
||||
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self.tree->language,
|
||||
ts_node__subtree(self).ptr->production_id,
|
||||
&field_map,
|
||||
&field_map_end
|
||||
);
|
||||
if (field_map == field_map_end) return ts_node__null();
|
||||
|
||||
// The field mappings are sorted by their field id. Scan all
|
||||
// the mappings to find the ones for the given field id.
|
||||
while (field_map->field_id < field_id) {
|
||||
field_map++;
|
||||
if (field_map == field_map_end) return ts_node__null();
|
||||
}
|
||||
while (field_map_end[-1].field_id > field_id) {
|
||||
field_map_end--;
|
||||
if (field_map == field_map_end) return ts_node__null();
|
||||
}
|
||||
|
||||
TSNode child;
|
||||
NodeChildIterator iterator = ts_node_iterate_children(&self);
|
||||
while (ts_node_child_iterator_next(&iterator, &child)) {
|
||||
if (!ts_subtree_extra(ts_node__subtree(child))) {
|
||||
uint32_t index = iterator.structural_child_index - 1;
|
||||
if (index < field_map->child_index) continue;
|
||||
|
||||
// Hidden nodes' fields are "inherited" by their visible parent.
|
||||
if (field_map->inherited) {
|
||||
|
||||
// If this is the *last* possible child node for this field,
|
||||
// then perform a tail call to avoid recursion.
|
||||
if (field_map + 1 == field_map_end) {
|
||||
self = child;
|
||||
goto recur;
|
||||
}
|
||||
|
||||
// Otherwise, descend into this child, but if it doesn't contain
|
||||
// the field, continue searching subsequent children.
|
||||
else {
|
||||
TSNode result = ts_node_child_by_field_id(child, field_id);
|
||||
if (result.id) return result;
|
||||
field_map++;
|
||||
if (field_map == field_map_end) return ts_node__null();
|
||||
}
|
||||
}
|
||||
|
||||
else if (ts_node__is_relevant(child, true)) {
|
||||
return child;
|
||||
}
|
||||
|
||||
// If the field refers to a hidden node, return its first visible
|
||||
// child.
|
||||
else {
|
||||
return ts_node_child(child, 0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return ts_node__null();
|
||||
}
|
||||
|
||||
TSNode ts_node_child_by_field_name(
|
||||
TSNode self,
|
||||
const char *name,
|
||||
uint32_t name_length
|
||||
) {
|
||||
TSFieldId field_id = ts_language_field_id_for_name(
|
||||
self.tree->language,
|
||||
name,
|
||||
name_length
|
||||
);
|
||||
return ts_node_child_by_field_id(self, field_id);
|
||||
}
|
||||
|
||||
uint32_t ts_node_child_count(TSNode self) {
|
||||
Subtree tree = ts_node__subtree(self);
|
||||
if (ts_subtree_child_count(tree) > 0) {
|
||||
|
|
|
|||
|
|
@ -681,7 +681,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su
|
|||
|
||||
static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol,
|
||||
uint32_t count, int dynamic_precedence,
|
||||
uint16_t alias_sequence_id, bool fragile) {
|
||||
uint16_t production_id, bool fragile) {
|
||||
uint32_t initial_version_count = ts_stack_version_count(self->stack);
|
||||
uint32_t removed_version_count = 0;
|
||||
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
|
||||
|
|
@ -715,7 +715,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
|
|||
}
|
||||
|
||||
MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
|
||||
symbol, &children, alias_sequence_id, self->language
|
||||
symbol, &children, production_id, self->language
|
||||
);
|
||||
|
||||
// This pop operation may have caused multiple stack versions to collapse
|
||||
|
|
@ -741,7 +741,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
|
|||
}
|
||||
|
||||
parent.ptr->dynamic_precedence += dynamic_precedence;
|
||||
parent.ptr->alias_sequence_id = alias_sequence_id;
|
||||
parent.ptr->production_id = production_id;
|
||||
|
||||
TSStateId state = ts_stack_state(self->stack, slice_version);
|
||||
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
|
||||
|
|
@ -797,7 +797,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look
|
|||
&self->tree_pool,
|
||||
ts_subtree_symbol(child),
|
||||
&trees,
|
||||
child.ptr->alias_sequence_id,
|
||||
child.ptr->production_id,
|
||||
self->language
|
||||
));
|
||||
ts_subtree_release(&self->tree_pool, child);
|
||||
|
|
@ -873,7 +873,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
|
|||
.symbol = action.params.symbol,
|
||||
.count = action.params.child_count,
|
||||
.dynamic_precedence = action.params.dynamic_precedence,
|
||||
.alias_sequence_id = action.params.alias_sequence_id,
|
||||
.production_id = action.params.production_id,
|
||||
});
|
||||
default:
|
||||
break;
|
||||
|
|
@ -887,7 +887,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
|
|||
|
||||
reduction_version = ts_parser__reduce(
|
||||
self, version, action.symbol, action.count,
|
||||
action.dynamic_precedence, action.alias_sequence_id,
|
||||
action.dynamic_precedence, action.production_id,
|
||||
true
|
||||
);
|
||||
}
|
||||
|
|
@ -1331,7 +1331,7 @@ static bool ts_parser__advance(
|
|||
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
|
||||
StackVersion reduction_version = ts_parser__reduce(
|
||||
self, version, action.params.symbol, action.params.child_count,
|
||||
action.params.dynamic_precedence, action.params.alias_sequence_id,
|
||||
action.params.dynamic_precedence, action.params.production_id,
|
||||
is_fragile
|
||||
);
|
||||
if (reduction_version != STACK_VERSION_NONE) {
|
||||
|
|
@ -1549,7 +1549,10 @@ const TSLanguage *ts_parser_language(const TSParser *self) {
|
|||
}
|
||||
|
||||
bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
|
||||
if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false;
|
||||
if (language) {
|
||||
if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
|
||||
if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
|
||||
}
|
||||
|
||||
if (self->external_scanner_payload && self->language->external_scanner.destroy) {
|
||||
self->language->external_scanner.destroy(self->external_scanner_payload);
|
||||
|
|
|
|||
|
|
@ -12,7 +12,7 @@ typedef struct {
|
|||
uint32_t count;
|
||||
TSSymbol symbol;
|
||||
int dynamic_precedence;
|
||||
unsigned short alias_sequence_id;
|
||||
unsigned short production_id;
|
||||
} ReduceAction;
|
||||
|
||||
typedef Array(ReduceAction) ReduceActionSet;
|
||||
|
|
|
|||
|
|
@ -379,7 +379,7 @@ void ts_subtree_set_children(
|
|||
self.ptr->dynamic_precedence = 0;
|
||||
|
||||
uint32_t non_extra_index = 0;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
|
||||
uint32_t lookahead_end_byte = 0;
|
||||
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
|
|
@ -474,7 +474,7 @@ void ts_subtree_set_children(
|
|||
}
|
||||
|
||||
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
|
||||
SubtreeArray *children, unsigned alias_sequence_id,
|
||||
SubtreeArray *children, unsigned production_id,
|
||||
const TSLanguage *language) {
|
||||
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
|
||||
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
|
||||
|
|
@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
|
|||
*data = (SubtreeHeapData) {
|
||||
.ref_count = 1,
|
||||
.symbol = symbol,
|
||||
.alias_sequence_id = alias_sequence_id,
|
||||
.production_id = production_id,
|
||||
.visible = metadata.visible,
|
||||
.named = metadata.named,
|
||||
.has_changes = false,
|
||||
|
|
@ -805,56 +805,90 @@ static void ts_subtree__write_dot_string(FILE *f, const char *string) {
|
|||
}
|
||||
}
|
||||
|
||||
static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit,
|
||||
const TSLanguage *language, bool is_root,
|
||||
bool include_all, TSSymbol alias_symbol,
|
||||
bool alias_is_named) {
|
||||
static const char *ROOT_FIELD = "__ROOT__";
|
||||
|
||||
static size_t ts_subtree__write_to_string(
|
||||
Subtree self, char *string, size_t limit,
|
||||
const TSLanguage *language, bool include_all,
|
||||
TSSymbol alias_symbol, bool alias_is_named, const char *field_name
|
||||
) {
|
||||
if (!self.ptr) return snprintf(string, limit, "(NULL)");
|
||||
|
||||
char *cursor = string;
|
||||
char **writer = (limit > 0) ? &cursor : &string;
|
||||
bool visible =
|
||||
include_all ||
|
||||
is_root ||
|
||||
alias_is_named ||
|
||||
ts_subtree_missing(self) ||
|
||||
(ts_subtree_visible(self) && ts_subtree_named(self)) ||
|
||||
alias_is_named;
|
||||
|
||||
if (visible && !is_root) {
|
||||
cursor += snprintf(*writer, limit, " ");
|
||||
}
|
||||
(ts_subtree_visible(self) && ts_subtree_named(self));
|
||||
|
||||
if (visible) {
|
||||
if (field_name != ROOT_FIELD) {
|
||||
cursor += snprintf(*writer, limit, " ");
|
||||
|
||||
if (field_name) {
|
||||
cursor += snprintf(*writer, limit, "%s: ", field_name);
|
||||
}
|
||||
}
|
||||
|
||||
if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
|
||||
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
|
||||
cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
|
||||
} else if (ts_subtree_missing(self)) {
|
||||
cursor += snprintf(*writer, limit, "(MISSING");
|
||||
} else {
|
||||
TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
|
||||
const char *symbol_name = ts_language_symbol_name(language, symbol);
|
||||
cursor += snprintf(*writer, limit, "(%s", symbol_name);
|
||||
if (ts_subtree_missing(self)) {
|
||||
cursor += snprintf(*writer, limit, "(MISSING ");
|
||||
if (alias_is_named || ts_subtree_named(self)) {
|
||||
cursor += snprintf(*writer, limit, "%s", symbol_name);
|
||||
} else {
|
||||
cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
|
||||
}
|
||||
} else {
|
||||
cursor += snprintf(*writer, limit, "(%s", symbol_name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (ts_subtree_child_count(self)) {
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
language,
|
||||
self.ptr->production_id,
|
||||
&field_map,
|
||||
&field_map_end
|
||||
);
|
||||
|
||||
uint32_t structural_child_index = 0;
|
||||
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
|
||||
Subtree child = self.ptr->children[i];
|
||||
if (ts_subtree_extra(child)) {
|
||||
cursor += ts_subtree__write_to_string(
|
||||
child, *writer, limit,
|
||||
language, false, include_all,
|
||||
0, false
|
||||
language, include_all,
|
||||
0, false, NULL
|
||||
);
|
||||
} else {
|
||||
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
|
||||
TSSymbol alias_symbol = alias_sequence
|
||||
? alias_sequence[structural_child_index]
|
||||
: 0;
|
||||
bool alias_is_named = alias_symbol
|
||||
? ts_language_symbol_metadata(language, alias_symbol).named
|
||||
: false;
|
||||
|
||||
const char *child_field_name = visible ? NULL : field_name;
|
||||
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
|
||||
if (!i->inherited && i->child_index == structural_child_index) {
|
||||
child_field_name = language->field_names[i->field_id];
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
cursor += ts_subtree__write_to_string(
|
||||
child, *writer, limit,
|
||||
language, false, include_all,
|
||||
alias_symbol,
|
||||
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
|
||||
language, include_all,
|
||||
alias_symbol, alias_is_named, child_field_name
|
||||
);
|
||||
structural_child_index++;
|
||||
}
|
||||
|
|
@ -866,15 +900,23 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim
|
|||
return cursor - string;
|
||||
}
|
||||
|
||||
char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) {
|
||||
char *ts_subtree_string(
|
||||
Subtree self,
|
||||
const TSLanguage *language,
|
||||
bool include_all
|
||||
) {
|
||||
char scratch_string[1];
|
||||
size_t size = ts_subtree__write_to_string(
|
||||
self, scratch_string, 0,
|
||||
language, true,
|
||||
include_all, 0, false
|
||||
language, include_all,
|
||||
0, false, ROOT_FIELD
|
||||
) + 1;
|
||||
char *result = malloc(size * sizeof(char));
|
||||
ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false);
|
||||
ts_subtree__write_to_string(
|
||||
self, result, size,
|
||||
language, include_all,
|
||||
0, false, ROOT_FIELD
|
||||
);
|
||||
return result;
|
||||
}
|
||||
|
||||
|
|
@ -913,20 +955,17 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
|
|||
fprintf(f, "\"]\n");
|
||||
|
||||
uint32_t child_start_offset = start_offset;
|
||||
uint32_t structural_child_index = 0;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
language,
|
||||
ts_subtree_alias_sequence_id(*self)
|
||||
);
|
||||
uint32_t child_info_offset =
|
||||
language->max_alias_sequence_length *
|
||||
ts_subtree_production_id(*self);
|
||||
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
|
||||
const Subtree *child = &self->ptr->children[i];
|
||||
if (ts_subtree_extra(*child)) {
|
||||
ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f);
|
||||
} else {
|
||||
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
|
||||
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
|
||||
structural_child_index++;
|
||||
TSSymbol alias_symbol = 0;
|
||||
if (!ts_subtree_extra(*child) && child_info_offset) {
|
||||
alias_symbol = language->alias_sequences[child_info_offset];
|
||||
child_info_offset++;
|
||||
}
|
||||
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
|
||||
fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
|
||||
child_start_offset += ts_subtree_total_bytes(*child);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ typedef struct {
|
|||
uint32_t node_count;
|
||||
uint32_t repeat_depth;
|
||||
int32_t dynamic_precedence;
|
||||
uint16_t alias_sequence_id;
|
||||
uint16_t production_id;
|
||||
struct {
|
||||
TSSymbol symbol;
|
||||
TSStateId parse_state;
|
||||
|
|
@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
|
|||
return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
|
||||
}
|
||||
|
||||
static inline uint16_t ts_subtree_alias_sequence_id(Subtree self) {
|
||||
static inline uint16_t ts_subtree_production_id(Subtree self) {
|
||||
if (ts_subtree_child_count(self) > 0) {
|
||||
return self.ptr->alias_sequence_id;
|
||||
return self.ptr->production_id;
|
||||
} else {
|
||||
return 0;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs
|
|||
}
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
last_entry->subtree->ptr->alias_sequence_id
|
||||
last_entry->subtree->ptr->production_id
|
||||
);
|
||||
return (CursorChildIterator) {
|
||||
.tree = self->tree,
|
||||
|
|
@ -49,11 +49,11 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
|
|||
bool extra = ts_subtree_extra(*child);
|
||||
if (!extra && self->alias_sequence) {
|
||||
*visible |= self->alias_sequence[self->structural_child_index];
|
||||
self->structural_child_index++;
|
||||
}
|
||||
|
||||
self->position = length_add(self->position, ts_subtree_size(*child));
|
||||
self->child_index++;
|
||||
if (!extra) self->structural_child_index++;
|
||||
|
||||
if (self->child_index < self->parent.ptr->child_count) {
|
||||
Subtree next_child = self->parent.ptr->children[self->child_index];
|
||||
|
|
@ -210,7 +210,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
|
|||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->alias_sequence_id
|
||||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
is_aliased = alias_sequence && alias_sequence[entry->structural_child_index];
|
||||
}
|
||||
|
|
@ -230,7 +230,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
|
|||
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->alias_sequence_id
|
||||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) {
|
||||
alias_symbol = alias_sequence[last_entry->structural_child_index];
|
||||
|
|
@ -243,3 +243,51 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
|
|||
alias_symbol
|
||||
);
|
||||
}
|
||||
|
||||
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
|
||||
// Walk up the tree, visiting the current node and its invisible ancestors.
|
||||
for (unsigned i = self->stack.size - 1; i > 0; i--) {
|
||||
TreeCursorEntry *entry = &self->stack.contents[i];
|
||||
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
|
||||
|
||||
// Stop walking up when another visible node is found.
|
||||
if (i != self->stack.size - 1) {
|
||||
if (ts_subtree_visible(*entry->subtree)) break;
|
||||
const TSSymbol *alias_sequence = ts_language_alias_sequence(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id
|
||||
);
|
||||
if (alias_sequence && alias_sequence[entry->structural_child_index]) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
const TSFieldMapEntry *field_map, *field_map_end;
|
||||
ts_language_field_map(
|
||||
self->tree->language,
|
||||
parent_entry->subtree->ptr->production_id,
|
||||
&field_map, &field_map_end
|
||||
);
|
||||
|
||||
while (field_map < field_map_end) {
|
||||
if (
|
||||
!field_map->inherited &&
|
||||
field_map->child_index == entry->structural_child_index
|
||||
) return field_map->field_id;
|
||||
field_map++;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
|
||||
TSFieldId id = ts_tree_cursor_current_field_id(_self);
|
||||
if (id) {
|
||||
const TreeCursor *self = (const TreeCursor *)_self;
|
||||
return self->tree->language->field_names[id];
|
||||
} else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -12,6 +12,13 @@ bindgen \
|
|||
$header_path > $output_path
|
||||
|
||||
echo "" >> $output_path
|
||||
version_constant='TREE_SITTER_LANGUAGE_VERSION'
|
||||
version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3)
|
||||
echo "pub const $version_constant: usize = $version_number;" >> $output_path
|
||||
|
||||
defines=(
|
||||
TREE_SITTER_LANGUAGE_VERSION
|
||||
TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
|
||||
)
|
||||
|
||||
for define in ${defines[@]}; do
|
||||
define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3)
|
||||
echo "pub const $define: usize = $define_value;" >> $output_path
|
||||
done
|
||||
|
|
|
|||
14
test/fixtures/error_corpus/c_errors.txt
vendored
14
test/fixtures/error_corpus/c_errors.txt
vendored
|
|
@ -14,8 +14,8 @@ int main() {
|
|||
(primitive_type)
|
||||
(function_declarator (identifier) (parameter_list))
|
||||
(compound_statement
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)))))
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))
|
||||
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")))))
|
||||
|
||||
==============================================
|
||||
Top-level declarations with missing semicolons
|
||||
|
|
@ -27,8 +27,8 @@ static int b
|
|||
---
|
||||
|
||||
(translation_unit
|
||||
(declaration (primitive_type) (identifier) (MISSING))
|
||||
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING)))
|
||||
(declaration (primitive_type) (identifier) (MISSING ";"))
|
||||
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";")))
|
||||
|
||||
==========================================
|
||||
Partial declaration lists inside ifdefs
|
||||
|
|
@ -58,7 +58,7 @@ int c() {
|
|||
(comment)
|
||||
(declaration (primitive_type) (identifier))
|
||||
(function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal))))
|
||||
(preproc_ifdef (identifier) (MISSING))))))
|
||||
(preproc_ifdef (identifier) (MISSING "#endif"))))))
|
||||
|
||||
==========================================
|
||||
If statements with incomplete expressions
|
||||
|
|
@ -83,12 +83,12 @@ int main() {
|
|||
(if_statement
|
||||
(parenthesized_expression (field_expression
|
||||
(identifier)
|
||||
(MISSING)))
|
||||
(MISSING field_identifier)))
|
||||
(compound_statement
|
||||
(expression_statement (call_expression (identifier) (argument_list)))
|
||||
(expression_statement (call_expression (identifier) (argument_list)))
|
||||
(if_statement
|
||||
(parenthesized_expression (pointer_expression (MISSING)))
|
||||
(parenthesized_expression (pointer_expression (MISSING identifier)))
|
||||
(expression_statement (call_expression (identifier) (argument_list)))))))))
|
||||
|
||||
====================================
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ Missing object-literal values
|
|||
|
||||
(program (expression_statement (object
|
||||
(pair (property_identifier) (identifier))
|
||||
(pair (property_identifier) (MISSING)))))
|
||||
(pair (property_identifier) (MISSING identifier)))))
|
||||
|
||||
===================================================
|
||||
Extra identifiers in expressions
|
||||
|
|
@ -81,7 +81,7 @@ if ({a: 'b'} {c: 'd'}) {
|
|||
(assignment_expression
|
||||
(identifier)
|
||||
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))
|
||||
(MISSING))
|
||||
(MISSING ";"))
|
||||
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))
|
||||
|
||||
===================================================
|
||||
|
|
@ -153,7 +153,7 @@ const h = `i ${j(k} l`
|
|||
(identifier)
|
||||
(template_string (template_substitution (call_expression
|
||||
(identifier)
|
||||
(arguments (identifier) (MISSING))))))))
|
||||
(arguments (identifier) (MISSING ")"))))))))
|
||||
|
||||
=========================================================
|
||||
Long sequences of invalid tokens
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: expression '+' (math_operation expression • '+' expression)
|
||||
2: (math_operation expression '+' expression) • '+' …
|
||||
1: (math_operation expression '+' expression) • '+' …
|
||||
2: expression '+' (math_operation expression • '+' expression)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: '[' (array_type_repeat1 identifier) • identifier …
|
||||
2: '[' (array_repeat1 identifier) • identifier …
|
||||
1: '[' (array_repeat1 identifier) • identifier …
|
||||
2: '[' (array_type_repeat1 identifier) • identifier …
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: _program_start '[' (array_type_repeat1 identifier) • identifier …
|
||||
2: _program_start '[' (array_repeat1 identifier) • identifier …
|
||||
1: _program_start '[' (array_repeat1 identifier) • identifier …
|
||||
2: _program_start '[' (array_type_repeat1 identifier) • identifier …
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,9 +4,9 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left)
|
||||
1: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left)
|
||||
2: expression '+' (other_thing expression • '*' '*') (precedence: -1, associativity: Left)
|
||||
3: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left)
|
||||
3: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: (unary_b '!' expression) • '<' … (precedence: 2)
|
||||
2: (unary_a '!' expression) • '<' … (precedence: 2)
|
||||
1: (unary_a '!' expression) • '<' … (precedence: 2)
|
||||
2: (unary_b '!' expression) • '<' … (precedence: 2)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
|
|
@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
|
|||
|
||||
Possible interpretations:
|
||||
|
||||
1: identifier (function_call identifier • block) (precedence: 0, associativity: Right)
|
||||
2: identifier (expression identifier) • '{' …
|
||||
1: identifier (expression identifier) • '{' …
|
||||
2: identifier (function_call identifier • block) (precedence: 0, associativity: Right)
|
||||
|
||||
Possible resolutions:
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue