Merge pull request #271 from tree-sitter/node-fields

Add an API for associating field names with child nodes
This commit is contained in:
Max Brunsfeld 2019-03-27 17:44:48 -07:00 committed by GitHub
commit 77636e8fe6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
52 changed files with 2600 additions and 436 deletions

View file

@ -32,7 +32,7 @@ serde = "1.0"
serde_derive = "1.0"
regex-syntax = "0.6.4"
regex = "1"
rsass = "0.9.8"
rsass = "^0.9.8"
[dependencies.tree-sitter]
version = ">= 0.3.7"

View file

@ -161,7 +161,8 @@ fn parse(parser: &mut Parser, example_path: &Path, max_path_length: usize) -> us
}
fn get_language(name: &str) -> Language {
let src_dir = GRAMMARS_DIR.join(name).join("src");
TEST_LOADER
.load_language_at_path(&GRAMMARS_DIR.join(name).join("src"), &HEADER_DIR)
.load_language_at_path(&src_dir, &src_dir)
.unwrap()
}

View file

@ -4,19 +4,20 @@ use crate::error::{Error, Result};
use crate::generate::grammars::{
InlinedProductionMap, LexicalGrammar, SyntaxGrammar, VariableType,
};
use crate::generate::rules::{Alias, Associativity, Symbol, SymbolType};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{Associativity, Symbol, SymbolType};
use crate::generate::tables::{
AliasSequenceId, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
FieldLocation, ParseAction, ParseState, ParseStateId, ParseTable, ParseTableEntry,
ProductionInfo, ProductionInfoId,
};
use core::ops::Range;
use hashbrown::hash_map::Entry;
use hashbrown::{HashMap, HashSet};
use std::collections::hash_map::DefaultHasher;
use std::collections::VecDeque;
use std::u32;
use std::collections::{BTreeMap, VecDeque};
use std::fmt::Write;
use std::hash::Hasher;
use std::u32;
#[derive(Clone)]
struct AuxiliarySymbolInfo {
@ -37,6 +38,7 @@ struct ParseTableBuilder<'a> {
item_set_builder: ParseItemSetBuilder<'a>,
syntax_grammar: &'a SyntaxGrammar,
lexical_grammar: &'a LexicalGrammar,
variable_info: &'a Vec<VariableInfo>,
state_ids_by_item_set: HashMap<ParseItemSet<'a>, ParseStateId>,
item_sets_by_state_id: Vec<ParseItemSet<'a>>,
parse_state_queue: VecDeque<ParseStateQueueEntry>,
@ -47,7 +49,9 @@ struct ParseTableBuilder<'a> {
impl<'a> ParseTableBuilder<'a> {
fn build(mut self) -> Result<ParseTable> {
// Ensure that the empty alias sequence has index 0.
self.parse_table.alias_sequences.push(Vec::new());
self.parse_table
.production_infos
.push(ProductionInfo::default());
// Add the error state at index 0.
self.add_parse_state(&Vec::new(), &Vec::new(), ParseItemSet::default());
@ -176,7 +180,7 @@ impl<'a> ParseTableBuilder<'a> {
precedence: item.precedence(),
associativity: item.associativity(),
dynamic_precedence: item.production.dynamic_precedence,
alias_sequence_id: self.get_alias_sequence_id(item),
production_id: self.get_production_id(item),
}
};
@ -441,13 +445,10 @@ impl<'a> ParseTableBuilder<'a> {
.unwrap();
write!(&mut msg, "Possible interpretations:\n\n").unwrap();
let interpretions = conflicting_items
let mut interpretions = conflicting_items
.iter()
.enumerate()
.map(|(i, item)| {
.map(|item| {
let mut line = String::new();
write!(&mut line, " {}:", i + 1).unwrap();
for preceding_symbol in preceding_symbols
.iter()
.take(preceding_symbols.len() - item.step_index as usize)
@ -503,8 +504,9 @@ impl<'a> ParseTableBuilder<'a> {
.map(|i| i.0.chars().count())
.max()
.unwrap();
for (line, prec_suffix) in interpretions {
interpretions.sort_unstable();
for (i, (line, prec_suffix)) in interpretions.into_iter().enumerate() {
write!(&mut msg, " {}:", i + 1).unwrap();
msg += &line;
if let Some(prec_suffix) = prec_suffix {
for _ in line.chars().count()..max_interpretation_length {
@ -518,11 +520,12 @@ impl<'a> ParseTableBuilder<'a> {
let mut resolution_count = 0;
write!(&mut msg, "\nPossible resolutions:\n\n").unwrap();
let shift_items = conflicting_items
let mut shift_items = conflicting_items
.iter()
.filter(|i| !i.is_done())
.cloned()
.collect::<Vec<_>>();
shift_items.sort_unstable();
if actual_conflict.len() > 1 {
if shift_items.len() > 0 {
resolution_count += 1;
@ -645,29 +648,62 @@ impl<'a> ParseTableBuilder<'a> {
}
}
fn get_alias_sequence_id(&mut self, item: &ParseItem) -> AliasSequenceId {
let mut alias_sequence: Vec<Option<Alias>> = item
.production
.steps
.iter()
.map(|s| s.alias.clone())
.collect();
while alias_sequence.last() == Some(&None) {
alias_sequence.pop();
fn get_production_id(&mut self, item: &ParseItem) -> ProductionInfoId {
let mut production_info = ProductionInfo {
alias_sequence: Vec::new(),
field_map: BTreeMap::new(),
};
for (i, step) in item.production.steps.iter().enumerate() {
production_info.alias_sequence.push(step.alias.clone());
if let Some(field_name) = &step.field_name {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: false,
});
}
if step.symbol.kind == SymbolType::NonTerminal
&& !self.syntax_grammar.variables[step.symbol.index]
.kind
.is_visible()
{
let info = &self.variable_info[step.symbol.index];
for (field_name, _) in &info.fields {
production_info
.field_map
.entry(field_name.clone())
.or_insert(Vec::new())
.push(FieldLocation {
index: i,
inherited: true,
});
}
}
}
while production_info.alias_sequence.last() == Some(&None) {
production_info.alias_sequence.pop();
}
if item.production.steps.len() > self.parse_table.max_aliased_production_length {
self.parse_table.max_aliased_production_length = item.production.steps.len()
}
if let Some(index) = self
.parse_table
.alias_sequences
.production_infos
.iter()
.position(|seq| *seq == alias_sequence)
.position(|seq| *seq == production_info)
{
index
} else {
self.parse_table.alias_sequences.push(alias_sequence);
self.parse_table.alias_sequences.len() - 1
self.parse_table.production_infos.push(production_info);
self.parse_table.production_infos.len() - 1
}
}
@ -718,6 +754,7 @@ pub(crate) fn build_parse_table(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
inlines: &InlinedProductionMap,
variable_info: &Vec<VariableInfo>,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, Vec<TokenSet>)> {
let item_set_builder = ParseItemSetBuilder::new(syntax_grammar, lexical_grammar, inlines);
@ -734,13 +771,14 @@ pub(crate) fn build_parse_table(
lexical_grammar,
state_ids_to_log,
item_set_builder,
variable_info,
state_ids_by_item_set: HashMap::new(),
item_sets_by_state_id: Vec::new(),
parse_state_queue: VecDeque::new(),
parse_table: ParseTable {
states: Vec::new(),
symbols: Vec::new(),
alias_sequences: Vec::new(),
production_infos: Vec::new(),
max_aliased_production_length: 0,
},
}

View file

@ -20,6 +20,7 @@ lazy_static! {
precedence: 0,
associativity: None,
alias: None,
field_name: None,
}],
};
}

View file

@ -59,7 +59,7 @@ impl<'a> Minimizer<'a> {
ParseAction::ShiftExtra => continue,
ParseAction::Reduce {
child_count: 1,
alias_sequence_id: 0,
production_id: 0,
symbol,
..
} => {

View file

@ -1,5 +1,5 @@
mod build_lex_table;
mod build_parse_table;
pub(crate) mod build_lex_table;
pub(crate) mod build_parse_table;
mod coincident_tokens;
mod item;
mod item_set_builder;
@ -15,6 +15,7 @@ use self::token_conflicts::TokenConflictMap;
use crate::error::Result;
use crate::generate::grammars::{InlinedProductionMap, LexicalGrammar, SyntaxGrammar};
use crate::generate::nfa::{CharacterSet, NfaCursor};
use crate::generate::node_types::VariableInfo;
use crate::generate::rules::{AliasMap, Symbol, SymbolType};
use crate::generate::tables::{LexTable, ParseAction, ParseTable, ParseTableEntry};
use log::info;
@ -23,12 +24,18 @@ pub(crate) fn build_tables(
syntax_grammar: &SyntaxGrammar,
lexical_grammar: &LexicalGrammar,
simple_aliases: &AliasMap,
variable_info: &Vec<VariableInfo>,
inlines: &InlinedProductionMap,
minimize: bool,
state_ids_to_log: Vec<usize>,
) -> Result<(ParseTable, LexTable, LexTable, Option<Symbol>)> {
let (mut parse_table, following_tokens) =
build_parse_table(syntax_grammar, lexical_grammar, inlines, state_ids_to_log)?;
let (mut parse_table, following_tokens) = build_parse_table(
syntax_grammar,
lexical_grammar,
inlines,
variable_info,
state_ids_to_log,
)?;
let token_conflict_map = TokenConflictMap::new(lexical_grammar, following_tokens);
let coincident_token_index = CoincidentTokenIndex::new(&parse_table, lexical_grammar);
let keywords = identify_keywords(

View file

@ -32,6 +32,14 @@ function blank() {
};
}
// Build a FIELD rule that associates the given field name with the
// child node produced by `rule`.
function field(name, rule) {
  const content = normalize(rule);
  return {type: "FIELD", name, content};
}
function choice(...elements) {
return {
type: "CHOICE",
@ -204,137 +212,154 @@ function RuleBuilder(ruleMap) {
}
function grammar(baseGrammar, options) {
if (!options) {
options = baseGrammar;
baseGrammar = {
name: null,
rules: {},
extras: [normalize(/\s/)],
conflicts: [],
externals: [],
inline: []
};
if (!options) {
options = baseGrammar;
baseGrammar = {
name: null,
rules: {},
extras: [normalize(/\s/)],
conflicts: [],
externals: [],
inline: [],
supertypes: []
};
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
}
let externals = baseGrammar.externals;
if (options.externals) {
if (typeof options.externals !== "function") {
throw new Error("Grammar's 'externals' property must be a function.");
const externalsRuleBuilder = RuleBuilder(null)
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
}
const ruleMap = {};
for (const key in options.rules) {
ruleMap[key] = true;
}
for (const key in baseGrammar.rules) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
const ruleBuilder = RuleBuilder(ruleMap);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
const externalsRuleBuilder = RuleBuilder(null)
const externalRules = options.externals.call(externalsRuleBuilder, externalsRuleBuilder, baseGrammar.externals);
if (!Array.isArray(externalRules)) {
throw new Error("Grammar's 'externals' property must return an array of rules.");
}
externals = externalRules.map(normalize);
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
const ruleMap = {};
for (const key in options.rules) {
ruleMap[key] = true;
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
for (const key in baseGrammar.rules) {
ruleMap[key] = true;
}
for (const external of externals) {
if (typeof external.name === 'string') {
ruleMap[external.name] = true;
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const ruleBuilder = RuleBuilder(ruleMap);
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
const name = options.name;
if (typeof name !== "string") {
throw new Error("Grammar's 'name' property must be a string.");
if (!Array.isArray(conflictRules)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
if (!/^[a-zA-Z_]\w*$/.test(name)) {
throw new Error("Grammar's 'name' property must not start with a digit and cannot contain non-word characters.");
}
let rules = Object.assign({}, baseGrammar.rules);
if (options.rules) {
if (typeof options.rules !== "object") {
throw new Error("Grammar's 'rules' property must be an object.");
}
for (const ruleName in options.rules) {
const ruleFn = options.rules[ruleName];
if (typeof ruleFn !== "function") {
throw new Error("Grammar rules must all be functions. '" + ruleName + "' rule is not.");
}
rules[ruleName] = normalize(ruleFn.call(ruleBuilder, ruleBuilder, baseGrammar.rules[ruleName]));
}
}
let extras = baseGrammar.extras.slice();
if (options.extras) {
if (typeof options.extras !== "function") {
throw new Error("Grammar's 'extras' property must be a function.");
}
extras = options.extras
.call(ruleBuilder, ruleBuilder, baseGrammar.extras)
.map(normalize);
}
let word = baseGrammar.word;
if (options.word) {
word = options.word.call(ruleBuilder, ruleBuilder).name;
if (typeof word != 'string') {
throw new Error("Grammar's 'word' property must be a named rule.");
}
}
let conflicts = baseGrammar.conflicts;
if (options.conflicts) {
if (typeof options.conflicts !== "function") {
throw new Error("Grammar's 'conflicts' property must be a function.");
}
const baseConflictRules = baseGrammar.conflicts.map(conflict => conflict.map(sym));
const conflictRules = options.conflicts.call(ruleBuilder, ruleBuilder, baseConflictRules);
if (!Array.isArray(conflictRules)) {
conflicts = conflictRules.map(conflictSet => {
if (!Array.isArray(conflictSet)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
conflicts = conflictRules.map(conflictSet => {
if (!Array.isArray(conflictSet)) {
throw new Error("Grammar's conflicts must be an array of arrays of rules.");
}
return conflictSet.map(symbol => normalize(symbol).name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline};
return conflictSet.map(symbol => normalize(symbol).name);
});
}
let inline = baseGrammar.inline;
if (options.inline) {
if (typeof options.inline !== "function") {
throw new Error("Grammar's 'inline' property must be a function.");
}
const baseInlineRules = baseGrammar.inline.map(sym);
const inlineRules = options.inline.call(ruleBuilder, ruleBuilder, baseInlineRules);
if (!Array.isArray(inlineRules)) {
throw new Error("Grammar's inline must be an array of rules.");
}
inline = inlineRules.map(symbol => symbol.name);
}
let supertypes = baseGrammar.supertypes;
if (options.supertypes) {
if (typeof options.supertypes !== "function") {
throw new Error("Grammar's 'supertypes' property must be a function.");
}
const baseSupertypeRules = baseGrammar.supertypes.map(sym);
const supertypeRules = options.supertypes.call(ruleBuilder, ruleBuilder, baseSupertypeRules);
if (!Array.isArray(supertypeRules)) {
throw new Error("Grammar's supertypes must be an array of rules.");
}
supertypes = supertypeRules.map(symbol => symbol.name);
}
if (Object.keys(rules).length == 0) {
throw new Error("Grammar must have at least one rule.");
}
return {name, word, rules, extras, conflicts, externals, inline, supertypes};
}
function checkArguments(ruleCount, caller, callerName, suffix = '') {
if (ruleCount > 1) {
const error = new Error([
@ -357,6 +382,7 @@ global.seq = seq;
global.sym = sym;
global.token = token;
global.grammar = grammar;
global.field = field;
const result = require(process.env.TREE_SITTER_GRAMMAR_PATH);
console.log(JSON.stringify(result, null, 2));

View file

@ -27,6 +27,7 @@ pub(crate) struct InputGrammar {
pub expected_conflicts: Vec<Vec<String>>,
pub external_tokens: Vec<Rule>,
pub variables_to_inline: Vec<String>,
pub supertype_symbols: Vec<String>,
pub word_token: Option<String>,
}
@ -54,6 +55,7 @@ pub(crate) struct ProductionStep {
pub precedence: i32,
pub associativity: Option<Associativity>,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Debug, PartialEq, Eq)]
@ -87,6 +89,7 @@ pub(crate) struct SyntaxGrammar {
pub extra_tokens: Vec<Symbol>,
pub expected_conflicts: Vec<Vec<Symbol>>,
pub external_tokens: Vec<ExternalToken>,
pub supertype_symbols: Vec<Symbol>,
pub variables_to_inline: Vec<Symbol>,
pub word_token: Option<Symbol>,
}
@ -99,6 +102,7 @@ impl ProductionStep {
precedence: 0,
associativity: None,
alias: None,
field_name: None,
}
}
@ -108,6 +112,7 @@ impl ProductionStep {
precedence,
associativity,
alias: self.alias,
field_name: self.field_name,
}
}
@ -120,6 +125,16 @@ impl ProductionStep {
value: value.to_string(),
is_named,
}),
field_name: self.field_name,
}
}
pub(crate) fn with_field_name(self, name: &str) -> Self {
Self {
symbol: self.symbol,
precedence: self.precedence,
associativity: self.associativity,
alias: self.alias,
field_name: Some(name.to_string()),
}
}
}
@ -174,6 +189,12 @@ impl Variable {
}
}
impl VariableType {
pub fn is_visible(&self) -> bool {
*self == VariableType::Named || *self == VariableType::Anonymous
}
}
impl LexicalGrammar {
pub fn variable_indices_for_nfa_states<'a>(
&'a self,

View file

@ -13,6 +13,7 @@ use std::process::{Command, Stdio};
mod build_tables;
mod grammars;
mod nfa;
mod node_types;
mod npm_files;
mod parse_grammar;
mod prepare_grammar;
@ -27,6 +28,12 @@ lazy_static! {
.unwrap();
}
struct GeneratedParser {
name: String,
c_code: String,
node_types_json: String,
}
pub fn generate_parser_in_directory(
repo_path: &PathBuf,
grammar_path: Option<&str>,
@ -51,10 +58,16 @@ pub fn generate_parser_in_directory(
}
}
let (language_name, c_code) =
generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
let GeneratedParser {
name: language_name,
c_code,
node_types_json,
} = generate_parser_for_grammar_with_opts(&grammar_json, minimize, state_ids_to_log)?;
fs::write(&repo_src_path.join("parser.c"), c_code)
.map_err(|e| format!("Failed to write parser.c: {}", e))?;
fs::write(&repo_src_path.join("node-types.json"), node_types_json)
.map_err(|e| format!("Failed to write node-types.json: {}", e))?;
fs::write(
&repo_header_path.join("parser.h"),
tree_sitter::PARSER_HEADER,
@ -74,27 +87,37 @@ pub fn generate_parser_in_directory(
pub fn generate_parser_for_grammar(grammar_json: &str) -> Result<(String, String)> {
let grammar_json = JSON_COMMENT_REGEX.replace_all(grammar_json, "\n");
generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())
let parser = generate_parser_for_grammar_with_opts(&grammar_json, true, Vec::new())?;
Ok((parser.name, parser.c_code))
}
fn generate_parser_for_grammar_with_opts(
grammar_json: &str,
minimize: bool,
state_ids_to_log: Vec<usize>,
) -> Result<(String, String)> {
) -> Result<GeneratedParser> {
let input_grammar = parse_grammar(grammar_json)?;
let (syntax_grammar, lexical_grammar, inlines, simple_aliases) =
prepare_grammar(&input_grammar)?;
let variable_info = node_types::get_variable_info(&syntax_grammar, &lexical_grammar)?;
let node_types_json = node_types::generate_node_types_json(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
);
let (parse_table, main_lex_table, keyword_lex_table, keyword_capture_token) = build_tables(
&syntax_grammar,
&lexical_grammar,
&simple_aliases,
&variable_info,
&inlines,
minimize,
state_ids_to_log,
)?;
let name = input_grammar.name;
let c_code = render_c_code(
&input_grammar.name,
&name,
parse_table,
main_lex_table,
keyword_lex_table,
@ -103,7 +126,11 @@ fn generate_parser_for_grammar_with_opts(
lexical_grammar,
simple_aliases,
);
Ok((input_grammar.name, c_code))
Ok(GeneratedParser {
name,
c_code,
node_types_json: serde_json::to_string_pretty(&node_types_json).unwrap(),
})
}
fn load_grammar_file(grammar_path: &Path) -> Result<String> {

View file

@ -0,0 +1,842 @@
use super::grammars::{LexicalGrammar, SyntaxGrammar, VariableType};
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use crate::error::{Error, Result};
use hashbrown::HashMap;
use serde_derive::Serialize;
use std::collections::BTreeMap;
use std::mem;
/// The type of a child node as it appears in the syntax tree: either the
/// child's own symbol, or the alias that a production step assigns to it.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub(crate) enum ChildType {
    Normal(Symbol),
    Aliased(Alias),
}
/// Information about one named field within a node type.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldInfo {
    // True when every production of the node supplies this field.
    pub required: bool,
    // True when the field may hold more than one child (set for fields
    // that occur in recursive rules, or inherited from hidden children).
    pub multiple: bool,
    // Sorted, deduplicated list of child types this field can contain.
    pub types: Vec<ChildType>,
}
/// Aggregated structural facts about one syntax variable (node type),
/// computed by `get_variable_info` and consumed when generating
/// `node-types.json` and the parse table's field maps.
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct VariableInfo {
    // Field name -> summary of the values that field can hold.
    pub fields: HashMap<String, FieldInfo>,
    // Sorted, deduplicated list of child types that can appear inside this
    // node, directly or via hidden children.
    pub child_types: Vec<ChildType>,
    // True if some production of this variable has more than one step.
    pub has_multi_step_production: bool,
}
/// One entry in the generated `node-types.json` file.
#[derive(Debug, Serialize, PartialEq, Eq, Default)]
pub(crate) struct NodeInfoJSON {
    // Serialized as "type": the node's kind string.
    #[serde(rename = "type")]
    kind: String,
    named: bool,
    // Omitted from the JSON when the node has no fields.
    #[serde(skip_serializing_if = "Option::is_none")]
    fields: Option<BTreeMap<String, FieldInfoJSON>>,
    // Only populated for supertype nodes; omitted otherwise.
    #[serde(skip_serializing_if = "Option::is_none")]
    subtypes: Option<Vec<NodeTypeJSON>>,
}
/// A (type, named) pair identifying one kind of node in `node-types.json`.
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
pub(crate) struct NodeTypeJSON {
    #[serde(rename = "type")]
    kind: String,
    named: bool,
}
/// JSON description of a single field: whether it can repeat, whether it is
/// always present, and the node types it can contain.
#[derive(Debug, Serialize, PartialEq, Eq)]
pub(crate) struct FieldInfoJSON {
    multiple: bool,
    required: bool,
    types: Vec<NodeTypeJSON>,
}
/// Compute a `VariableInfo` for each syntax variable, summarizing which
/// fields and child node types it can contain. The result drives both
/// `node-types.json` generation and the parse table's field maps.
///
/// Returns an error if a declared supertype symbol is not hidden, or if a
/// supertype can have more than one visible child.
pub(crate) fn get_variable_info(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
) -> Result<Vec<VariableInfo>> {
    let mut result = Vec::new();
    // Determine which field names and child node types can appear directly
    // within each type of node.
    for (i, variable) in syntax_grammar.variables.iter().enumerate() {
        let mut info = VariableInfo {
            fields: HashMap::new(),
            child_types: Vec::new(),
            has_multi_step_production: false,
        };
        // Does any production of this variable refer back to the variable itself?
        let is_recursive = variable
            .productions
            .iter()
            .any(|p| p.steps.iter().any(|s| s.symbol == Symbol::non_terminal(i)));
        for production in &variable.productions {
            if production.steps.len() > 1 {
                info.has_multi_step_production = true;
            }
            for step in &production.steps {
                // An aliased step appears in the tree under its alias rather
                // than under its own symbol.
                let child_type = if let Some(alias) = &step.alias {
                    ChildType::Aliased(alias.clone())
                } else {
                    ChildType::Normal(step.symbol)
                };
                if let Some(field_name) = &step.field_name {
                    let field_info = info.fields.entry(field_name.clone()).or_insert(FieldInfo {
                        multiple: false,
                        required: true,
                        types: Vec::new(),
                    });
                    // A field inside a recursive rule can occur multiple times.
                    field_info.multiple |= is_recursive;
                    // Keep `types` sorted and deduplicated via binary search.
                    if let Err(i) = field_info.types.binary_search(&child_type) {
                        field_info.types.insert(i, child_type.clone());
                    }
                }
                if let Err(i) = info.child_types.binary_search(&child_type) {
                    info.child_types.insert(i, child_type.clone());
                }
            }
        }
        // A field is only *required* if every production supplies it.
        for production in &variable.productions {
            let production_fields: Vec<&String> = production
                .steps
                .iter()
                .filter_map(|s| s.field_name.as_ref())
                .collect();
            for (field_name, field_info) in info.fields.iter_mut() {
                if !production_fields.contains(&field_name) {
                    field_info.required = false;
                }
            }
        }
        result.push(info);
    }
    // Expand each node type's information recursively to inherit the properties of
    // hidden children. This iterates to a fixed point: `done` is cleared whenever
    // a pass changes anything.
    let mut done = false;
    while !done {
        done = true;
        for (i, variable) in syntax_grammar.variables.iter().enumerate() {
            // Move this variable's info out of the vector so it can be modified
            // while reading from other entries of the vector.
            let mut variable_info = VariableInfo::default();
            mem::swap(&mut variable_info, &mut result[i]);
            for production in &variable.productions {
                for step in &production.steps {
                    let child_symbol = step.symbol;
                    // Only un-aliased, hidden non-terminals are expanded; their
                    // children appear to belong to this node.
                    if step.alias.is_none()
                        && child_symbol.kind == SymbolType::NonTerminal
                        && !syntax_grammar.variables[child_symbol.index]
                            .kind
                            .is_visible()
                    {
                        let child_variable_info = &result[child_symbol.index];
                        // If a hidden child can have multiple children, then this
                        // node can appear to have multiple children.
                        if child_variable_info.has_multi_step_production {
                            variable_info.has_multi_step_production = true;
                        }
                        // Inherit fields from this hidden child
                        for (field_name, child_field_info) in &child_variable_info.fields {
                            let field_info = variable_info
                                .fields
                                .entry(field_name.clone())
                                .or_insert_with(|| {
                                    done = false;
                                    child_field_info.clone()
                                });
                            if child_field_info.multiple && !field_info.multiple {
                                field_info.multiple = child_field_info.multiple;
                                done = false;
                            }
                            if !child_field_info.required && field_info.required {
                                field_info.required = child_field_info.required;
                                done = false;
                            }
                            for child_type in &child_field_info.types {
                                if let Err(i) = field_info.types.binary_search(&child_type) {
                                    field_info.types.insert(i, child_type.clone());
                                    done = false;
                                }
                            }
                        }
                        // Supertype children are excluded here: their members are
                        // reported via `subtypes`, not as ordinary child types.
                        if !syntax_grammar.supertype_symbols.contains(&child_symbol) {
                            // Inherit child types from this hidden child
                            for child_type in &child_variable_info.child_types {
                                if let Err(i) = variable_info.child_types.binary_search(&child_type)
                                {
                                    variable_info.child_types.insert(i, child_type.clone());
                                    done = false;
                                }
                            }
                            // If any field points to this hidden child, inherit child types
                            // for the field.
                            if let Some(field_name) = &step.field_name {
                                let field_info = variable_info.fields.get_mut(field_name).unwrap();
                                for child_type in &child_variable_info.child_types {
                                    if let Err(i) = field_info.types.binary_search(&child_type) {
                                        field_info.types.insert(i, child_type.clone());
                                        done = false;
                                    }
                                }
                            }
                        }
                    }
                }
            }
            // Move this variable's info back into the vector.
            result[i] = variable_info;
        }
    }
    // Validate the declared supertype symbols.
    for supertype_symbol in &syntax_grammar.supertype_symbols {
        let variable = &syntax_grammar.variables[supertype_symbol.index];
        if variable.kind != VariableType::Hidden {
            return Err(Error::grammar(&format!(
                "Supertype symbols must be hidden, but `{}` is not",
                variable.name
            )));
        }
        if result[supertype_symbol.index].has_multi_step_production {
            return Err(Error::grammar(&format!(
                "Supertype symbols must always have a single visible child, but `{}` can have multiple",
                variable.name
            )));
        }
    }
    // A child type is "visible" if it actually appears as a node in the tree:
    // aliased children and supertypes always do; other symbols only when their
    // variable kind is visible.
    let child_type_is_visible = |child_type: &ChildType| match child_type {
        ChildType::Aliased(_) => true,
        ChildType::Normal(symbol) => {
            if syntax_grammar.supertype_symbols.contains(&symbol) {
                return true;
            }
            let variable_kind = match symbol.kind {
                SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
                SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
                SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
                _ => VariableType::Hidden,
            };
            variable_kind.is_visible()
        }
    };
    // A supertype's reported subtypes include only visible child types.
    for supertype_symbol in &syntax_grammar.supertype_symbols {
        result[supertype_symbol.index]
            .child_types
            .retain(child_type_is_visible);
    }
    for i in 0..result.len() {
        let mut variable_info = VariableInfo::default();
        mem::swap(&mut variable_info, &mut result[i]);
        // For each field, make the `types` list more concise by replacing sets of
        // subtypes with a single supertype.
        for (_, field_info) in variable_info.fields.iter_mut() {
            for supertype_symbol in &syntax_grammar.supertype_symbols {
                if sorted_vec_replace(
                    &mut field_info.types,
                    &result[supertype_symbol.index].child_types,
                    ChildType::Normal(*supertype_symbol),
                ) {
                    break;
                }
            }
            field_info.types.retain(child_type_is_visible);
        }
        result[i] = variable_info;
    }
    Ok(result)
}
/// Replace a set of subtype values in `left` with a single `value`.
///
/// Both `left` and `right` must be sorted. If every element of `right` is
/// present in `left`, those elements are removed from `left`, `value` is
/// inserted at its sorted position, and `true` is returned. Otherwise
/// `left` is left unchanged and `false` is returned.
fn sorted_vec_replace<T>(left: &mut Vec<T>, right: &[T], value: T) -> bool
where
    T: Eq + Ord,
{
    // A shorter vector cannot contain all of `right`'s elements. This also
    // guards the indexing below against an empty `left`, which would
    // otherwise panic on `left[0]`.
    if left.len() < right.len() {
        return false;
    }
    // Verify that `left` contains every element of `right`, walking both
    // sorted sequences in tandem.
    let mut i = 0;
    for right_elem in right.iter() {
        while left[i] < *right_elem {
            i += 1;
            if i == left.len() {
                return false;
            }
        }
        if left[i] != *right_elem {
            return false;
        }
    }
    // Remove the elements of `right` from `left`, again walking both
    // sequences together.
    i = 0;
    left.retain(|left_elem| {
        if i == right.len() {
            return true;
        }
        while right[i] < *left_elem {
            i += 1;
            if i == right.len() {
                return true;
            }
        }
        right[i] != *left_elem
    });
    // Insert the replacement value at its sorted position (no-op if present).
    if let Err(i) = left.binary_search(&value) {
        left.insert(i, value);
    }
    true
}
/// Build the list of entries that will be serialized as `node-types.json`,
/// describing every visible node type in the grammar.
pub(crate) fn generate_node_types_json(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    simple_aliases: &AliasMap,
    variable_info: &Vec<VariableInfo>,
) -> Vec<NodeInfoJSON> {
    let mut node_types_json = BTreeMap::new();
    // Convert an internal `ChildType` into its public (type, named) form,
    // applying any grammar-wide "simple" alias for the symbol.
    let child_type_to_node_type = |child_type: &ChildType| match child_type {
        ChildType::Aliased(alias) => NodeTypeJSON {
            kind: alias.value.clone(),
            named: alias.is_named,
        },
        ChildType::Normal(symbol) => {
            if let Some(alias) = simple_aliases.get(&symbol) {
                NodeTypeJSON {
                    kind: alias.value.clone(),
                    named: alias.is_named,
                }
            } else {
                match symbol.kind {
                    SymbolType::NonTerminal => {
                        let variable = &syntax_grammar.variables[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    SymbolType::Terminal => {
                        let variable = &lexical_grammar.variables[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    SymbolType::External => {
                        let variable = &syntax_grammar.external_tokens[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    _ => panic!("Unexpected symbol type"),
                }
            }
        }
    };
    for (i, info) in variable_info.iter().enumerate() {
        let symbol = Symbol::non_terminal(i);
        let variable = &syntax_grammar.variables[i];
        // The public name may differ from the rule name when a simple alias
        // applies to the whole variable.
        let name = simple_aliases
            .get(&Symbol::non_terminal(i))
            .map_or(&variable.name, |alias| &alias.value);
        if syntax_grammar.supertype_symbols.contains(&symbol) {
            // Supertype nodes list their subtypes rather than fields.
            let node_type_json =
                node_types_json
                    .entry(name.clone())
                    .or_insert_with(|| NodeInfoJSON {
                        kind: name.clone(),
                        named: true,
                        fields: None,
                        subtypes: None,
                    });
            let mut subtypes = info
                .child_types
                .iter()
                .map(child_type_to_node_type)
                .collect::<Vec<_>>();
            subtypes.sort_unstable();
            subtypes.dedup();
            node_type_json.subtypes = Some(subtypes);
        } else if variable.kind.is_visible() {
            let node_type_json =
                node_types_json
                    .entry(name.clone())
                    .or_insert_with(|| NodeInfoJSON {
                        kind: name.clone(),
                        named: true,
                        fields: None,
                        subtypes: None,
                    });
            // Collect this variable's fields into their JSON form, keeping the
            // `types` lists sorted and deduplicated.
            let mut fields_json = BTreeMap::new();
            for (field, field_info) in info.fields.iter() {
                let field_info_json = fields_json.entry(field.clone()).or_insert(FieldInfoJSON {
                    multiple: false,
                    required: true,
                    types: Vec::new(),
                });
                // `multiple` if any contribution is multiple; `required` only
                // if every contribution is required.
                field_info_json.multiple |= field_info.multiple;
                field_info_json.required &= field_info.required;
                field_info_json
                    .types
                    .extend(field_info.types.iter().map(child_type_to_node_type));
                field_info_json.types.sort_unstable();
                field_info_json.types.dedup();
            }
            node_type_json.fields = Some(fields_json);
        }
    }
    // Append simple entries (no fields, no subtypes) for the lexical tokens.
    let mut result = node_types_json.into_iter().map(|e| e.1).collect::<Vec<_>>();
    for variable in &lexical_grammar.variables {
        if variable.kind == VariableType::Named {
            result.push(NodeInfoJSON {
                kind: variable.name.clone(),
                named: true,
                fields: None,
                subtypes: None,
            });
        } else if variable.kind == VariableType::Anonymous {
            result.push(NodeInfoJSON {
                kind: variable.name.clone(),
                named: false,
                fields: None,
                subtypes: None,
            });
        }
    }
    result
}
#[cfg(test)]
mod tests {
    //! Tests for field/variable-info extraction and `node-types.json`
    //! generation.
    use super::*;
    use crate::generate::grammars::{
        InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
    };
    use crate::generate::prepare_grammar::prepare_grammar;
    use crate::generate::rules::Rule;

    // Fields declared with `Rule::field` should appear in the generated JSON
    // with their child node types and required/multiple flags.
    #[test]
    fn test_node_types_simple() {
        let node_types = get_node_types(InputGrammar {
            name: String::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
            supertype_symbols: vec![],
            variables: vec![
                Variable {
                    name: "v1".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::seq(vec![
                        Rule::field("f1".to_string(), Rule::named("v2")),
                        Rule::field("f2".to_string(), Rule::string(";")),
                    ]),
                },
                Variable {
                    name: "v2".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("x"),
                },
            ],
        });
        // `v1` carries both fields: `f1` points at the named node `v2`,
        // `f2` at the anonymous token `;`.
        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
                subtypes: None,
                fields: Some(
                    vec![
                        (
                            "f1".to_string(),
                            FieldInfoJSON {
                                multiple: false,
                                required: true,
                                types: vec![NodeTypeJSON {
                                    kind: "v2".to_string(),
                                    named: true,
                                }]
                            }
                        ),
                        (
                            "f2".to_string(),
                            FieldInfoJSON {
                                multiple: false,
                                required: true,
                                types: vec![NodeTypeJSON {
                                    kind: ";".to_string(),
                                    named: false,
                                }]
                            }
                        ),
                    ]
                    .into_iter()
                    .collect()
                )
            }
        );
        // Leaf tokens get plain entries with no fields or subtypes.
        assert_eq!(
            node_types[1],
            NodeInfoJSON {
                kind: ";".to_string(),
                named: false,
                subtypes: None,
                fields: None
            }
        );
        assert_eq!(
            node_types[2],
            NodeInfoJSON {
                kind: "v2".to_string(),
                named: true,
                subtypes: None,
                fields: None
            }
        );
    }

    // A hidden rule listed as a supertype should get its own node-type entry
    // with a sorted `subtypes` list, and fields that reference it should use
    // the supertype itself as the child type (not its expansions).
    #[test]
    fn test_node_types_with_supertypes() {
        let node_types = get_node_types(InputGrammar {
            name: String::new(),
            extra_tokens: Vec::new(),
            external_tokens: Vec::new(),
            expected_conflicts: Vec::new(),
            variables_to_inline: Vec::new(),
            word_token: None,
            supertype_symbols: vec!["_v2".to_string()],
            variables: vec![
                Variable {
                    name: "v1".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::field("f1".to_string(), Rule::named("_v2")),
                },
                Variable {
                    name: "_v2".to_string(),
                    kind: VariableType::Hidden,
                    rule: Rule::choice(vec![
                        Rule::named("v3"),
                        Rule::named("v4"),
                        Rule::string("*"),
                    ]),
                },
                Variable {
                    name: "v3".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("x"),
                },
                Variable {
                    name: "v4".to_string(),
                    kind: VariableType::Named,
                    rule: Rule::string("y"),
                },
            ],
        });
        // The supertype entry lists all of its concrete alternatives.
        assert_eq!(
            node_types[0],
            NodeInfoJSON {
                kind: "_v2".to_string(),
                named: true,
                fields: None,
                subtypes: Some(vec![
                    NodeTypeJSON {
                        kind: "*".to_string(),
                        named: false,
                    },
                    NodeTypeJSON {
                        kind: "v3".to_string(),
                        named: true,
                    },
                    NodeTypeJSON {
                        kind: "v4".to_string(),
                        named: true,
                    },
                ]),
            }
        );
        // `f1`'s type is the supertype `_v2` itself.
        assert_eq!(
            node_types[1],
            NodeInfoJSON {
                kind: "v1".to_string(),
                named: true,
                subtypes: None,
                fields: Some(
                    vec![(
                        "f1".to_string(),
                        FieldInfoJSON {
                            multiple: false,
                            required: true,
                            types: vec![NodeTypeJSON {
                                kind: "_v2".to_string(),
                                named: true,
                            }]
                        }
                    ),]
                    .into_iter()
                    .collect()
                )
            }
        );
    }

    // Basic field extraction: a field present in every production is
    // `required`; one present in only some productions is not, and its
    // `types` list collects all node types seen across productions.
    #[test]
    fn test_get_variable_info() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    // Required field `field1` has only one node type.
                    SyntaxVariable {
                        name: "rule0".to_string(),
                        kind: VariableType::Named,
                        productions: vec![Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(0)),
                                ProductionStep::new(Symbol::non_terminal(1))
                                    .with_field_name("field1"),
                            ],
                        }],
                    },
                    // Hidden node
                    SyntaxVariable {
                        name: "_rule1".to_string(),
                        kind: VariableType::Hidden,
                        productions: vec![Production {
                            dynamic_precedence: 0,
                            steps: vec![ProductionStep::new(Symbol::terminal(1))],
                        }],
                    },
                    // Optional field `field2` can have two possible node types.
                    SyntaxVariable {
                        name: "rule2".to_string(),
                        kind: VariableType::Named,
                        productions: vec![
                            Production {
                                dynamic_precedence: 0,
                                steps: vec![ProductionStep::new(Symbol::terminal(0))],
                            },
                            Production {
                                dynamic_precedence: 0,
                                steps: vec![
                                    ProductionStep::new(Symbol::terminal(0)),
                                    ProductionStep::new(Symbol::terminal(2))
                                        .with_field_name("field2"),
                                ],
                            },
                            Production {
                                dynamic_precedence: 0,
                                steps: vec![
                                    ProductionStep::new(Symbol::terminal(0)),
                                    ProductionStep::new(Symbol::terminal(3))
                                        .with_field_name("field2"),
                                ],
                            },
                        ],
                    },
                ],
                vec![],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();
        assert_eq!(
            variable_info[0].fields,
            vec![(
                "field1".to_string(),
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::terminal(1))],
                }
            )]
            .into_iter()
            .collect::<HashMap<_, _>>()
        );
        assert_eq!(
            variable_info[2].fields,
            vec![(
                "field2".to_string(),
                FieldInfo {
                    required: false,
                    multiple: false,
                    types: vec![
                        ChildType::Normal(Symbol::terminal(2)),
                        ChildType::Normal(Symbol::terminal(3)),
                    ],
                }
            )]
            .into_iter()
            .collect::<HashMap<_, _>>()
        );
    }

    // Fields declared inside a hidden rule should be inherited by the
    // visible rule that uses it.
    #[test]
    fn test_get_variable_info_with_inherited_fields() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    SyntaxVariable {
                        name: "rule0".to_string(),
                        kind: VariableType::Named,
                        productions: vec![Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(0)),
                                ProductionStep::new(Symbol::non_terminal(1)),
                                ProductionStep::new(Symbol::terminal(1)),
                            ],
                        }],
                    },
                    // Hidden node with fields
                    SyntaxVariable {
                        name: "_rule1".to_string(),
                        kind: VariableType::Hidden,
                        productions: vec![Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(2)),
                                ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
                            ],
                        }],
                    },
                ],
                vec![],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();
        // `rule0` inherits `field1` from the hidden `_rule1` child.
        assert_eq!(
            variable_info[0].fields,
            vec![(
                "field1".to_string(),
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::terminal(3))],
                }
            )]
            .into_iter()
            .collect::<HashMap<_, _>>()
        );
    }

    // When a hidden rule is a declared supertype, a field targeting it keeps
    // the supertype symbol as its type instead of expanding to its variants.
    #[test]
    fn test_get_variable_info_with_supertypes() {
        let variable_info = get_variable_info(
            &build_syntax_grammar(
                vec![
                    SyntaxVariable {
                        name: "rule0".to_string(),
                        kind: VariableType::Named,
                        productions: vec![Production {
                            dynamic_precedence: 0,
                            steps: vec![
                                ProductionStep::new(Symbol::terminal(0)),
                                ProductionStep::new(Symbol::non_terminal(1))
                                    .with_field_name("field1"),
                                ProductionStep::new(Symbol::terminal(1)),
                            ],
                        }],
                    },
                    SyntaxVariable {
                        name: "_rule1".to_string(),
                        kind: VariableType::Hidden,
                        productions: vec![
                            Production {
                                dynamic_precedence: 0,
                                steps: vec![ProductionStep::new(Symbol::terminal(2))],
                            },
                            Production {
                                dynamic_precedence: 0,
                                steps: vec![ProductionStep::new(Symbol::terminal(3))],
                            },
                        ],
                    },
                ],
                // _rule1 is a supertype
                vec![Symbol::non_terminal(1)],
            ),
            &build_lexical_grammar(),
        )
        .unwrap();
        assert_eq!(
            variable_info[0].fields,
            vec![(
                "field1".to_string(),
                FieldInfo {
                    required: true,
                    multiple: false,
                    types: vec![ChildType::Normal(Symbol::non_terminal(1))],
                }
            )]
            .into_iter()
            .collect::<HashMap<_, _>>()
        );
    }

    /// End-to-end helper: run the full `prepare_grammar` pipeline on an
    /// `InputGrammar`, then generate the node-types JSON from the result.
    fn get_node_types(grammar: InputGrammar) -> Vec<NodeInfoJSON> {
        let (syntax_grammar, lexical_grammar, _, simple_aliases) =
            prepare_grammar(&grammar).unwrap();
        let variable_info = get_variable_info(&syntax_grammar, &lexical_grammar).unwrap();
        generate_node_types_json(
            &syntax_grammar,
            &lexical_grammar,
            &simple_aliases,
            &variable_info,
        )
    }

    /// Build a minimal `SyntaxGrammar` with just the given variables and
    /// supertype symbols; every other field is left at its default.
    fn build_syntax_grammar(
        variables: Vec<SyntaxVariable>,
        supertype_symbols: Vec<Symbol>,
    ) -> SyntaxGrammar {
        let mut syntax_grammar = SyntaxGrammar::default();
        syntax_grammar.variables = variables;
        syntax_grammar.supertype_symbols = supertype_symbols;
        syntax_grammar
    }

    /// Build a `LexicalGrammar` with ten named placeholder tokens
    /// (`token_0` .. `token_9`) so tests can reference terminals 0-9.
    fn build_lexical_grammar() -> LexicalGrammar {
        let mut lexical_grammar = LexicalGrammar::default();
        for i in 0..10 {
            lexical_grammar.variables.push(LexicalVariable {
                name: format!("token_{}", i),
                kind: VariableType::Named,
                implicit_precedence: 0,
                start_state: 0,
            });
        }
        lexical_grammar
    }
}

View file

@ -26,6 +26,10 @@ enum RuleJSON {
CHOICE {
members: Vec<RuleJSON>,
},
FIELD {
name: String,
content: Box<RuleJSON>,
},
SEQ {
members: Vec<RuleJSON>,
},
@ -67,6 +71,7 @@ struct GrammarJSON {
externals: Option<Vec<RuleJSON>>,
extras: Option<Vec<RuleJSON>>,
inline: Option<Vec<String>>,
supertypes: Option<Vec<String>>,
word: Option<String>,
}
@ -96,6 +101,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
.collect();
let expected_conflicts = grammar_json.conflicts.unwrap_or(Vec::new());
let variables_to_inline = grammar_json.inline.unwrap_or(Vec::new());
let supertype_symbols = grammar_json.supertypes.unwrap_or(Vec::new());
Ok(InputGrammar {
name: grammar_json.name,
@ -104,6 +110,7 @@ pub(crate) fn parse_grammar(input: &str) -> Result<InputGrammar> {
extra_tokens,
expected_conflicts,
external_tokens,
supertype_symbols,
variables_to_inline,
})
}
@ -120,6 +127,7 @@ fn parse_rule(json: RuleJSON) -> Rule {
RuleJSON::PATTERN { value } => Rule::Pattern(value),
RuleJSON::SYMBOL { name } => Rule::NamedSymbol(name),
RuleJSON::CHOICE { members } => Rule::choice(members.into_iter().map(parse_rule).collect()),
RuleJSON::FIELD { content, name } => Rule::field(name, parse_rule(*content)),
RuleJSON::SEQ { members } => Rule::seq(members.into_iter().map(parse_rule).collect()),
RuleJSON::REPEAT1 { content } => Rule::repeat(parse_rule(*content)),
RuleJSON::REPEAT { content } => {

View file

@ -235,6 +235,7 @@ mod tests {
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
}
}

View file

@ -149,6 +149,7 @@ mod tests {
extra_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
external_tokens: Vec::new(),
word_token: None,
};

View file

@ -77,6 +77,12 @@ pub(super) fn extract_tokens(
})
.collect();
let supertype_symbols = grammar
.supertype_symbols
.into_iter()
.map(|symbol| symbol_replacer.replace_symbol(symbol))
.collect();
let variables_to_inline = grammar
.variables_to_inline
.into_iter()
@ -154,6 +160,7 @@ pub(super) fn extract_tokens(
expected_conflicts,
extra_tokens,
variables_to_inline,
supertype_symbols,
external_tokens,
word_token,
},
@ -519,6 +526,7 @@ mod test {
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
}
}

View file

@ -11,6 +11,7 @@ struct RuleFlattener {
precedence_stack: Vec<i32>,
associativity_stack: Vec<Associativity>,
alias_stack: Vec<Alias>,
field_name_stack: Vec<String>,
}
impl RuleFlattener {
@ -23,6 +24,7 @@ impl RuleFlattener {
precedence_stack: Vec::new(),
associativity_stack: Vec::new(),
alias_stack: Vec::new(),
field_name_stack: Vec::new(),
}
}
@ -60,6 +62,12 @@ impl RuleFlattener {
self.alias_stack.push(alias);
}
let mut has_field_name = false;
if let Some(field_name) = params.field_name {
has_field_name = true;
self.field_name_stack.push(field_name);
}
if params.dynamic_precedence.abs() > self.production.dynamic_precedence.abs() {
self.production.dynamic_precedence = params.dynamic_precedence;
}
@ -86,6 +94,10 @@ impl RuleFlattener {
self.alias_stack.pop();
}
if has_field_name {
self.field_name_stack.pop();
}
did_push
}
Rule::Symbol(symbol) => {
@ -94,6 +106,7 @@ impl RuleFlattener {
precedence: self.precedence_stack.last().cloned().unwrap_or(0),
associativity: self.associativity_stack.last().cloned(),
alias: self.alias_stack.last().cloned(),
field_name: self.field_name_stack.last().cloned(),
});
true
}
@ -190,6 +203,7 @@ unless they are used only as the grammar's start rule.
expected_conflicts: grammar.expected_conflicts,
variables_to_inline: grammar.variables_to_inline,
external_tokens: grammar.external_tokens,
supertype_symbols: grammar.supertype_symbols,
word_token: grammar.word_token,
variables,
})
@ -355,4 +369,42 @@ mod tests {
}]
);
}
#[test]
fn test_flatten_grammar_with_field_names() {
let result = flatten_variable(Variable {
name: "test".to_string(),
kind: VariableType::Named,
rule: Rule::seq(vec![
Rule::field("first-thing".to_string(), Rule::terminal(1)),
Rule::terminal(2),
Rule::choice(vec![
Rule::Blank,
Rule::field("second-thing".to_string(), Rule::terminal(3)),
]),
]),
})
.unwrap();
assert_eq!(
result.productions,
vec![
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2))
]
},
Production {
dynamic_precedence: 0,
steps: vec![
ProductionStep::new(Symbol::terminal(1)).with_field_name("first-thing"),
ProductionStep::new(Symbol::terminal(2)),
ProductionStep::new(Symbol::terminal(3)).with_field_name("second-thing"),
]
},
]
);
}
}

View file

@ -35,6 +35,15 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
extra_tokens.push(interner.intern_rule(extra_token)?);
}
let mut supertype_symbols = Vec::with_capacity(grammar.supertype_symbols.len());
for supertype_symbol_name in grammar.supertype_symbols.iter() {
supertype_symbols.push(
interner
.intern_name(supertype_symbol_name)
.ok_or_else(|| Error::undefined_symbol(supertype_symbol_name))?,
);
}
let mut expected_conflicts = Vec::new();
for conflict in grammar.expected_conflicts.iter() {
let mut interned_conflict = Vec::with_capacity(conflict.len());
@ -70,6 +79,7 @@ pub(super) fn intern_symbols(grammar: &InputGrammar) -> Result<InternedGrammar>
extra_tokens,
expected_conflicts,
variables_to_inline,
supertype_symbols,
word_token,
})
}
@ -230,6 +240,7 @@ mod tests {
external_tokens: Vec::new(),
expected_conflicts: Vec::new(),
variables_to_inline: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
}
}

View file

@ -25,6 +25,7 @@ pub(crate) struct IntermediateGrammar<T, U> {
expected_conflicts: Vec<Vec<Symbol>>,
external_tokens: Vec<U>,
variables_to_inline: Vec<Symbol>,
supertype_symbols: Vec<Symbol>,
word_token: Option<Symbol>,
}

View file

@ -114,6 +114,11 @@ impl InlinedProductionMapBuilder {
inserted_step.alias = Some(alias.clone());
}
}
if let Some(field_name) = removed_step.field_name {
for inserted_step in inserted_steps.iter_mut() {
inserted_step.field_name = Some(field_name.clone());
}
}
if let Some(last_inserted_step) = inserted_steps.last_mut() {
if last_inserted_step.precedence == 0 {
last_inserted_step.precedence = removed_step.precedence;
@ -193,6 +198,7 @@ mod tests {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
variables_to_inline: vec![Symbol::non_terminal(1)],
variables: vec![
@ -323,6 +329,7 @@ mod tests {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
};
let inline_map = process_inlines(&grammar);
@ -424,6 +431,7 @@ mod tests {
expected_conflicts: Vec::new(),
extra_tokens: Vec::new(),
external_tokens: Vec::new(),
supertype_symbols: Vec::new(),
word_token: None,
};

View file

@ -1,11 +1,14 @@
use super::grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType};
use super::nfa::CharacterSet;
use super::rules::{Alias, AliasMap, Symbol, SymbolType};
use super::tables::{AdvanceAction, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry};
use super::tables::{
AdvanceAction, FieldLocation, LexState, LexTable, ParseAction, ParseTable, ParseTableEntry,
};
use core::ops::Range;
use hashbrown::{HashMap, HashSet};
use std::fmt::Write;
use std::mem::swap;
use tree_sitter::LANGUAGE_VERSION;
macro_rules! add {
($this: tt, $($arg: tt)*) => {{
@ -56,10 +59,12 @@ struct Generator {
alias_ids: HashMap<Alias, String>,
external_scanner_states: Vec<HashSet<usize>>,
alias_map: HashMap<Alias, Option<Symbol>>,
field_names: Vec<String>,
}
impl Generator {
fn generate(mut self) -> String {
self.init();
self.add_includes();
self.add_pragmas();
self.add_stats();
@ -67,7 +72,13 @@ impl Generator {
self.add_symbol_names_list();
self.add_symbol_metadata_list();
if self.parse_table.alias_sequences.len() > 1 {
if !self.field_names.is_empty() {
self.add_field_name_enum();
self.add_field_name_names_list();
self.add_field_sequences();
}
if !self.alias_ids.is_empty() {
self.add_alias_sequences();
}
@ -95,6 +106,49 @@ impl Generator {
self.buffer
}
fn init(&mut self) {
let mut symbol_identifiers = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
let mut field_names = Vec::new();
for production_info in &self.parse_table.production_infos {
for field_name in production_info.field_map.keys() {
field_names.push(field_name);
}
for alias in &production_info.alias_sequence {
if let Some(alias) = &alias {
let alias_kind = if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
};
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
self.alias_map
.entry(alias.clone())
.or_insert(matching_symbol);
}
}
}
field_names.sort_unstable();
field_names.dedup();
self.field_names = field_names.into_iter().cloned().collect();
}
fn add_includes(&mut self) {
add_line!(self, "#include <tree_sitter/parser.h>");
add_line!(self, "");
@ -143,39 +197,7 @@ impl Generator {
})
.count();
let mut symbol_identifiers = HashSet::new();
for i in 0..self.parse_table.symbols.len() {
self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
}
for alias_sequence in &self.parse_table.alias_sequences {
for entry in alias_sequence {
if let Some(alias) = entry {
let alias_kind = if alias.is_named {
VariableType::Named
} else {
VariableType::Anonymous
};
let matching_symbol = self.parse_table.symbols.iter().cloned().find(|symbol| {
let (name, kind) = self.metadata_for_symbol(*symbol);
name == alias.value && kind == alias_kind
});
let alias_id = if let Some(symbol) = matching_symbol {
self.symbol_ids[&symbol].clone()
} else if alias.is_named {
format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
} else {
format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
};
self.alias_ids.entry(alias.clone()).or_insert(alias_id);
self.alias_map
.entry(alias.clone())
.or_insert(matching_symbol);
}
}
}
add_line!(self, "#define LANGUAGE_VERSION {}", 9);
add_line!(self, "#define LANGUAGE_VERSION {}", LANGUAGE_VERSION);
add_line!(
self,
"#define STATE_COUNT {}",
@ -197,6 +219,7 @@ impl Generator {
"#define EXTERNAL_TOKEN_COUNT {}",
self.syntax_grammar.external_tokens.len()
);
add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
add_line!(
self,
"#define MAX_ALIAS_SEQUENCE_LENGTH {}",
@ -253,6 +276,34 @@ impl Generator {
add_line!(self, "");
}
fn add_field_name_enum(&mut self) {
add_line!(self, "enum {{");
indent!(self);
for (i, field_name) in self.field_names.iter().enumerate() {
add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_field_name_names_list(&mut self) {
add_line!(self, "static const char *ts_field_names[] = {{");
indent!(self);
add_line!(self, "[0] = NULL,");
for field_name in &self.field_names {
add_line!(
self,
"[{}] = \"{}\",",
self.field_id(field_name),
field_name
);
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_symbol_metadata_list(&mut self) {
add_line!(
self,
@ -307,13 +358,17 @@ impl Generator {
add_line!(
self,
"static TSSymbol ts_alias_sequences[{}][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
self.parse_table.alias_sequences.len()
self.parse_table.production_infos.len()
);
indent!(self);
for (i, sequence) in self.parse_table.alias_sequences.iter().enumerate().skip(1) {
for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
if production_info.alias_sequence.is_empty() {
continue;
}
add_line!(self, "[{}] = {{", i);
indent!(self);
for (j, alias) in sequence.iter().enumerate() {
for (j, alias) in production_info.alias_sequence.iter().enumerate() {
if let Some(alias) = alias {
add_line!(self, "[{}] = {},", j, self.alias_ids[&alias]);
}
@ -326,6 +381,81 @@ impl Generator {
add_line!(self, "");
}
fn add_field_sequences(&mut self) {
let mut flat_field_maps = vec![];
let mut next_flat_field_map_index = 0;
self.get_field_map_id(
&Vec::new(),
&mut flat_field_maps,
&mut next_flat_field_map_index,
);
let mut field_map_ids = Vec::new();
for production_info in &self.parse_table.production_infos {
if !production_info.field_map.is_empty() {
let mut flat_field_map = Vec::new();
for (field_name, locations) in &production_info.field_map {
for location in locations {
flat_field_map.push((field_name.clone(), *location));
}
}
field_map_ids.push((
self.get_field_map_id(
&flat_field_map,
&mut flat_field_maps,
&mut next_flat_field_map_index,
),
flat_field_map.len(),
));
} else {
field_map_ids.push((0, 0));
}
}
add_line!(
self,
"static const TSFieldMapSlice ts_field_map_slices[] = {{",
);
indent!(self);
for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
if length > 0 {
add_line!(
self,
"[{}] = {{.index = {}, .length = {}}},",
production_id,
row_id,
length
);
}
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
add_line!(
self,
"static const TSFieldMapEntry ts_field_map_entries[] = {{",
);
indent!(self);
for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
add_line!(self, "[{}] =", row_index);
indent!(self);
for (field_name, location) in field_pairs {
add_whitespace!(self);
add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
if location.inherited {
add!(self, ", .inherited = true");
}
add!(self, "}},\n");
}
dedent!(self);
}
dedent!(self);
add_line!(self, "}};");
add_line!(self, "");
}
fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
add_line!(
self,
@ -686,15 +816,15 @@ impl Generator {
symbol,
child_count,
dynamic_precedence,
alias_sequence_id,
production_id,
..
} => {
add!(self, "REDUCE({}, {}", self.symbol_ids[&symbol], child_count);
if dynamic_precedence != 0 {
add!(self, ", .dynamic_precedence = {}", dynamic_precedence);
}
if alias_sequence_id != 0 {
add!(self, ", .alias_sequence_id = {}", alias_sequence_id);
if production_id != 0 {
add!(self, ", .production_id = {}", production_id);
}
add!(self, ")");
}
@ -759,13 +889,27 @@ impl Generator {
add_line!(self, ".lex_modes = ts_lex_modes,");
add_line!(self, ".symbol_names = ts_symbol_names,");
if self.parse_table.alias_sequences.len() > 1 {
if !self.alias_ids.is_empty() {
add_line!(
self,
".alias_sequences = (const TSSymbol *)ts_alias_sequences,"
);
}
add_line!(self, ".field_count = FIELD_COUNT,");
if !self.field_names.is_empty() {
add_line!(self, ".field_names = ts_field_names,");
add_line!(
self,
".field_map_slices = (const TSFieldMapSlice *)ts_field_map_slices,"
);
add_line!(
self,
".field_map_entries = (const TSFieldMapEntry *)ts_field_map_entries,"
);
}
add_line!(
self,
".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
@ -820,6 +964,22 @@ impl Generator {
result
}
fn get_field_map_id(
&self,
flat_field_map: &Vec<(String, FieldLocation)>,
flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
next_flat_field_map_index: &mut usize,
) -> usize {
if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
return *index;
}
let result = *next_flat_field_map_index;
flat_field_maps.push((result, flat_field_map.clone()));
*next_flat_field_map_index += flat_field_map.len();
result
}
fn get_external_scanner_state_id(&mut self, external_tokens: HashSet<usize>) -> usize {
self.external_scanner_states
.iter()
@ -865,6 +1025,10 @@ impl Generator {
self.symbol_ids.insert(symbol, id);
}
fn field_id(&self, field_name: &String) -> String {
format!("field_{}", field_name)
}
fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
match symbol.kind {
SymbolType::End => ("end", VariableType::Hidden),
@ -996,6 +1160,7 @@ pub(crate) fn render_c_code(
alias_ids: HashMap::new(),
external_scanner_states: Vec::new(),
alias_map: HashMap::new(),
field_names: Vec::new(),
}
.generate()
}

View file

@ -32,6 +32,7 @@ pub(crate) struct MetadataParams {
pub is_active: bool,
pub is_main_token: bool,
pub alias: Option<Alias>,
pub field_name: Option<String>,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, PartialOrd, Ord)]
@ -57,6 +58,12 @@ pub(crate) enum Rule {
}
impl Rule {
pub fn field(name: String, content: Rule) -> Self {
add_metadata(content, move |params| {
params.field_name = Some(name);
})
}
pub fn alias(content: Rule, value: String, is_named: bool) -> Self {
add_metadata(content, move |params| {
params.alias = Some(Alias { is_named, value });

View file

@ -1,8 +1,9 @@
use super::nfa::CharacterSet;
use super::rules::{Alias, Associativity, Symbol};
use hashbrown::HashMap;
use std::collections::BTreeMap;
pub(crate) type AliasSequenceId = usize;
pub(crate) type ProductionInfoId = usize;
pub(crate) type ParseStateId = usize;
pub(crate) type LexStateId = usize;
@ -21,7 +22,7 @@ pub(crate) enum ParseAction {
precedence: i32,
dynamic_precedence: i32,
associativity: Option<Associativity>,
alias_sequence_id: AliasSequenceId,
production_id: ProductionInfoId,
},
}
@ -39,11 +40,23 @@ pub(crate) struct ParseState {
pub unfinished_item_signature: u64,
}
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq)]
pub(crate) struct FieldLocation {
pub index: usize,
pub inherited: bool,
}
#[derive(Debug, Default, PartialEq, Eq)]
pub(crate) struct ProductionInfo {
pub alias_sequence: Vec<Option<Alias>>,
pub field_map: BTreeMap<String, Vec<FieldLocation>>,
}
#[derive(Debug, PartialEq, Eq)]
pub(crate) struct ParseTable {
pub states: Vec<ParseState>,
pub symbols: Vec<Symbol>,
pub alias_sequences: Vec<Vec<Option<Alias>>>,
pub production_infos: Vec<ProductionInfo>,
pub max_aliased_production_length: usize,
}

View file

@ -95,6 +95,9 @@ pub fn parse_file_at_path(
}
let start = node.start_position();
let end = node.end_position();
if let Some(field_name) = cursor.field_name() {
write!(&mut stdout, "{}: ", field_name)?;
}
write!(
&mut stdout,
"({} [{}, {}] - [{}, {}]",

View file

@ -2,6 +2,7 @@ use crate::error::{Error, Result};
use log::info;
use rsass;
use rsass::sass::Value;
use rsass::selectors::SelectorPart;
use serde_derive::Serialize;
use std::collections::hash_map::Entry;
use std::collections::{btree_map, BTreeMap, HashMap, VecDeque};
@ -27,11 +28,12 @@ type PropertySetId = usize;
#[derive(Clone, PartialEq, Eq)]
struct SelectorStep {
kind: String,
is_named: bool,
is_immediate: bool,
kind: Option<String>,
field: Option<String>,
child_index: Option<usize>,
text_pattern: Option<String>,
is_named: Option<bool>,
is_immediate: bool,
}
#[derive(PartialEq, Eq)]
@ -175,6 +177,7 @@ impl Builder {
transitions
.entry(PropertyTransitionJSON {
kind: step.kind.clone(),
field: step.field.clone(),
named: step.is_named,
index: step.child_index,
text: step.text_pattern.clone(),
@ -203,11 +206,11 @@ impl Builder {
let mut transition_list: Vec<(PropertyTransitionJSON, u32)> =
transitions.into_iter().collect();
transition_list.sort_by(|a, b| {
a.0.kind
.cmp(&b.0.kind)
.then_with(|| a.0.named.cmp(&b.0.named))
.then_with(|| transition_specificity(&b.0).cmp(&transition_specificity(&a.0)))
(transition_specificity(&b.0).cmp(&transition_specificity(&a.0)))
.then_with(|| b.1.cmp(&a.1))
.then_with(|| a.0.kind.cmp(&b.0.kind))
.then_with(|| a.0.named.cmp(&b.0.named))
.then_with(|| a.0.field.cmp(&b.0.field))
});
// For each possible state transition, compute the set of items in that transition's
@ -249,9 +252,7 @@ impl Builder {
// rules will override less specific selectors and earlier rules.
let mut properties = PropertySet::new();
selector_matches.sort_unstable_by(|a, b| {
a.specificity
.cmp(&b.specificity)
.then_with(|| a.rule_id.cmp(&b.rule_id))
(a.specificity.cmp(&b.specificity)).then_with(|| a.rule_id.cmp(&b.rule_id))
});
selector_matches.dedup();
for selector_match in selector_matches {
@ -313,6 +314,7 @@ impl Builder {
transition.state_id = *replacement;
}
}
state.transitions.dedup();
}
}
@ -347,8 +349,14 @@ impl Builder {
}
fn selector_specificity(selector: &Selector) -> u32 {
let mut result = selector.0.len() as u32;
let mut result = 0;
for step in &selector.0 {
if step.kind.is_some() {
result += 1;
}
if step.field.is_some() {
result += 1;
}
if step.child_index.is_some() {
result += 1;
}
@ -361,6 +369,12 @@ fn selector_specificity(selector: &Selector) -> u32 {
fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 {
let mut result = 0;
if transition.kind.is_some() {
result += 1;
}
if transition.field.is_some() {
result += 1;
}
if transition.index.is_some() {
result += 1;
}
@ -371,19 +385,37 @@ fn transition_specificity(transition: &PropertyTransitionJSON) -> u32 {
}
fn step_matches_transition(step: &SelectorStep, transition: &PropertyTransitionJSON) -> bool {
step.kind == transition.kind
&& step.is_named == transition.named
&& (step.child_index == transition.index || step.child_index.is_none())
&& (step.text_pattern == transition.text || step.text_pattern.is_none())
step.kind
.as_ref()
.map_or(true, |kind| transition.kind.as_ref() == Some(kind))
&& step
.is_named
.map_or(true, |named| transition.named == Some(named))
&& step
.field
.as_ref()
.map_or(true, |field| transition.field.as_ref() == Some(field))
&& step
.child_index
.map_or(true, |index| transition.index == Some(index))
&& step
.text_pattern
.as_ref()
.map_or(true, |text| transition.text.as_ref() == Some(text))
}
impl fmt::Debug for SelectorStep {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "(")?;
if self.is_named {
write!(f, "{}", self.kind)?;
} else {
write!(f, "\"{}\"", self.kind)?;
if let Some(kind) = &self.kind {
if self.is_named.unwrap() {
write!(f, "{}", kind)?;
} else {
write!(f, "[token='{}']", kind)?;
}
}
if let Some(field) = &self.field {
write!(f, ".{}", field)?;
}
if let Some(n) = self.child_index {
write!(f, ":nth-child({})", n)?;
@ -407,7 +439,7 @@ impl fmt::Debug for Selector {
}
write!(f, "{:?}", step)?;
}
write!(f, "]")?;
write!(f, " (specificity: {})]", selector_specificity(self))?;
Ok(())
}
}
@ -484,52 +516,134 @@ fn parse_sass_items(
rsass::Item::Rule(selectors, items) => {
let mut full_selectors = Vec::new();
for prefix in selector_prefixes {
let mut part_string = String::new();
let mut next_step_is_immediate = false;
for selector in &selectors.s {
let mut prefix = prefix.clone();
let mut operator_was_immediate: Option<bool> = Some(false);
for part in &selector.0 {
part_string.clear();
write!(&mut part_string, "{}", part).unwrap();
let part_string = part_string.trim();
if !part_string.is_empty() {
if part_string == "&" {
continue;
} else if part_string.starts_with(":nth-child(") {
if let Some(last_step) = prefix.last_mut() {
if let Ok(index) = usize::from_str_radix(
&part_string[11..(part_string.len() - 1)],
10,
) {
last_step.child_index = Some(index);
match part {
SelectorPart::BackRef => {
operator_was_immediate = None;
}
SelectorPart::Simple(value) => {
if let Some(value) = value.single_raw() {
for (i, value) in value.split('.').enumerate() {
if value.is_empty() {
continue;
}
let value = value.to_string();
check_node_kind(&value)?;
if i > 0 {
if let Some(immediate) = operator_was_immediate {
prefix.push(SelectorStep {
kind: None,
field: Some(value),
is_named: None,
child_index: None,
text_pattern: None,
is_immediate: immediate,
})
} else {
prefix.last_mut().unwrap().field = Some(value);
}
} else {
if let Some(immediate) = operator_was_immediate {
prefix.push(SelectorStep {
kind: Some(value.to_string()),
field: None,
child_index: None,
text_pattern: None,
is_named: Some(true),
is_immediate: immediate,
});
} else {
return Err(Error(format!("Node type {} must be separated by whitespace or the `>` operator", value)));
}
}
operator_was_immediate = None;
}
} else {
return Err(interpolation_error());
}
operator_was_immediate = None;
}
SelectorPart::Attribute { name, val, .. } => {
match name.single_raw() {
None => return Err(interpolation_error()),
Some("text") => {
if operator_was_immediate.is_some() {
return Err(Error("The `text` attribute must be used in combination with a node type or field".to_string()));
}
if let Some(last_step) = prefix.last_mut() {
last_step.text_pattern =
Some(get_string_value(val.to_string())?)
}
}
Some("token") => {
if let Some(immediate) = operator_was_immediate {
prefix.push(SelectorStep {
kind: Some(get_string_value(val.to_string())?),
field: None,
is_named: Some(false),
child_index: None,
text_pattern: None,
is_immediate: immediate,
});
operator_was_immediate = None;
} else {
return Err(Error("The `token` attribute canot be used in combination with a node type".to_string()));
}
}
_ => {
return Err(Error(format!(
"Unsupported attribute {}",
part
)));
}
}
} else if part_string.starts_with("[text=") {
if let Some(last_step) = prefix.last_mut() {
last_step.text_pattern = Some(
part_string[7..(part_string.len() - 2)].to_string(),
)
}
SelectorPart::PseudoElement { .. } => {
return Err(Error(
"Pseudo elements are not supported".to_string(),
));
}
SelectorPart::Pseudo { name, arg } => match name.single_raw() {
None => return Err(interpolation_error()),
Some("nth-child") => {
if let Some(arg) = arg {
let mut arg_str = String::new();
write!(&mut arg_str, "{}", arg).unwrap();
if let Some(last_step) = prefix.last_mut() {
if let Ok(i) = usize::from_str_radix(&arg_str, 10) {
last_step.child_index = Some(i);
} else {
return Err(Error(format!(
"Invalid child index {}",
arg
)));
}
}
}
}
_ => {
return Err(Error(format!(
"Unsupported pseudo-class {}",
part
)));
}
},
SelectorPart::Descendant => {
operator_was_immediate = Some(false);
}
SelectorPart::RelOp(operator) => {
let operator = *operator as char;
if operator == '>' {
operator_was_immediate = Some(true);
} else {
return Err(Error(format!(
"Unsupported operator {}",
operator
)));
}
} else if part_string == ">" {
next_step_is_immediate = true;
} else if part_string.starts_with("[token=") {
prefix.push(SelectorStep {
kind: part_string[8..(part_string.len() - 2)].to_string(),
is_named: false,
child_index: None,
text_pattern: None,
is_immediate: next_step_is_immediate,
});
next_step_is_immediate = false;
} else {
prefix.push(SelectorStep {
kind: part_string.to_string(),
is_named: true,
child_index: None,
text_pattern: None,
is_immediate: next_step_is_immediate,
});
next_step_is_immediate = false;
}
}
}
@ -597,7 +711,7 @@ fn parse_sass_value(value: &Value) -> Result<PropertyValue> {
if let Some(s) = s.single_raw() {
Ok(PropertyValue::String(s.to_string()))
} else {
Err(Error("String interpolation is not supported".to_string()))
Err(interpolation_error())
}
}
Value::Call(name, raw_args) => {
@ -665,6 +779,29 @@ fn resolve_path(base: &Path, p: &str) -> Result<PathBuf> {
Err(Error(format!("Could not resolve import path `{}`", p)))
}
fn check_node_kind(name: &String) -> Result<()> {
for c in name.chars() {
if !c.is_alphanumeric() && c != '_' {
return Err(Error(format!("Invalid identifier '{}'", name)));
}
}
Ok(())
}
/// Strip a matching pair of single or double quotes from a string
/// literal, returning the unquoted contents.
///
/// Returns an `Unsupported string literal` error when the input is not
/// wrapped in a matching quote pair.
///
/// The `len() >= 2` guard fixes a panic in the original: a lone quote
/// character (`'` or `"`) both starts and ends with itself, so the
/// un-guarded version would `pop()` it and then `remove(0)` on an
/// empty string, which panics.
fn get_string_value(mut s: String) -> Result<String> {
    let is_quoted = s.len() >= 2
        && (s.starts_with('\'') && s.ends_with('\'')
            || s.starts_with('"') && s.ends_with('"'));
    if is_quoted {
        s.pop();     // trailing quote
        s.remove(0); // leading quote
        Ok(s)
    } else {
        Err(Error(format!("Unsupported string literal {}", s)))
    }
}
fn interpolation_error() -> Error {
Error("String interpolation is not supported".to_string())
}
#[cfg(test)]
mod tests {
use super::*;
@ -801,24 +938,91 @@ mod tests {
.unwrap();
assert_eq!(
*query(&sheet, vec![("f1", true, 0)], "abc"),
*query(&sheet, vec![("f1", None, true, 0)], "abc"),
props(&[("color", string("red"))])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 0)], "Abc"),
*query(&sheet, vec![("f1", None, true, 0)], "Abc"),
props(&[("color", string("green"))])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 0)], "AB_CD"),
*query(&sheet, vec![("f1", None, true, 0)], "AB_CD"),
props(&[("color", string("blue"))])
);
assert_eq!(*query(&sheet, vec![("f2", true, 0)], "Abc"), props(&[]));
assert_eq!(
*query(&sheet, vec![("f2", true, 0)], "ABC"),
*query(&sheet, vec![("f2", None, true, 0)], "Abc"),
props(&[])
);
assert_eq!(
*query(&sheet, vec![("f2", None, true, 0)], "ABC"),
props(&[("color", string("purple"))])
);
}
// Exercises field-based selectors in a compiled property sheet.
// In the `query` helper, each stack entry is (kind, field, is_named,
// child_index); the `.x`/`.y` class selectors in the CSS below compile
// to field constraints, and `&.x` nests a field constraint onto the
// parent kind. The assertions check that field-qualified rules
// (e.g. `b.y` -> indigo) are matched in preference to the kind-only
// rules (`b` -> orange) for the same node.
#[test]
fn test_property_sheet_with_fields() {
let sheet = generate_property_sheet(
"foo.css",
"
a {
color: red;
&.x {
color: green;
b {
color: blue;
&.y { color: yellow; }
}
}
b { color: orange; }
b.y { color: indigo; }
}
.x { color: violet; }
",
)
.unwrap();
// Kind-only match: an `a` node with no field.
assert_eq!(
*query(&sheet, vec![("a", None, true, 0)], ""),
props(&[("color", string("red"))])
);
// `a` occupying field `x` matches the nested `&.x` rule.
assert_eq!(
*query(&sheet, vec![("a", Some("x"), true, 0)], ""),
props(&[("color", string("green"))])
);
// `b` under an `a.x` ancestor.
assert_eq!(
*query(
&sheet,
vec![("a", Some("x"), true, 0), ("b", None, true, 0)],
""
),
props(&[("color", string("blue"))])
);
// Field on both ancestor and child (`&.y` inside `&.x`).
assert_eq!(
*query(
&sheet,
vec![("a", Some("x"), true, 0), ("b", Some("y"), true, 0)],
""
),
props(&[("color", string("yellow"))])
);
// The bare `.x` rule matches any kind in field `x` at the top level.
assert_eq!(
*query(&sheet, vec![("b", Some("x"), true, 0)], ""),
props(&[("color", string("violet"))])
);
assert_eq!(
*query(&sheet, vec![("a", None, true, 0), ("b", None, true, 0)], ""),
props(&[("color", string("orange"))])
);
// Field-qualified `b.y` beats the kind-only `b` rule.
assert_eq!(
*query(
&sheet,
vec![("a", None, true, 0), ("b", Some("y"), true, 0)],
""
),
props(&[("color", string("indigo"))])
);
}
#[test]
fn test_property_sheet_with_cascade_ordering_as_tie_breaker() {
let sheet = generate_property_sheet(
@ -833,29 +1037,49 @@ mod tests {
.unwrap();
assert_eq!(
*query(&sheet, vec![("f1", true, 0), ("f2", true, 0)], "x"),
*query(
&sheet,
vec![("f1", None, true, 0), ("f2", None, true, 0)],
"x"
),
props(&[])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 0), ("f2", true, 1)], "x"),
*query(
&sheet,
vec![("f1", None, true, 0), ("f2", None, true, 1)],
"x"
),
props(&[("color", string("red"))])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "x"),
*query(
&sheet,
vec![("f1", None, true, 1), ("f2", None, true, 1)],
"x"
),
props(&[("color", string("green"))])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "a"),
*query(
&sheet,
vec![("f1", None, true, 1), ("f2", None, true, 1)],
"a"
),
props(&[("color", string("blue"))])
);
assert_eq!(
*query(&sheet, vec![("f1", true, 1), ("f2", true, 1)], "ab"),
*query(
&sheet,
vec![("f1", None, true, 1), ("f2", None, true, 1)],
"ab"
),
props(&[("color", string("violet"))])
);
}
#[test]
fn test_property_sheet_with_function_calls() {
fn test_property_sheet_with_css_function_calls() {
let sheet = generate_property_sheet(
"foo.css",
"
@ -1016,25 +1240,26 @@ mod tests {
) -> &'a PropertySet {
query(
sheet,
node_stack.into_iter().map(|s| (s, true, 0)).collect(),
node_stack.into_iter().map(|s| (s, None, true, 0)).collect(),
"",
)
}
fn query<'a>(
sheet: &'a PropertySheetJSON,
node_stack: Vec<(&'static str, bool, usize)>,
node_stack: Vec<(&'static str, Option<&'static str>, bool, usize)>,
leaf_text: &str,
) -> &'a PropertySet {
let mut state_id = 0;
for (kind, is_named, child_index) in node_stack {
for (kind, field, is_named, child_index) in node_stack {
let state = &sheet.states[state_id];
state_id = state
.transitions
.iter()
.find(|transition| {
transition.kind == kind
&& transition.named == is_named
transition.kind.as_ref().map_or(true, |k| k == kind)
&& transition.named.map_or(true, |n| n == is_named)
&& transition.field.as_ref().map_or(true, |f| field == Some(f))
&& transition.index.map_or(true, |index| index == child_index)
&& (transition
.text

View file

@ -22,6 +22,7 @@ lazy_static! {
.build()
.unwrap();
static ref WHITESPACE_REGEX: Regex = Regex::new(r"\s+").unwrap();
static ref SEXP_FIELD_REGEX: Regex = Regex::new(r" \w+: \(").unwrap();
}
#[derive(Debug, PartialEq, Eq)]
@ -34,6 +35,7 @@ pub enum TestEntry {
name: String,
input: Vec<u8>,
output: String,
has_fields: bool,
},
}
@ -135,6 +137,7 @@ fn run_tests(
name,
input,
output,
has_fields,
} => {
if let Some(filter) = filter {
if !name.contains(filter) {
@ -142,7 +145,10 @@ fn run_tests(
}
}
let tree = parser.parse(&input, None).unwrap();
let actual = tree.root_node().to_sexp();
let mut actual = tree.root_node().to_sexp();
if !has_fields {
actual = strip_sexp_fields(actual);
}
for _ in 0..indent_level {
print!(" ");
}
@ -186,6 +192,10 @@ pub fn parse_tests(path: &Path) -> io::Result<TestEntry> {
}
}
/// Remove field-name annotations (` name: (`) from an s-expression
/// string, so that expected outputs written without fields can be
/// compared against trees produced by a fields-aware parser.
pub fn strip_sexp_fields(sexp: String) -> String {
    let stripped = SEXP_FIELD_REGEX.replace_all(&sexp, " (");
    stripped.into_owned()
}
fn parse_test_content(name: String, content: String) -> TestEntry {
let mut children = Vec::new();
let bytes = content.as_bytes();
@ -209,10 +219,12 @@ fn parse_test_content(name: String, content: String) -> TestEntry {
let input = bytes[previous_header_end..divider_start].to_vec();
let output = WHITESPACE_REGEX.replace_all(output.trim(), " ").to_string();
let output = output.replace(" )", ")");
let has_fields = SEXP_FIELD_REGEX.is_match(&output);
children.push(TestEntry::Example {
name: previous_name,
input,
output,
has_fields,
});
}
}
@ -265,11 +277,13 @@ d
name: "The first test".to_string(),
input: "\na b c\n".as_bytes().to_vec(),
output: "(a (b c))".to_string(),
has_fields: false,
},
TestEntry::Example {
name: "The second test".to_string(),
input: "d".as_bytes().to_vec(),
output: "(d)".to_string(),
has_fields: false,
},
]
}

View file

@ -4,7 +4,7 @@ use super::helpers::fixtures::{fixtures_dir, get_language, get_test_language};
use super::helpers::random::Rand;
use super::helpers::scope_sequence::ScopeSequence;
use crate::generate;
use crate::test::{parse_tests, print_diff, print_diff_key, TestEntry};
use crate::test::{parse_tests, print_diff, print_diff_key, strip_sexp_fields, TestEntry};
use crate::util;
use lazy_static::lazy_static;
use std::{env, fs, time, usize};
@ -67,7 +67,7 @@ fn test_real_language_corpus_files() {
eprintln!("language: {:?}", language_name);
}
for (example_name, input, expected_output) in tests {
for (example_name, input, expected_output, has_fields) in tests {
eprintln!(" example: {:?}", example_name);
if TRIAL_FILTER.map_or(true, |t| t == 0) {
@ -76,7 +76,10 @@ fn test_real_language_corpus_files() {
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
drop(tree);
drop(parser);
if actual_output != expected_output {
@ -144,7 +147,11 @@ fn test_real_language_corpus_files() {
let tree3 = parser.parse(&input, Some(&tree2)).unwrap();
// Verify that the final tree matches the expectation from the corpus.
let actual_output = tree3.root_node().to_sexp();
let mut actual_output = tree3.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
if actual_output != expected_output {
println!(
"Incorrect parse for {} - {} - trial {}",
@ -241,7 +248,7 @@ fn test_feature_corpus_files() {
eprintln!("test language: {:?}", language_name);
}
for (name, input, expected_output) in tests {
for (name, input, expected_output, has_fields) in tests {
eprintln!(" example: {:?}", name);
allocations::start_recording();
@ -249,7 +256,11 @@ fn test_feature_corpus_files() {
let mut parser = get_parser(&mut log_session, "log.html");
parser.set_language(language).unwrap();
let tree = parser.parse(&input, None).unwrap();
let actual_output = tree.root_node().to_sexp();
let mut actual_output = tree.root_node().to_sexp();
if !has_fields {
actual_output = strip_sexp_fields(actual_output);
}
drop(tree);
drop(parser);
if actual_output != expected_output {
@ -348,13 +359,14 @@ fn get_parser(session: &mut Option<util::LogSession>, log_filename: &str) -> Par
parser
}
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String)>) {
fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String, bool)> {
fn helper(test: TestEntry, prefix: &str, result: &mut Vec<(String, Vec<u8>, String, bool)>) {
match test {
TestEntry::Example {
mut name,
input,
output,
has_fields,
} => {
if !prefix.is_empty() {
name.insert_str(0, " - ");
@ -365,7 +377,7 @@ fn flatten_tests(test: TestEntry) -> Vec<(String, Vec<u8>, String)> {
return;
}
}
result.push((name, input, output));
result.push((name, input, output, has_fields));
}
TestEntry::Group { mut name, children } => {
if !prefix.is_empty() {

View file

@ -0,0 +1,62 @@
use super::helpers::fixtures::get_test_language;
use crate::generate::generate_parser_for_grammar;
use tree_sitter::Parser;
// End-to-end check of the grammar `REF` construct: children produced
// under a `REF` rule should be retrievable from their parent node by
// ref name via `child_by_ref`. The grammar sequences three children,
// two of them wrapped in refs (`ref_1`, `ref_2`), with an optional
// middle child.
#[test]
fn test_basic_node_refs() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_grammar_with_refs",
"extras": [
{"type": "PATTERN", "value": "\\s+"}
],
"rules": {
"rule_a": {
"type": "SEQ",
"members": [
{
"type": "REF",
"value": "ref_1",
"content": {
"type": "STRING",
"value": "child-1"
}
},
{
"type": "CHOICE",
"members": [
{
"type": "STRING",
"value": "child-2"
},
{
"type": "BLANK"
}
]
},
{
"type": "REF",
"value": "ref_2",
"content": {
"type": "STRING",
"value": "child-3"
}
}
]
}
}
}
"#,
)
.unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);
parser.set_language(language).unwrap();
let tree = parser.parse("child-1 child-2 child-3", None).unwrap();
let root_node = tree.root_node();
// ref_1 wraps the first child, ref_2 the third (index 2).
assert_eq!(root_node.child_by_ref("ref_1"), root_node.child(0));
assert_eq!(root_node.child_by_ref("ref_2"), root_node.child(2));
}

View file

@ -1,6 +1,6 @@
use super::helpers::edits::{get_random_edit, perform_edit};
use super::helpers::fixtures::{get_language, get_test_language};
use super::helpers::random::Rand;
use super::helpers::edits::{get_random_edit, perform_edit};
use crate::generate::generate_parser_for_grammar;
use tree_sitter::{Node, Parser, Point, Tree};
@ -321,11 +321,7 @@ fn test_node_edit() {
let nodes_after = get_all_nodes(&tree2);
for (i, node) in nodes_before.into_iter().enumerate() {
assert_eq!(
(
node.kind(),
node.start_byte(),
node.start_position()
),
(node.kind(), node.start_byte(), node.start_position()),
(
nodes_after[i].kind(),
nodes_after[i].start_byte(),
@ -338,6 +334,165 @@ fn test_node_edit() {
}
}
// Exercises the node-field API end to end against a generated grammar:
// `child_by_field_name` on a node, and `field_name` while walking with
// a `TreeCursor`. The grammar's comments (inside the raw string, read
// by the grammar loader) explain the two hidden-rule cases being
// covered: a field pointing at a hidden rule with a single child, and
// a field defined *inside* a hidden rule that is visible through the
// parent.
#[test]
fn test_node_field_names() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_grammar_with_fields",
"extras": [
{"type": "PATTERN", "value": "\\s+"}
],
"rules": {
"rule_a": {
"type": "SEQ",
"members": [
{
"type": "FIELD",
"name": "field_1",
"content": {"type": "STRING", "value": "child-0"}
},
{
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "child-1"},
{"type": "BLANK"},
// This isn't used in the test, but prevents `_hidden_rule1`
// from being eliminated as a unit reduction.
{
"type": "ALIAS",
"value": "x",
"named": true,
"content": {
"type": "SYMBOL",
"name": "_hidden_rule1"
}
}
]
},
{
"type": "FIELD",
"name": "field_2",
"content": {"type": "SYMBOL", "name": "_hidden_rule1"}
},
{"type": "SYMBOL", "name": "_hidden_rule2"}
]
},
// Fields pointing to hidden nodes with a single child resolve to the child.
"_hidden_rule1": {
"type": "CHOICE",
"members": [
{"type": "STRING", "value": "child-2"},
{"type": "STRING", "value": "child-2.5"}
]
},
// Fields within hidden nodes can be referenced through the parent node.
"_hidden_rule2": {
"type": "SEQ",
"members": [
{"type": "STRING", "value": "child-3"},
{
"type": "FIELD",
"name": "field_3",
"content": {"type": "STRING", "value": "child-4"}
}
]
}
}
}
"#,
)
.unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);
parser.set_language(language).unwrap();
let tree = parser
.parse("child-0 child-1 child-2 child-3 child-4", None)
.unwrap();
let root_node = tree.root_node();
// Lookups by field name; field_3 lives inside `_hidden_rule2` but is
// reachable from the root node.
assert_eq!(root_node.child_by_field_name("field_1"), root_node.child(0));
assert_eq!(root_node.child_by_field_name("field_2"), root_node.child(2));
assert_eq!(root_node.child_by_field_name("field_3"), root_node.child(4));
// A leaf has no fields of its own, and unknown names return None.
assert_eq!(
root_node.child(0).unwrap().child_by_field_name("field_1"),
None
);
assert_eq!(root_node.child_by_field_name("not_a_real_field"), None);
// Cursor walk: field_name reports the field of the *current* node.
let mut cursor = root_node.walk();
assert_eq!(cursor.field_name(), None);
cursor.goto_first_child();
assert_eq!(cursor.node().kind(), "child-0");
assert_eq!(cursor.field_name(), Some("field_1"));
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-1");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-2");
assert_eq!(cursor.field_name(), Some("field_2"));
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-3");
assert_eq!(cursor.field_name(), None);
cursor.goto_next_sibling();
assert_eq!(cursor.node().kind(), "child-4");
assert_eq!(cursor.field_name(), Some("field_3"));
}
// The field APIs must degrade gracefully for grammars that define no
// fields at all: `child_by_field_name` returns None and the cursor's
// `field_name` is always None, with no crashes.
#[test]
fn test_node_field_calls_in_language_without_fields() {
let (parser_name, parser_code) = generate_parser_for_grammar(
r#"
{
"name": "test_grammar_with_no_fields",
"extras": [
{"type": "PATTERN", "value": "\\s+"}
],
"rules": {
"a": {
"type": "SEQ",
"members": [
{
"type": "STRING",
"value": "b"
},
{
"type": "STRING",
"value": "c"
},
{
"type": "STRING",
"value": "d"
}
]
}
}
}
"#,
)
.unwrap();
let mut parser = Parser::new();
let language = get_test_language(&parser_name, &parser_code, None);
parser.set_language(language).unwrap();
let tree = parser.parse("b c d", None).unwrap();
let root_node = tree.root_node();
assert_eq!(root_node.kind(), "a");
assert_eq!(root_node.child_by_field_name("something"), None);
let mut cursor = root_node.walk();
assert_eq!(cursor.field_name(), None);
assert_eq!(cursor.goto_first_child(), true);
assert_eq!(cursor.field_name(), None);
}
fn get_all_nodes(tree: &Tree) -> Vec<Node> {
let mut result = Vec::new();
let mut visited_children = false;

View file

@ -833,7 +833,7 @@ fn test_parsing_with_included_ranges_and_missing_tokens() {
let root = tree.root_node();
assert_eq!(
root.to_sexp(),
"(program (A (MISSING)) (b) (c) (A (MISSING)) (b) (c))"
"(program (A (MISSING a)) (b) (c) (A (MISSING a)) (b) (c))"
);
assert_eq!(root.start_byte(), 2);
assert_eq!(root.child(3).unwrap().start_byte(), 4);

View file

@ -600,8 +600,6 @@ if (valid_symbols[INDENT] || valid_symbol[DEDENT]) {
}
```
[ambiguous-grammar]: https://en.wikipedia.org/wiki/Ambiguous_grammar
[antlr]: http://www.antlr.org/
[bison-dprec]: https://www.gnu.org/software/bison/manual/html_node/Generalized-LR-Parsing.html

View file

@ -605,8 +605,9 @@ where
}
}
impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>> Iterator
for Highlighter<'a, T>
impl<'a, T> Iterator for Highlighter<'a, T>
where
T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
{
type Item = HighlightEvent<'a>;
@ -703,6 +704,31 @@ impl<'a, T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>> Itera
}
}
impl<'a, T> fmt::Debug for Highlighter<'a, T>
where
T: Fn(&str) -> Option<(Language, &'a PropertySheet<Properties>)>,
{
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
if let Some(layer) = self.layers.first() {
let node = layer.cursor.node();
let position = if layer.at_node_end {
node.end_position()
} else {
node.start_position()
};
write!(
f,
"{{Highlighter position: {:?}, kind: {}, at_end: {}, props: {:?}}}",
position,
node.kind(),
layer.at_node_end,
layer.cursor.node_properties()
)?;
}
Ok(())
}
}
impl<'a> Layer<'a> {
fn new(
source: &'a [u8],

View file

@ -3,6 +3,7 @@
pub type __darwin_size_t = ::std::os::raw::c_ulong;
pub type FILE = [u64; 19usize];
pub type TSSymbol = u16;
pub type TSFieldId = u16;
#[repr(C)]
#[derive(Debug, Copy, Clone)]
pub struct TSLanguage {
@ -227,6 +228,16 @@ extern "C" {
extern "C" {
pub fn ts_node_child(arg1: TSNode, arg2: u32) -> TSNode;
}
extern "C" {
pub fn ts_node_child_by_field_id(arg1: TSNode, arg2: TSFieldId) -> TSNode;
}
extern "C" {
pub fn ts_node_child_by_field_name(
arg1: TSNode,
arg2: *const ::std::os::raw::c_char,
arg3: u32,
) -> TSNode;
}
extern "C" {
pub fn ts_node_named_child(arg1: TSNode, arg2: u32) -> TSNode;
}
@ -286,6 +297,14 @@ extern "C" {
extern "C" {
pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode;
}
extern "C" {
pub fn ts_tree_cursor_current_field_id(arg1: *const TSTreeCursor) -> TSFieldId;
}
extern "C" {
pub fn ts_tree_cursor_current_field_name(
arg1: *const TSTreeCursor,
) -> *const ::std::os::raw::c_char;
}
extern "C" {
pub fn ts_tree_cursor_goto_parent(arg1: *mut TSTreeCursor) -> bool;
}
@ -313,6 +332,22 @@ extern "C" {
arg2: *const ::std::os::raw::c_char,
) -> TSSymbol;
}
extern "C" {
pub fn ts_language_field_count(arg1: *const TSLanguage) -> u32;
}
extern "C" {
pub fn ts_language_field_name_for_id(
arg1: *const TSLanguage,
arg2: TSFieldId,
) -> *const ::std::os::raw::c_char;
}
extern "C" {
pub fn ts_language_field_id_for_name(
arg1: *const TSLanguage,
arg2: *const ::std::os::raw::c_char,
arg3: u32,
) -> TSFieldId;
}
extern "C" {
pub fn ts_language_symbol_type(arg1: *const TSLanguage, arg2: TSSymbol) -> TSSymbolType;
}
@ -320,4 +355,5 @@ extern "C" {
pub fn ts_language_version(arg1: *const TSLanguage) -> u32;
}
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 9;
pub const TREE_SITTER_LANGUAGE_VERSION: usize = 10;
pub const TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION: usize = 9;

View file

@ -18,6 +18,7 @@ use std::os::raw::{c_char, c_void};
use std::sync::atomic::AtomicUsize;
use std::{fmt, ptr, slice, str, u16};
pub const LANGUAGE_VERSION: usize = ffi::TREE_SITTER_LANGUAGE_VERSION;
pub const PARSER_HEADER: &'static str = include_str!("../include/tree_sitter/parser.h");
#[derive(Clone, Copy)]
@ -57,13 +58,15 @@ pub struct InputEdit {
}
struct PropertyTransition {
state_id: usize,
child_index: Option<usize>,
text_regex_index: Option<usize>,
state_id: u16,
child_index: Option<u16>,
text_regex_index: Option<u16>,
node_kind_id: Option<u16>,
}
struct PropertyState {
transitions: HashMap<u16, Vec<PropertyTransition>>,
field_transitions: HashMap<u16, Vec<PropertyTransition>>,
kind_transitions: HashMap<u16, Vec<PropertyTransition>>,
property_set_id: usize,
default_next_state_id: usize,
}
@ -83,11 +86,15 @@ pub struct PropertySheet<P = HashMap<String, String>> {
#[derive(Debug, Deserialize, Serialize, Hash, PartialEq, Eq)]
pub struct PropertyTransitionJSON {
#[serde(rename = "type")]
pub kind: String,
pub named: bool,
#[serde(skip_serializing_if = "Option::is_none")]
pub kind: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub named: Option<bool>,
#[serde(skip_serializing_if = "Option::is_none")]
pub index: Option<usize>,
#[serde(skip_serializing_if = "Option::is_none")]
pub field: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub text: Option<String>,
pub state_id: usize,
}
@ -137,6 +144,22 @@ impl Language {
pub fn node_kind_is_named(&self, id: u16) -> bool {
unsafe { ffi::ts_language_symbol_type(self.0, id) == ffi::TSSymbolType_TSSymbolTypeRegular }
}
/// Look up the numeric id of a field by name via the C API.
///
/// Returns `None` when the language does not define the field —
/// `ts_language_field_id_for_name` reports that case as id 0, which
/// is reserved for "no field".
pub fn field_id_for_name(&self, field_name: impl AsRef<[u8]>) -> Option<u16> {
    let name = field_name.as_ref();
    let raw_id = unsafe {
        ffi::ts_language_field_id_for_name(
            self.0,
            name.as_ptr() as *const c_char,
            name.len() as u32,
        )
    };
    match raw_id {
        0 => None,
        id => Some(id),
    }
}
}
unsafe impl Send for Language {}
@ -154,15 +177,21 @@ impl Parser {
pub fn set_language(&mut self, language: Language) -> Result<(), String> {
unsafe {
let version = ffi::ts_language_version(language.0) as usize;
if version == ffi::TREE_SITTER_LANGUAGE_VERSION {
ffi::ts_parser_set_language(self.0, language.0);
Ok(())
} else {
if version < ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION {
Err(format!(
"Incompatible language version {}. Expected {} or greater.",
version,
ffi::TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
))
} else if version > ffi::TREE_SITTER_LANGUAGE_VERSION {
Err(format!(
"Incompatible language version {}. Expected {}.",
version,
ffi::TREE_SITTER_LANGUAGE_VERSION
))
} else {
ffi::ts_parser_set_language(self.0, language.0);
Ok(())
}
}
}
@ -484,6 +513,17 @@ impl<'tree> Node<'tree> {
Self::new(unsafe { ffi::ts_node_child(self.0, i as u32) })
}
/// Get this node's child that occupies the field named `field_name`,
/// if any, via `ts_node_child_by_field_name`.
pub fn child_by_field_name(&self, field_name: impl AsRef<[u8]>) -> Option<Self> {
    let name = field_name.as_ref();
    let raw_node = unsafe {
        ffi::ts_node_child_by_field_name(
            self.0,
            name.as_ptr() as *const c_char,
            name.len() as u32,
        )
    };
    Self::new(raw_node)
}
pub fn child_count(&self) -> usize {
unsafe { ffi::ts_node_child_count(self.0) as usize }
}
@ -601,6 +641,28 @@ impl<'a> TreeCursor<'a> {
)
}
/// Field id of the cursor's current node, or `None` when the node does
/// not occupy a field (the C API encodes that as id 0).
pub fn field_id(&self) -> Option<u16> {
    let raw_id = unsafe { ffi::ts_tree_cursor_current_field_id(&self.0) };
    match raw_id {
        0 => None,
        id => Some(id),
    }
}
/// Field name of the cursor's current node, or `None` when the node
/// does not occupy a field (the C API returns a null pointer).
pub fn field_name(&self) -> Option<&str> {
    let ptr = unsafe { ffi::ts_tree_cursor_current_field_name(&self.0) };
    if ptr.is_null() {
        return None;
    }
    // SAFETY: non-null pointers from the C API reference the language's
    // static field-name table, which outlives the cursor.
    let name = unsafe { CStr::from_ptr(ptr) };
    Some(name.to_str().unwrap())
}
pub fn goto_first_child(&mut self) -> bool {
return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) };
}
@ -643,7 +705,9 @@ impl<'a, P> TreePropertyCursor<'a, P> {
property_sheet,
source,
};
let state = result.next_state(&result.current_state(), result.cursor.node().kind_id(), 0);
let kind_id = result.cursor.node().kind_id();
let field_id = result.cursor.field_id();
let state = result.next_state(&result.current_state(), kind_id, field_id, 0);
result.state_stack.push(state);
result
}
@ -662,7 +726,8 @@ impl<'a, P> TreePropertyCursor<'a, P> {
let next_state_id = {
let state = &self.current_state();
let kind_id = self.cursor.node().kind_id();
self.next_state(state, kind_id, child_index)
let field_id = self.cursor.field_id();
self.next_state(state, kind_id, field_id, child_index)
};
self.state_stack.push(next_state_id);
self.child_index_stack.push(child_index);
@ -679,7 +744,8 @@ impl<'a, P> TreePropertyCursor<'a, P> {
let next_state_id = {
let state = &self.current_state();
let kind_id = self.cursor.node().kind_id();
self.next_state(state, kind_id, child_index)
let field_id = self.cursor.field_id();
self.next_state(state, kind_id, field_id, child_index)
};
self.state_stack.push(next_state_id);
self.child_index_stack.push(child_index);
@ -703,34 +769,47 @@ impl<'a, P> TreePropertyCursor<'a, P> {
&self,
state: &PropertyState,
node_kind_id: u16,
node_field_id: Option<u16>,
node_child_index: usize,
) -> usize {
state
.transitions
.get(&node_kind_id)
.and_then(|transitions| {
for transition in transitions.iter() {
if let Some(text_regex_index) = transition.text_regex_index {
let node = self.cursor.node();
let text = &self.source[node.start_byte()..node.end_byte()];
if let Ok(text) = str::from_utf8(text) {
if !self.property_sheet.text_regexes[text_regex_index].is_match(text) {
continue;
}
}
}
let transitions = if let Some(field_id) = node_field_id {
state.field_transitions.get(&field_id)
} else {
state.kind_transitions.get(&node_kind_id)
};
if let Some(child_index) = transition.child_index {
if child_index != node_child_index {
if let Some(transitions) = transitions {
for transition in transitions.iter() {
if transition
.node_kind_id
.map_or(false, |id| id != node_kind_id)
{
continue;
}
if let Some(text_regex_index) = transition.text_regex_index {
let node = self.cursor.node();
let text = &self.source[node.start_byte()..node.end_byte()];
if let Ok(text) = str::from_utf8(text) {
if !self.property_sheet.text_regexes[text_regex_index as usize]
.is_match(text)
{
continue;
}
}
return Some(transition.state_id);
}
None
})
.unwrap_or(state.default_next_state_id)
if let Some(child_index) = transition.child_index {
if child_index != node_child_index as u16 {
continue;
}
}
return transition.state_id as usize;
}
}
state.default_next_state_id
}
fn current_state(&self) -> &PropertyState {
@ -815,40 +894,97 @@ impl<P> PropertySheet<P> {
let mut text_regex_patterns = Vec::new();
for state in input.states.iter() {
let mut transitions = HashMap::new();
let node_kind_count = language.node_kind_count();
let mut kind_transitions = HashMap::new();
let mut field_transitions = HashMap::new();
for transition in state.transitions.iter() {
let field_id = transition
.field
.as_ref()
.and_then(|field| language.field_id_for_name(&field));
if let Some(field_id) = field_id {
field_transitions.entry(field_id).or_insert(Vec::new());
}
}
for transition in state.transitions.iter() {
let text_regex_index = if let Some(regex_pattern) = transition.text.as_ref() {
if let Some(index) =
text_regex_patterns.iter().position(|r| *r == regex_pattern)
{
Some(index)
Some(index as u16)
} else {
text_regex_patterns.push(regex_pattern);
text_regexes.push(
Regex::new(&regex_pattern).map_err(PropertySheetError::InvalidRegex)?,
);
Some(text_regexes.len() - 1)
Some(text_regexes.len() as u16 - 1)
}
} else {
None
};
for i in 0..(node_kind_count as u16) {
if transition.kind == language.node_kind_for_id(i)
&& transition.named == language.node_kind_is_named(i)
{
let entry = transitions.entry(i).or_insert(Vec::new());
entry.push(PropertyTransition {
child_index: transition.index,
state_id: transition.state_id,
let state_id = transition.state_id as u16;
let child_index = transition.index.map(|i| i as u16);
let field_id = transition
.field
.as_ref()
.and_then(|field| language.field_id_for_name(&field));
if let Some(kind) = transition.kind.as_ref() {
for kind_id in 0..(node_kind_count as u16) {
if kind != language.node_kind_for_id(kind_id)
|| transition.named != Some(language.node_kind_is_named(kind_id))
{
continue;
}
if let Some(field_id) = field_id {
field_transitions
.entry(field_id)
.or_insert(Vec::new())
.push(PropertyTransition {
node_kind_id: Some(kind_id),
state_id,
child_index,
text_regex_index,
});
} else {
for (_, entries) in field_transitions.iter_mut() {
entries.push(PropertyTransition {
node_kind_id: Some(kind_id),
state_id,
child_index,
text_regex_index,
});
}
kind_transitions.entry(kind_id).or_insert(Vec::new()).push(
PropertyTransition {
node_kind_id: None,
state_id,
child_index,
text_regex_index,
},
);
}
}
} else if let Some(field_id) = field_id {
field_transitions
.entry(field_id)
.or_insert(Vec::new())
.push(PropertyTransition {
node_kind_id: None,
state_id,
child_index,
text_regex_index,
});
}
}
}
states.push(PropertyState {
transitions,
field_transitions,
kind_transitions,
default_next_state_id: state.default_next_state_id,
property_set_id: state.property_set_id,
});

View file

@ -10,9 +10,11 @@ extern "C" {
#include <stdint.h>
#include <stdbool.h>
#define TREE_SITTER_LANGUAGE_VERSION 9
#define TREE_SITTER_LANGUAGE_VERSION 10
#define TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION 9
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
typedef struct TSParser TSParser;
typedef struct TSTree TSTree;
@ -119,6 +121,8 @@ bool ts_node_has_changes(TSNode);
bool ts_node_has_error(TSNode);
TSNode ts_node_parent(TSNode);
TSNode ts_node_child(TSNode, uint32_t);
TSNode ts_node_child_by_field_id(TSNode, TSFieldId);
TSNode ts_node_child_by_field_name(TSNode, const char *, uint32_t);
TSNode ts_node_named_child(TSNode, uint32_t);
uint32_t ts_node_child_count(TSNode);
uint32_t ts_node_named_child_count(TSNode);
@ -138,6 +142,8 @@ TSTreeCursor ts_tree_cursor_new(TSNode);
void ts_tree_cursor_delete(TSTreeCursor *);
void ts_tree_cursor_reset(TSTreeCursor *, TSNode);
TSNode ts_tree_cursor_current_node(const TSTreeCursor *);
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *);
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *);
bool ts_tree_cursor_goto_parent(TSTreeCursor *);
bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *);
bool ts_tree_cursor_goto_first_child(TSTreeCursor *);
@ -146,6 +152,9 @@ int64_t ts_tree_cursor_goto_first_child_for_byte(TSTreeCursor *, uint32_t);
uint32_t ts_language_symbol_count(const TSLanguage *);
const char *ts_language_symbol_name(const TSLanguage *, TSSymbol);
TSSymbol ts_language_symbol_for_name(const TSLanguage *, const char *);
uint32_t ts_language_field_count(const TSLanguage *);
const char *ts_language_field_name_for_id(const TSLanguage *, TSFieldId);
TSFieldId ts_language_field_id_for_name(const TSLanguage *, const char *, uint32_t);
TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol);
uint32_t ts_language_version(const TSLanguage *);

View file

@ -15,9 +15,21 @@ extern "C" {
#ifndef TREE_SITTER_API_H_
typedef uint16_t TSSymbol;
typedef uint16_t TSFieldId;
typedef struct TSLanguage TSLanguage;
#endif
typedef struct {
TSFieldId field_id;
uint8_t child_index;
bool inherited;
} TSFieldMapEntry;
typedef struct {
uint16_t index;
uint16_t length;
} TSFieldMapSlice;
typedef uint16_t TSStateId;
typedef struct {
@ -54,7 +66,7 @@ typedef struct {
TSSymbol symbol;
int16_t dynamic_precedence;
uint8_t child_count;
uint8_t alias_sequence_id;
uint8_t production_id;
};
} params;
TSParseActionType type : 4;
@ -98,6 +110,10 @@ struct TSLanguage {
unsigned (*serialize)(void *, char *);
void (*deserialize)(void *, const char *, unsigned);
} external_scanner;
uint32_t field_count;
const TSFieldMapSlice *field_map_slices;
const TSFieldMapEntry *field_map_entries;
const char **field_names;
};
/*

View file

@ -148,7 +148,7 @@ static bool iterator_tree_is_visible(const Iterator *self) {
Subtree parent = *self->cursor.stack.contents[self->cursor.stack.size - 2].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent.ptr->alias_sequence_id
parent.ptr->production_id
);
return alias_sequence && alias_sequence[entry.structural_child_index] != 0;
}
@ -171,7 +171,7 @@ static void iterator_get_visible_state(const Iterator *self, Subtree *tree,
const Subtree *parent = self->cursor.stack.contents[i - 1].subtree;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->language,
parent->ptr->alias_sequence_id
parent->ptr->production_id
);
if (alias_sequence) {
*alias_symbol = alias_sequence[entry.structural_child_index];

View file

@ -69,3 +69,39 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *language, TSSymbol symbol
return TSSymbolTypeAuxiliary;
}
}
// Report how many distinct field names this language defines.
//
// Languages generated before fields existed (i.e. with a version older
// than TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS) carry no field tables,
// so they are reported as having zero fields.
uint32_t ts_language_field_count(const TSLanguage *self) {
  bool has_fields = self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS;
  return has_fields ? self->field_count : 0;
}
// Look up the human-readable name for a field id.
//
// Returns NULL for languages that predate fields. NOTE(review): `id` is
// not range-checked against the field count — callers are expected to
// pass an id obtained from this language's own field tables.
const char *ts_language_field_name_for_id(const TSLanguage *self, TSFieldId id) {
  if (ts_language_field_count(self) == 0) return NULL;
  return self->field_names[id];
}
// Find the id of the field with the given name, or 0 if this language
// has no such field. `name` need not be null-terminated; `name_length`
// bounds the comparison.
//
// Field ids are 1-based — slot 0 of `field_names` is unused — so the
// scan starts at index 1. The `field_names` table is assumed to be
// sorted, which lets the scan stop as soon as it has passed the point
// where the name would appear. (TODO confirm the generator always emits
// the table sorted; if not, drop the early exit below.)
TSFieldId ts_language_field_id_for_name(
  const TSLanguage *self,
  const char *name,
  uint32_t name_length
) {
  uint32_t count = ts_language_field_count(self);
  for (TSFieldId i = 1; i < count + 1; i++) {
    int comparison = strncmp(name, self->field_names[i], name_length);
    if (comparison == 0) {
      // strncmp only compared the first `name_length` bytes; require the
      // stored name to end there too, so that e.g. "name" does not match
      // a field called "namespace".
      if (self->field_names[i][name_length] == 0) return i;
    } else if (comparison < 0) {
      // ISO C only guarantees the *sign* of strncmp's result, so test
      // `< 0` — the previous `switch` with `case -1:` relied on an
      // implementation-defined exact return value.
      return 0;
    }
  }
  return 0;
}

View file

@ -9,6 +9,7 @@ extern "C" {
#include "tree_sitter/parser.h"
#define ts_builtin_sym_error_repeat (ts_builtin_sym_error - 1)
#define TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS 10
typedef struct {
const TSParseAction *actions;
@ -81,12 +82,29 @@ ts_language_enabled_external_tokens(const TSLanguage *self,
}
static inline const TSSymbol *
ts_language_alias_sequence(const TSLanguage *self, unsigned id) {
return id > 0 ?
self->alias_sequences + id * self->max_alias_sequence_length :
ts_language_alias_sequence(const TSLanguage *self, uint32_t production_id) {
return production_id > 0 ?
self->alias_sequences + production_id * self->max_alias_sequence_length :
NULL;
}
// Retrieve the span of field-map entries for one production, writing the
// half-open range [*start, *end) through the output pointers.
//
// Both pointers are set to NULL when the language predates fields or
// defines none at all.
static inline void ts_language_field_map(
  const TSLanguage *self,
  uint32_t production_id,
  const TSFieldMapEntry **start,
  const TSFieldMapEntry **end
) {
  bool has_fields =
    self->version >= TREE_SITTER_LANGUAGE_VERSION_WITH_FIELDS &&
    self->field_count != 0;
  if (!has_fields) {
    *start = NULL;
    *end = NULL;
  } else {
    TSFieldMapSlice slice = self->field_map_slices[production_id];
    *start = self->field_map_entries + slice.index;
    *end = *start + slice.length;
  }
}
#ifdef __cplusplus
}
#endif

View file

@ -53,7 +53,7 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
node->tree->language,
subtree.ptr->alias_sequence_id
subtree.ptr->production_id
);
return (NodeChildIterator) {
.tree = node->tree,
@ -65,8 +65,12 @@ static inline NodeChildIterator ts_node_iterate_children(const TSNode *node) {
};
}
static inline bool ts_node_child_iterator_done(NodeChildIterator *self) {
return self->child_index == self->parent.ptr->child_count;
}
static inline bool ts_node_child_iterator_next(NodeChildIterator *self, TSNode *result) {
if (!self->parent.ptr || self->child_index == self->parent.ptr->child_count) return false;
if (!self->parent.ptr || ts_node_child_iterator_done(self)) return false;
const Subtree *child = &self->parent.ptr->children[self->child_index];
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child)) {
@ -453,6 +457,85 @@ TSNode ts_node_named_child(TSNode self, uint32_t child_index) {
return ts_node__child(self, child_index, false);
}
// Return the first child of `self` associated with the given field id,
// or a null node if there is no such child.
//
// Walks this node's non-extra ("structural") children in order, matching
// their positions against the field-map entries for this node's production.
TSNode ts_node_child_by_field_id(TSNode self, TSFieldId field_id) {
recur:
  // Field id 0 means "no field"; a node with no children cannot match.
  if (!field_id || ts_node_child_count(self) == 0) return ts_node__null();

  // Fetch the (field_id, child_index) mappings for this node's production.
  const TSFieldMapEntry *field_map, *field_map_end;
  ts_language_field_map(
    self.tree->language,
    ts_node__subtree(self).ptr->production_id,
    &field_map,
    &field_map_end
  );
  if (field_map == field_map_end) return ts_node__null();

  // The field mappings are sorted by their field id. Scan all
  // the mappings to find the ones for the given field id, narrowing
  // [field_map, field_map_end) from both sides.
  while (field_map->field_id < field_id) {
    field_map++;
    if (field_map == field_map_end) return ts_node__null();
  }
  while (field_map_end[-1].field_id > field_id) {
    field_map_end--;
    if (field_map == field_map_end) return ts_node__null();
  }

  TSNode child;
  NodeChildIterator iterator = ts_node_iterate_children(&self);
  while (ts_node_child_iterator_next(&iterator, &child)) {
    // Extra nodes (e.g. comments) never occupy structural child slots.
    if (!ts_subtree_extra(ts_node__subtree(child))) {
      // The iterator has already advanced past this child, so its
      // structural index is one less than the iterator's counter.
      uint32_t index = iterator.structural_child_index - 1;
      if (index < field_map->child_index) continue;

      // Hidden nodes' fields are "inherited" by their visible parent.
      if (field_map->inherited) {

        // If this is the *last* possible child node for this field,
        // then perform a tail call to avoid recursion.
        if (field_map + 1 == field_map_end) {
          self = child;
          goto recur;
        }

        // Otherwise, descend into this child, but if it doesn't contain
        // the field, continue searching subsequent children.
        else {
          TSNode result = ts_node_child_by_field_id(child, field_id);
          if (result.id) return result;
          field_map++;
          if (field_map == field_map_end) return ts_node__null();
        }
      }

      else if (ts_node__is_relevant(child, true)) {
        return child;
      }

      // If the field refers to a hidden node, return its first visible
      // child.
      else {
        return ts_node_child(child, 0);
      }
    }
  }

  return ts_node__null();
}
// Return the first child of `self` whose field has the given name, or a
// null node if the language defines no such field (in which case the
// resolved field id is 0, which ts_node_child_by_field_id rejects).
TSNode ts_node_child_by_field_name(
  TSNode self,
  const char *name,
  uint32_t name_length
) {
  const TSLanguage *language = self.tree->language;
  return ts_node_child_by_field_id(
    self,
    ts_language_field_id_for_name(language, name, name_length)
  );
}
uint32_t ts_node_child_count(TSNode self) {
Subtree tree = ts_node__subtree(self);
if (ts_subtree_child_count(tree) > 0) {

View file

@ -681,7 +681,7 @@ static bool ts_parser__replace_children(TSParser *self, MutableSubtree *tree, Su
static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSymbol symbol,
uint32_t count, int dynamic_precedence,
uint16_t alias_sequence_id, bool fragile) {
uint16_t production_id, bool fragile) {
uint32_t initial_version_count = ts_stack_version_count(self->stack);
uint32_t removed_version_count = 0;
StackSliceArray pop = ts_stack_pop_count(self->stack, version, count);
@ -715,7 +715,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
}
MutableSubtree parent = ts_subtree_new_node(&self->tree_pool,
symbol, &children, alias_sequence_id, self->language
symbol, &children, production_id, self->language
);
// This pop operation may have caused multiple stack versions to collapse
@ -741,7 +741,7 @@ static StackVersion ts_parser__reduce(TSParser *self, StackVersion version, TSSy
}
parent.ptr->dynamic_precedence += dynamic_precedence;
parent.ptr->alias_sequence_id = alias_sequence_id;
parent.ptr->production_id = production_id;
TSStateId state = ts_stack_state(self->stack, slice_version);
TSStateId next_state = ts_language_next_state(self->language, state, symbol);
@ -797,7 +797,7 @@ static void ts_parser__accept(TSParser *self, StackVersion version, Subtree look
&self->tree_pool,
ts_subtree_symbol(child),
&trees,
child.ptr->alias_sequence_id,
child.ptr->production_id,
self->language
));
ts_subtree_release(&self->tree_pool, child);
@ -873,7 +873,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
.symbol = action.params.symbol,
.count = action.params.child_count,
.dynamic_precedence = action.params.dynamic_precedence,
.alias_sequence_id = action.params.alias_sequence_id,
.production_id = action.params.production_id,
});
default:
break;
@ -887,7 +887,7 @@ static bool ts_parser__do_all_potential_reductions(TSParser *self,
reduction_version = ts_parser__reduce(
self, version, action.symbol, action.count,
action.dynamic_precedence, action.alias_sequence_id,
action.dynamic_precedence, action.production_id,
true
);
}
@ -1331,7 +1331,7 @@ static bool ts_parser__advance(
LOG("reduce sym:%s, child_count:%u", SYM_NAME(action.params.symbol), action.params.child_count);
StackVersion reduction_version = ts_parser__reduce(
self, version, action.params.symbol, action.params.child_count,
action.params.dynamic_precedence, action.params.alias_sequence_id,
action.params.dynamic_precedence, action.params.production_id,
is_fragile
);
if (reduction_version != STACK_VERSION_NONE) {
@ -1549,7 +1549,10 @@ const TSLanguage *ts_parser_language(const TSParser *self) {
}
bool ts_parser_set_language(TSParser *self, const TSLanguage *language) {
if (language && language->version != TREE_SITTER_LANGUAGE_VERSION) return false;
if (language) {
if (language->version > TREE_SITTER_LANGUAGE_VERSION) return false;
if (language->version < TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION) return false;
}
if (self->external_scanner_payload && self->language->external_scanner.destroy) {
self->language->external_scanner.destroy(self->external_scanner_payload);

View file

@ -12,7 +12,7 @@ typedef struct {
uint32_t count;
TSSymbol symbol;
int dynamic_precedence;
unsigned short alias_sequence_id;
unsigned short production_id;
} ReduceAction;
typedef Array(ReduceAction) ReduceActionSet;

View file

@ -379,7 +379,7 @@ void ts_subtree_set_children(
self.ptr->dynamic_precedence = 0;
uint32_t non_extra_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
uint32_t lookahead_end_byte = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
@ -474,7 +474,7 @@ void ts_subtree_set_children(
}
MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
SubtreeArray *children, unsigned alias_sequence_id,
SubtreeArray *children, unsigned production_id,
const TSLanguage *language) {
TSSymbolMetadata metadata = ts_language_symbol_metadata(language, symbol);
bool fragile = symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat;
@ -482,7 +482,7 @@ MutableSubtree ts_subtree_new_node(SubtreePool *pool, TSSymbol symbol,
*data = (SubtreeHeapData) {
.ref_count = 1,
.symbol = symbol,
.alias_sequence_id = alias_sequence_id,
.production_id = production_id,
.visible = metadata.visible,
.named = metadata.named,
.has_changes = false,
@ -805,56 +805,90 @@ static void ts_subtree__write_dot_string(FILE *f, const char *string) {
}
}
static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t limit,
const TSLanguage *language, bool is_root,
bool include_all, TSSymbol alias_symbol,
bool alias_is_named) {
static const char *ROOT_FIELD = "__ROOT__";
static size_t ts_subtree__write_to_string(
Subtree self, char *string, size_t limit,
const TSLanguage *language, bool include_all,
TSSymbol alias_symbol, bool alias_is_named, const char *field_name
) {
if (!self.ptr) return snprintf(string, limit, "(NULL)");
char *cursor = string;
char **writer = (limit > 0) ? &cursor : &string;
bool visible =
include_all ||
is_root ||
alias_is_named ||
ts_subtree_missing(self) ||
(ts_subtree_visible(self) && ts_subtree_named(self)) ||
alias_is_named;
if (visible && !is_root) {
cursor += snprintf(*writer, limit, " ");
}
(ts_subtree_visible(self) && ts_subtree_named(self));
if (visible) {
if (field_name != ROOT_FIELD) {
cursor += snprintf(*writer, limit, " ");
if (field_name) {
cursor += snprintf(*writer, limit, "%s: ", field_name);
}
}
if (ts_subtree_is_error(self) && ts_subtree_child_count(self) == 0 && self.ptr->size.bytes > 0) {
cursor += snprintf(*writer, limit, "(UNEXPECTED ");
cursor += ts_subtree__write_char_to_string(*writer, limit, self.ptr->lookahead_char);
} else if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING");
} else {
TSSymbol symbol = alias_symbol ? alias_symbol : ts_subtree_symbol(self);
const char *symbol_name = ts_language_symbol_name(language, symbol);
cursor += snprintf(*writer, limit, "(%s", symbol_name);
if (ts_subtree_missing(self)) {
cursor += snprintf(*writer, limit, "(MISSING ");
if (alias_is_named || ts_subtree_named(self)) {
cursor += snprintf(*writer, limit, "%s", symbol_name);
} else {
cursor += snprintf(*writer, limit, "\"%s\"", symbol_name);
}
} else {
cursor += snprintf(*writer, limit, "(%s", symbol_name);
}
}
}
if (ts_subtree_child_count(self)) {
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->alias_sequence_id);
const TSSymbol *alias_sequence = ts_language_alias_sequence(language, self.ptr->production_id);
const TSFieldMapEntry *field_map, *field_map_end;
ts_language_field_map(
language,
self.ptr->production_id,
&field_map,
&field_map_end
);
uint32_t structural_child_index = 0;
for (uint32_t i = 0; i < self.ptr->child_count; i++) {
Subtree child = self.ptr->children[i];
if (ts_subtree_extra(child)) {
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
0, false
language, include_all,
0, false, NULL
);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
TSSymbol alias_symbol = alias_sequence
? alias_sequence[structural_child_index]
: 0;
bool alias_is_named = alias_symbol
? ts_language_symbol_metadata(language, alias_symbol).named
: false;
const char *child_field_name = visible ? NULL : field_name;
for (const TSFieldMapEntry *i = field_map; i < field_map_end; i++) {
if (!i->inherited && i->child_index == structural_child_index) {
child_field_name = language->field_names[i->field_id];
break;
}
}
cursor += ts_subtree__write_to_string(
child, *writer, limit,
language, false, include_all,
alias_symbol,
alias_symbol ? ts_language_symbol_metadata(language, alias_symbol).named : false
language, include_all,
alias_symbol, alias_is_named, child_field_name
);
structural_child_index++;
}
@ -866,15 +900,23 @@ static size_t ts_subtree__write_to_string(Subtree self, char *string, size_t lim
return cursor - string;
}
char *ts_subtree_string(Subtree self, const TSLanguage *language, bool include_all) {
char *ts_subtree_string(
Subtree self,
const TSLanguage *language,
bool include_all
) {
char scratch_string[1];
size_t size = ts_subtree__write_to_string(
self, scratch_string, 0,
language, true,
include_all, 0, false
language, include_all,
0, false, ROOT_FIELD
) + 1;
char *result = malloc(size * sizeof(char));
ts_subtree__write_to_string(self, result, size, language, true, include_all, 0, false);
ts_subtree__write_to_string(
self, result, size,
language, include_all,
0, false, ROOT_FIELD
);
return result;
}
@ -913,20 +955,17 @@ void ts_subtree__print_dot_graph(const Subtree *self, uint32_t start_offset,
fprintf(f, "\"]\n");
uint32_t child_start_offset = start_offset;
uint32_t structural_child_index = 0;
const TSSymbol *alias_sequence = ts_language_alias_sequence(
language,
ts_subtree_alias_sequence_id(*self)
);
uint32_t child_info_offset =
language->max_alias_sequence_length *
ts_subtree_production_id(*self);
for (uint32_t i = 0, n = ts_subtree_child_count(*self); i < n; i++) {
const Subtree *child = &self->ptr->children[i];
if (ts_subtree_extra(*child)) {
ts_subtree__print_dot_graph(child, child_start_offset, language, 0, f);
} else {
TSSymbol alias_symbol = alias_sequence ? alias_sequence[structural_child_index] : 0;
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
structural_child_index++;
TSSymbol alias_symbol = 0;
if (!ts_subtree_extra(*child) && child_info_offset) {
alias_symbol = language->alias_sequences[child_info_offset];
child_info_offset++;
}
ts_subtree__print_dot_graph(child, child_start_offset, language, alias_symbol, f);
fprintf(f, "tree_%p -> tree_%p [tooltip=%u]\n", self, child, i);
child_start_offset += ts_subtree_total_bytes(*child);
}

View file

@ -73,7 +73,7 @@ typedef struct {
uint32_t node_count;
uint32_t repeat_depth;
int32_t dynamic_precedence;
uint16_t alias_sequence_id;
uint16_t production_id;
struct {
TSSymbol symbol;
TSStateId parse_state;
@ -229,9 +229,9 @@ static inline int32_t ts_subtree_dynamic_precedence(Subtree self) {
return (self.data.is_inline || self.ptr->child_count == 0) ? 0 : self.ptr->dynamic_precedence;
}
static inline uint16_t ts_subtree_alias_sequence_id(Subtree self) {
static inline uint16_t ts_subtree_production_id(Subtree self) {
if (ts_subtree_child_count(self) > 0) {
return self.ptr->alias_sequence_id;
return self.ptr->production_id;
} else {
return 0;
}

View file

@ -22,7 +22,7 @@ static inline CursorChildIterator ts_tree_cursor_iterate_children(const TreeCurs
}
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
last_entry->subtree->ptr->alias_sequence_id
last_entry->subtree->ptr->production_id
);
return (CursorChildIterator) {
.tree = self->tree,
@ -49,11 +49,11 @@ static inline bool ts_tree_cursor_child_iterator_next(CursorChildIterator *self,
bool extra = ts_subtree_extra(*child);
if (!extra && self->alias_sequence) {
*visible |= self->alias_sequence[self->structural_child_index];
self->structural_child_index++;
}
self->position = length_add(self->position, ts_subtree_size(*child));
self->child_index++;
if (!extra) self->structural_child_index++;
if (self->child_index < self->parent.ptr->child_count) {
Subtree next_child = self->parent.ptr->children[self->child_index];
@ -210,7 +210,7 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) {
TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->alias_sequence_id
parent_entry->subtree->ptr->production_id
);
is_aliased = alias_sequence && alias_sequence[entry->structural_child_index];
}
@ -230,7 +230,7 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
TreeCursorEntry *parent_entry = &self->stack.contents[self->stack.size - 2];
const TSSymbol *alias_sequence = ts_language_alias_sequence(
self->tree->language,
parent_entry->subtree->ptr->alias_sequence_id
parent_entry->subtree->ptr->production_id
);
if (alias_sequence && !ts_subtree_extra(*last_entry->subtree)) {
alias_symbol = alias_sequence[last_entry->structural_child_index];
@ -243,3 +243,51 @@ TSNode ts_tree_cursor_current_node(const TSTreeCursor *_self) {
alias_symbol
);
}
// Return the field id associated with the cursor's current node, or 0 if
// the current node does not occupy a field in its parent's production.
TSFieldId ts_tree_cursor_current_field_id(const TSTreeCursor *_self) {
  const TreeCursor *self = (const TreeCursor *)_self;

  // Walk up the tree, visiting the current node and its invisible ancestors.
  for (unsigned i = self->stack.size - 1; i > 0; i--) {
    TreeCursorEntry *entry = &self->stack.contents[i];
    TreeCursorEntry *parent_entry = &self->stack.contents[i - 1];

    // Stop walking up when another visible node is found. The topmost
    // stack entry (the current node itself) is exempt from this check.
    if (i != self->stack.size - 1) {
      if (ts_subtree_visible(*entry->subtree)) break;
      // An aliased child counts as visible even when its subtree is hidden.
      const TSSymbol *alias_sequence = ts_language_alias_sequence(
        self->tree->language,
        parent_entry->subtree->ptr->production_id
      );
      if (alias_sequence && alias_sequence[entry->structural_child_index]) {
        break;
      }
    }

    // Look for a non-inherited field mapping whose child index matches
    // this entry's structural position within its parent's production.
    const TSFieldMapEntry *field_map, *field_map_end;
    ts_language_field_map(
      self->tree->language,
      parent_entry->subtree->ptr->production_id,
      &field_map, &field_map_end
    );
    while (field_map < field_map_end) {
      if (
        !field_map->inherited &&
        field_map->child_index == entry->structural_child_index
      ) return field_map->field_id;
      field_map++;
    }
  }
  return 0;
}
// Return the field name associated with the cursor's current node, or
// NULL if the current node does not occupy a field in its parent.
const char *ts_tree_cursor_current_field_name(const TSTreeCursor *_self) {
  TSFieldId id = ts_tree_cursor_current_field_id(_self);
  if (!id) return NULL;
  const TreeCursor *self = (const TreeCursor *)_self;
  return self->tree->language->field_names[id];
}

View file

@ -12,6 +12,13 @@ bindgen \
$header_path > $output_path
echo "" >> $output_path
version_constant='TREE_SITTER_LANGUAGE_VERSION'
version_number=$(egrep "#define $version_constant (.*)" $header_path | cut -d' ' -f3)
echo "pub const $version_constant: usize = $version_number;" >> $output_path
defines=(
TREE_SITTER_LANGUAGE_VERSION
TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION
)
for define in ${defines[@]}; do
define_value=$(egrep "#define $define (.*)" $header_path | cut -d' ' -f3)
echo "pub const $define: usize = $define_value;" >> $output_path
done

View file

@ -14,8 +14,8 @@ int main() {
(primitive_type)
(function_declarator (identifier) (parameter_list))
(compound_statement
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING)))))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";"))
(expression_statement (call_expression (identifier) (argument_list (string_literal))) (MISSING ";")))))
==============================================
Top-level declarations with missing semicolons
@ -27,8 +27,8 @@ static int b
---
(translation_unit
(declaration (primitive_type) (identifier) (MISSING))
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING)))
(declaration (primitive_type) (identifier) (MISSING ";"))
(declaration (storage_class_specifier) (primitive_type) (identifier) (MISSING ";")))
==========================================
Partial declaration lists inside ifdefs
@ -58,7 +58,7 @@ int c() {
(comment)
(declaration (primitive_type) (identifier))
(function_definition (primitive_type) (function_declarator (identifier) (parameter_list)) (compound_statement (return_statement (number_literal))))
(preproc_ifdef (identifier) (MISSING))))))
(preproc_ifdef (identifier) (MISSING "#endif"))))))
==========================================
If statements with incomplete expressions
@ -83,12 +83,12 @@ int main() {
(if_statement
(parenthesized_expression (field_expression
(identifier)
(MISSING)))
(MISSING field_identifier)))
(compound_statement
(expression_statement (call_expression (identifier) (argument_list)))
(expression_statement (call_expression (identifier) (argument_list)))
(if_statement
(parenthesized_expression (pointer_expression (MISSING)))
(parenthesized_expression (pointer_expression (MISSING identifier)))
(expression_statement (call_expression (identifier) (argument_list)))))))))
====================================

View file

@ -36,7 +36,7 @@ Missing object-literal values
(program (expression_statement (object
(pair (property_identifier) (identifier))
(pair (property_identifier) (MISSING)))))
(pair (property_identifier) (MISSING identifier)))))
===================================================
Extra identifiers in expressions
@ -81,7 +81,7 @@ if ({a: 'b'} {c: 'd'}) {
(assignment_expression
(identifier)
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))
(MISSING))
(MISSING ";"))
(function (formal_parameters (identifier)) (statement_block (expression_statement (identifier)))))))
===================================================
@ -153,7 +153,7 @@ const h = `i ${j(k} l`
(identifier)
(template_string (template_substitution (call_expression
(identifier)
(arguments (identifier) (MISSING))))))))
(arguments (identifier) (MISSING ")"))))))))
=========================================================
Long sequences of invalid tokens

View file

@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: expression '+' (math_operation expression '+' expression)
2: (math_operation expression '+' expression) • '+' …
1: (math_operation expression '+' expression) • '+' …
2: expression '+' (math_operation expression '+' expression)
Possible resolutions:

View file

@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: '[' (array_type_repeat1 identifier) • identifier …
2: '[' (array_repeat1 identifier) • identifier …
1: '[' (array_repeat1 identifier) • identifier …
2: '[' (array_type_repeat1 identifier) • identifier …
Possible resolutions:

View file

@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: _program_start '[' (array_type_repeat1 identifier) • identifier …
2: _program_start '[' (array_repeat1 identifier) • identifier …
1: _program_start '[' (array_repeat1 identifier) • identifier …
2: _program_start '[' (array_type_repeat1 identifier) • identifier …
Possible resolutions:

View file

@ -4,9 +4,9 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left)
1: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left)
2: expression '+' (other_thing expression • '*' '*') (precedence: -1, associativity: Left)
3: (sum expression '+' expression) • '*' … (precedence: 0, associativity: Left)
3: expression '+' (product expression • '*' expression) (precedence: 1, associativity: Left)
Possible resolutions:

View file

@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: (unary_b '!' expression) • '<' … (precedence: 2)
2: (unary_a '!' expression) • '<' … (precedence: 2)
1: (unary_a '!' expression) • '<' … (precedence: 2)
2: (unary_b '!' expression) • '<' … (precedence: 2)
Possible resolutions:

View file

@ -4,8 +4,8 @@ Unresolved conflict for symbol sequence:
Possible interpretations:
1: identifier (function_call identifier • block) (precedence: 0, associativity: Right)
2: identifier (expression identifier) • '{' …
1: identifier (expression identifier) • '{' …
2: identifier (function_call identifier • block) (precedence: 0, associativity: Right)
Possible resolutions: