diff --git a/cli/src/tests/language_test.rs b/cli/src/tests/language_test.rs
new file mode 100644
index 00000000..f8a4653f
--- /dev/null
+++ b/cli/src/tests/language_test.rs
@@ -0,0 +1,45 @@
+use super::helpers::fixtures::get_language;
+use tree_sitter::Parser;
+
+/// Checks that node parse states agree with `Language::next_state` and that a
+/// lookahead iterator yields the same symbols again after each kind of reset.
+#[test]
+fn test_lookahead_iterator() {
+    let mut parser = Parser::new();
+    let language = get_language("rust");
+    parser.set_language(language).unwrap();
+
+    let tree = parser.parse("struct Stuff {}", None).unwrap();
+
+    let mut cursor = tree.walk();
+
+    assert!(cursor.goto_first_child()); // struct
+    assert!(cursor.goto_first_child()); // struct keyword
+
+    let next_state = cursor.node().next_parse_state();
+    assert_ne!(next_state, 0);
+    assert_eq!(
+        next_state,
+        language.next_state(cursor.node().parse_state(), cursor.node().grammar_id())
+    );
+    assert!((next_state as usize) < language.parse_state_count());
+    assert!(cursor.goto_next_sibling()); // type_identifier
+    assert_eq!(next_state, cursor.node().parse_state());
+    // The grammar name and id ignore aliases, so they differ from the node's kind.
+    assert_eq!(cursor.node().grammar_name(), "identifier");
+    assert_ne!(cursor.node().grammar_id(), cursor.node().kind_id());
+
+    let expected_symbols = ["identifier", "block_comment", "line_comment"];
+    let lookahead = language.lookahead_iterator(next_state).unwrap();
+    assert_eq!(lookahead.language(), language);
+    assert!(lookahead.iter_names().eq(expected_symbols));
+
+    // Both resets report success as a `bool`; check it instead of discarding it.
+    assert!(lookahead.reset_state(next_state));
+    assert!(lookahead.iter_names().eq(expected_symbols));
+
+    assert!(lookahead.reset(language, next_state));
+    assert!(lookahead
+        .map(|s| language.node_kind_for_id(s).unwrap())
+        .eq(expected_symbols));
+}
diff --git a/cli/src/tests/mod.rs b/cli/src/tests/mod.rs
index 223b6d3c..577770a2 100644
--- a/cli/src/tests/mod.rs
+++ b/cli/src/tests/mod.rs
@@ -2,6 +2,7 @@ mod corpus_test;
 mod github_issue_test;
 mod helpers;
 mod highlight_test;
+mod language_test;
 mod node_test;
 mod parser_test;
 mod pathological_test;
diff --git a/cli/src/tests/tree_test.rs b/cli/src/tests/tree_test.rs
index 
be0c4ff1..7d091c3f 100644 --- a/cli/src/tests/tree_test.rs +++ b/cli/src/tests/tree_test.rs @@ -306,7 +306,7 @@ fn test_tree_cursor() { .parse( " struct Stuff { - a: A; + a: A, b: Option, } ", @@ -331,6 +331,49 @@ fn test_tree_cursor() { assert!(cursor.goto_next_sibling()); assert_eq!(cursor.node().kind(), "field_declaration_list"); assert_eq!(cursor.node().is_named(), true); + + assert!(cursor.goto_last_child()); + assert_eq!(cursor.node().kind(), "}"); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 4, column: 16 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 32 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert_eq!(cursor.node().is_named(), true); + assert_eq!(cursor.node().start_position(), Point { row: 3, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), ","); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 24 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "field_declaration"); + assert_eq!(cursor.node().is_named(), true); + assert_eq!(cursor.node().start_position(), Point { row: 2, column: 20 }); + + assert!(cursor.goto_previous_sibling()); + assert_eq!(cursor.node().kind(), "{"); + assert_eq!(cursor.node().is_named(), false); + assert_eq!(cursor.node().start_position(), Point { row: 1, column: 29 }); + + let mut copy = tree.walk(); + copy.reset_to(cursor); + + assert_eq!(copy.node().kind(), "{"); + assert_eq!(copy.node().is_named(), false); + + assert!(copy.goto_parent()); + assert_eq!(copy.node().kind(), "field_declaration_list"); + assert_eq!(copy.node().is_named(), true); + + assert!(copy.goto_parent()); + 
assert_eq!(copy.node().kind(), "struct_item"); } #[test] diff --git a/lib/binding_rust/bindings.rs b/lib/binding_rust/bindings.rs index 82eec594..912916c0 100644 --- a/lib/binding_rust/bindings.rs +++ b/lib/binding_rust/bindings.rs @@ -1,5 +1,6 @@ /* automatically generated by rust-bindgen 0.66.1 */ +pub type TSStateId = u16; pub type TSSymbol = u16; pub type TSFieldId = u16; #[repr(C)] @@ -27,6 +28,11 @@ pub struct TSQuery { pub struct TSQueryCursor { _unused: [u8; 0], } +#[repr(C)] +#[derive(Debug)] +pub struct TSLookaheadIterator { + _unused: [u8; 0], +} pub const TSInputEncoding_TSInputEncodingUTF8: TSInputEncoding = 0; pub const TSInputEncoding_TSInputEncodingUTF16: TSInputEncoding = 1; pub type TSInputEncoding = ::std::os::raw::c_uint; @@ -282,6 +288,14 @@ extern "C" { #[doc = " Get the node's language."] pub fn ts_node_language(arg1: TSNode) -> *const TSLanguage; } +extern "C" { + #[doc = " Get the node's type as it appears in the grammar ignoring aliases as a\n null-terminated string."] + pub fn ts_node_grammar_type(arg1: TSNode) -> *const ::std::os::raw::c_char; +} +extern "C" { + #[doc = " Get the node's type as a numerical id as it appears in the grammar ignoring\n aliases. 
This should be used in `ts_language_next_state` instead of\n `ts_node_symbol`."] + pub fn ts_node_grammar_symbol(arg1: TSNode) -> TSSymbol; +} extern "C" { #[doc = " Get the node's start byte."] pub fn ts_node_start_byte(arg1: TSNode) -> u32; @@ -326,6 +340,18 @@ extern "C" { #[doc = " Check if the node is a syntax error or contains any syntax errors."] pub fn ts_node_has_error(arg1: TSNode) -> bool; } +extern "C" { + #[doc = " Check if the node is a syntax error."] + pub fn ts_node_is_error(arg1: TSNode) -> bool; +} +extern "C" { + #[doc = " Get this node's parse state."] + pub fn ts_node_parse_state(arg1: TSNode) -> TSStateId; +} +extern "C" { + #[doc = " Get the parse state after this node."] + pub fn ts_node_next_parse_state(arg1: TSNode) -> TSStateId; +} extern "C" { #[doc = " Get the node's immediate parent."] pub fn ts_node_parent(arg1: TSNode) -> TSNode; @@ -427,6 +453,10 @@ extern "C" { #[doc = " Re-initialize a tree cursor to start at a different node."] pub fn ts_tree_cursor_reset(arg1: *mut TSTreeCursor, arg2: TSNode); } +extern "C" { + #[doc = " Re-initialize a tree cursor to the same position as another cursor.\n\n Unlike `ts_tree_cursor_reset`, this will not lose parent information and\n allows reusing already created cursors."] + pub fn ts_tree_cursor_reset_to(arg1: *mut TSTreeCursor, arg2: *const TSTreeCursor); +} extern "C" { #[doc = " Get the tree cursor's current node."] pub fn ts_tree_cursor_current_node(arg1: *const TSTreeCursor) -> TSNode; @@ -449,10 +479,18 @@ extern "C" { #[doc = " Move the cursor to the next sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there was no next sibling node."] pub fn ts_tree_cursor_goto_next_sibling(arg1: *mut TSTreeCursor) -> bool; } +extern "C" { + #[doc = " Move the cursor to the previous sibling of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there was no previous sibling node.\n\n Note, 
that this function may be slower than\n `ts_tree_cursor_goto_next_sibling` due to how node positions are stored. In\n the worst case, this will need to iterate through all the children upto the\n previous sibling node to recalculate its position."] + pub fn ts_tree_cursor_goto_previous_sibling(arg1: *mut TSTreeCursor) -> bool; +} extern "C" { #[doc = " Move the cursor to the first child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false`\n if there were no children."] pub fn ts_tree_cursor_goto_first_child(arg1: *mut TSTreeCursor) -> bool; } +extern "C" { + #[doc = " Move the cursor to the last child of its current node.\n\n This returns `true` if the cursor successfully moved, and returns `false` if\n there were no children.\n\n Note that this function may be slower than `ts_tree_cursor_goto_first_child`\n because it needs to iterate through all the children to compute the child's\n position."] + pub fn ts_tree_cursor_goto_last_child(arg1: *mut TSTreeCursor) -> bool; +} extern "C" { #[doc = " Move the cursor to the node that is the nth descendant of\n the original node that the cursor was constructed with, where\n zero represents the original node itself."] pub fn ts_tree_cursor_goto_descendant(arg1: *mut TSTreeCursor, arg2: u32); @@ -608,6 +646,10 @@ extern "C" { #[doc = " Get the number of distinct node types in the language."] pub fn ts_language_symbol_count(arg1: *const TSLanguage) -> u32; } +extern "C" { + #[doc = " Get the number of valid states in this language."] + pub fn ts_language_state_count(arg1: *const TSLanguage) -> u32; +} extern "C" { #[doc = " Get a node type string for the given numerical id."] pub fn ts_language_symbol_name( @@ -651,6 +693,58 @@ extern "C" { #[doc = " Get the ABI version number for this language. 
This version number is used\n to ensure that languages were generated by a compatible version of\n Tree-sitter.\n\n See also `ts_parser_set_language`."] pub fn ts_language_version(arg1: *const TSLanguage) -> u32; } +extern "C" { + #[doc = " Get the next parse state. Combine this with lookahead iterators to generate\n completion suggestions or valid symbols in error nodes. Use\n `ts_node_grammar_symbol` for valid symbols."] + pub fn ts_language_next_state( + arg1: *const TSLanguage, + arg2: TSStateId, + arg3: TSSymbol, + ) -> TSStateId; +} +extern "C" { + #[doc = " Create a new lookahead iterator for the given language and parse state.\n\n This returns `NULL` if state is invalid for the language.\n\n Repeatedly using `ts_lookahead_iterator_advance` and\n `ts_lookahead_iterator_current_symbol` will generate valid symbols in the\n given parse state. Newly created lookahead iterators will contain the `ERROR`\n symbol.\n\n Lookahead iterators can be useful to generate suggestions and improve syntax\n error diagnostics. To get symbols valid in an ERROR node, use the lookahead\n iterator on its first leaf node state. 
For `MISSING` nodes, a lookahead\n iterator created on the previous non-extra leaf node may be appropriate."] + pub fn ts_lookahead_iterator_new( + arg1: *const TSLanguage, + arg2: TSStateId, + ) -> *mut TSLookaheadIterator; +} +extern "C" { + #[doc = " Delete a lookahead iterator freeing all the memory used."] + pub fn ts_lookahead_iterator_delete(arg1: *mut TSLookaheadIterator); +} +extern "C" { + #[doc = " Reset the lookahead iterator to another state.\n\n This returns `true` if the iterator was reset to the given state and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset_state( + arg1: *mut TSLookaheadIterator, + arg2: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Reset the lookahead iterator.\n\n This returns `true` if the language was set successfully and `false`\n otherwise."] + pub fn ts_lookahead_iterator_reset( + arg1: *mut TSLookaheadIterator, + arg2: *const TSLanguage, + arg3: TSStateId, + ) -> bool; +} +extern "C" { + #[doc = " Get the current language of the lookahead iterator."] + pub fn ts_lookahead_iterator_language(arg1: *const TSLookaheadIterator) -> *const TSLanguage; +} +extern "C" { + #[doc = " Advance the lookahead iterator to the next symbol.\n\n This returns `true` if there is a new symbol and `false` otherwise."] + pub fn ts_lookahead_iterator_advance(arg1: *mut TSLookaheadIterator) -> bool; +} +extern "C" { + #[doc = " Get the current symbol of the lookahead iterator;"] + pub fn ts_lookahead_iterator_current_symbol(arg1: *const TSLookaheadIterator) -> TSSymbol; +} +extern "C" { + #[doc = " Get the current symbol type of the lookahead iterator as a null terminated\n string."] + pub fn ts_lookahead_iterator_current_symbol_name( + arg1: *const TSLookaheadIterator, + ) -> *const ::std::os::raw::c_char; +} extern "C" { #[doc = " Set the allocation functions used by the library.\n\n By default, Tree-sitter uses the standard libc allocation functions,\n but aborts the process when an allocation fails. 
This function lets\n you supply alternative allocation functions at runtime.\n\n If you pass `NULL` for any parameter, Tree-sitter will switch back to\n its default implementation of that function.\n\n If you call this function after the library has already been used, then\n you must ensure that either:\n 1. All the existing objects have been freed.\n 2. The new allocator shares its state with the old one, so it is capable\n of freeing memory that was allocated by the old allocator."]
     pub fn ts_set_allocator(
diff --git a/lib/binding_rust/ffi.rs b/lib/binding_rust/ffi.rs
index fc872041..ac4da98b 100644
--- a/lib/binding_rust/ffi.rs
+++ b/lib/binding_rust/ffi.rs
@@ -8,7 +8,9 @@ extern "C" {
     pub(crate) fn dup(fd: std::os::raw::c_int) -> std::os::raw::c_int;
 }
 
-use crate::{Language, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor};
+use crate::{
+    Language, LookaheadIterator, Node, Parser, Query, QueryCursor, QueryError, Tree, TreeCursor,
+};
 use std::{marker::PhantomData, mem::ManuallyDrop, ptr::NonNull, str};
 
 impl Language {
@@ -130,3 +132,21 @@ impl QueryCursor {
         ManuallyDrop::new(self).ptr.as_ptr()
     }
 }
+
+impl LookaheadIterator {
+    /// Reconstructs a [LookaheadIterator] from a raw pointer.
+    ///
+    /// # Safety
+    ///
+    /// `ptr` must be non-null and must point to a live `TSLookaheadIterator`
+    /// that nothing else will delete: the returned value takes ownership and
+    /// calls `ts_lookahead_iterator_delete` on it when dropped.
+    pub unsafe fn from_raw(ptr: *mut TSLookaheadIterator) -> LookaheadIterator {
+        LookaheadIterator(NonNull::new_unchecked(ptr))
+    }
+
+    /// Consumes the [LookaheadIterator], returning a raw pointer to the underlying C structure.
+    pub fn into_raw(self) -> *mut TSLookaheadIterator {
+        ManuallyDrop::new(self).0.as_ptr()
+    }
+}
diff --git a/lib/binding_rust/lib.rs b/lib/binding_rust/lib.rs
index 2f8323df..2e3403b4 100644
--- a/lib/binding_rust/lib.rs
+++ b/lib/binding_rust/lib.rs
@@ -87,6 +87,10 @@ pub struct Node<'a>(ffi::TSNode, PhantomData<&'a ()>);
 #[doc(alias = "TSParser")]
 pub struct Parser(NonNull<ffi::TSParser>);
 
+/// A stateful object that is used to look up symbols valid in a specific parse state.
+#[doc(alias = "TSLookaheadIterator")]
+pub struct LookaheadIterator(NonNull<ffi::TSLookaheadIterator>);
+
 /// A type of log message.
 #[derive(Debug, PartialEq, Eq)]
 pub enum LogType {
@@ -269,6 +273,12 @@ impl Language {
         unsafe { ffi::ts_language_symbol_count(self.0) as usize }
     }
 
+    /// Get the number of valid states in this language.
+    #[doc(alias = "ts_language_state_count")]
+    pub fn parse_state_count(&self) -> usize {
+        unsafe { ffi::ts_language_state_count(self.0) as usize }
+    }
+
     /// Get the name of the node kind for the given numerical id.
     #[doc(alias = "ts_language_symbol_name")]
     pub fn node_kind_for_id(&self, id: u16) -> Option<&'static str> {
@@ -336,6 +346,41 @@ impl Language {
         };
         FieldId::new(id)
     }
+
+    /// Get the next parse state. Combine this with [`Language::lookahead_iterator`] to
+    /// generate completion suggestions or valid symbols in error nodes.
+    ///
+    /// Example:
+    /// ```ignore
+    /// let state = language.next_state(node.parse_state(), node.grammar_id());
+    /// ```
+    #[doc(alias = "ts_language_next_state")]
+    pub fn next_state(&self, state: u16, id: u16) -> u16 {
+        unsafe { ffi::ts_language_next_state(self.0, state, id) }
+    }
+
+    /// Create a new lookahead iterator for this language and parse state.
+    ///
+    /// This returns `None` if state is invalid for this language.
+    ///
+    /// Iterating [LookaheadIterator] will yield valid symbols in the given
+    /// parse state. Newly created lookahead iterators will return the `ERROR`
+    /// symbol from [LookaheadIterator::current_symbol].
+    ///
+    /// Lookahead iterators can be useful to generate suggestions and improve
+    /// syntax error diagnostics. To get symbols valid in an ERROR node, use the
+    /// lookahead iterator on its first leaf node state. For `MISSING` nodes, a
+    /// lookahead iterator created on the previous non-extra leaf node may be
+    /// appropriate.
+    #[doc(alias = "ts_lookahead_iterator_new")]
+    pub fn lookahead_iterator(&self, state: u16) -> Option<LookaheadIterator> {
+        let ptr = unsafe { ffi::ts_lookahead_iterator_new(self.0, state) };
+        if ptr.is_null() {
+            None
+        } else {
+            Some(unsafe { LookaheadIterator::from_raw(ptr) })
+        }
+    }
 }
 
 impl Parser {
@@ -832,6 +877,13 @@ impl<'tree> Node<'tree> {
         unsafe { ffi::ts_node_symbol(self.0) }
     }
 
+    /// Get the node's type as a numerical id as it appears in the grammar
+    /// ignoring aliases.
+    #[doc(alias = "ts_node_grammar_symbol")]
+    pub fn grammar_id(&self) -> u16 {
+        unsafe { ffi::ts_node_grammar_symbol(self.0) }
+    }
+
     /// Get this node's type as a string.
     #[doc(alias = "ts_node_type")]
     pub fn kind(&self) -> &'static str {
@@ -840,6 +892,15 @@ impl<'tree> Node<'tree> {
             .unwrap()
     }
 
+    /// Get this node's symbol name as it appears in the grammar ignoring
+    /// aliases as a string.
+    #[doc(alias = "ts_node_grammar_type")]
+    pub fn grammar_name(&self) -> &'static str {
+        unsafe { CStr::from_ptr(ffi::ts_node_grammar_type(self.0)) }
+            .to_str()
+            .unwrap()
+    }
+
     /// Get the [Language] that was used to parse this node's syntax tree.
     #[doc(alias = "ts_node_language")]
     pub fn language(&self) -> Language {
@@ -881,8 +942,21 @@ impl<'tree> Node<'tree> {
     ///
     /// Syntax errors represent parts of the code that could not be incorporated into a
     /// valid syntax tree.
+    #[doc(alias = "ts_node_is_error")]
     pub fn is_error(&self) -> bool {
-        self.kind_id() == u16::MAX
+        unsafe { ffi::ts_node_is_error(self.0) }
+    }
+
+    /// Get this node's parse state.
+    #[doc(alias = "ts_node_parse_state")]
+    pub fn parse_state(&self) -> u16 {
+        unsafe { ffi::ts_node_parse_state(self.0) }
+    }
+
+    /// Get the parse state after this node.
+    #[doc(alias = "ts_node_next_parse_state")]
+    pub fn next_parse_state(&self) -> u16 {
+        unsafe { ffi::ts_node_next_parse_state(self.0) }
     }
 
     /// Check if this node is *missing*.
@@ -1305,6 +1379,19 @@ impl<'a> TreeCursor<'a> {
         return unsafe { ffi::ts_tree_cursor_goto_first_child(&mut self.0) };
     }
 
+    /// Move this cursor to the last child of its current node.
+    ///
+    /// This returns `true` if the cursor successfully moved, and returns
+    /// `false` if there were no children.
+    ///
+    /// Note that this function may be slower than
+    /// [`goto_first_child`](TreeCursor::goto_first_child) because it needs to
+    /// iterate through all the children to compute the child's position.
+    #[doc(alias = "ts_tree_cursor_goto_last_child")]
+    pub fn goto_last_child(&mut self) -> bool {
+        return unsafe { ffi::ts_tree_cursor_goto_last_child(&mut self.0) };
+    }
+
     /// Move this cursor to the parent of its current node.
     ///
     /// This returns `true` if the cursor successfully moved, and returns `false`
@@ -1333,6 +1420,21 @@ impl<'a> TreeCursor<'a> {
         };
     }
 
+    /// Move this cursor to the previous sibling of its current node.
+    ///
+    /// This returns `true` if the cursor successfully moved, and returns
+    /// `false` if there was no previous sibling node.
+    ///
+    /// Note that this function may be slower than
+    /// [`goto_next_sibling`](TreeCursor::goto_next_sibling) due to how node
+    /// positions are stored. In the worst case, this will need to iterate
+    /// through all the children up to the previous sibling node to recalculate
+    /// its position.
+    #[doc(alias = "ts_tree_cursor_goto_previous_sibling")]
+    pub fn goto_previous_sibling(&mut self) -> bool {
+        return unsafe { ffi::ts_tree_cursor_goto_previous_sibling(&mut self.0) };
+    }
+
     /// Move this cursor to the first child of its current node that extends beyond
     /// the given byte offset.
     ///
@@ -1370,6 +1472,18 @@ impl<'a> TreeCursor<'a> {
     pub fn reset(&mut self, node: Node<'a>) {
         unsafe { ffi::ts_tree_cursor_reset(&mut self.0, node.0) };
     }
+
+    /// Re-initialize a tree cursor to the same position as another cursor.
+    ///
+    /// Unlike `reset`, this will not lose parent information and
+    /// allows reusing already created cursors.
+    ///
+    /// `cursor` is taken by value, so it is dropped after its position is
+    /// copied; pass a clone to keep using the source cursor.
+    #[doc(alias = "ts_tree_cursor_reset_to")]
+    pub fn reset_to(&mut self, cursor: TreeCursor<'a>) {
+        unsafe { ffi::ts_tree_cursor_reset_to(&mut self.0, &cursor.0) };
+    }
 }
 
 impl<'a> Clone for TreeCursor<'a> {
@@ -1384,6 +1498,95 @@ impl<'a> Drop for TreeCursor<'a> {
     }
 }
 
+impl LookaheadIterator {
+    /// Get the current language of the lookahead iterator.
+    #[doc(alias = "ts_lookahead_iterator_language")]
+    pub fn language(&self) -> Language {
+        Language(unsafe { ffi::ts_lookahead_iterator_language(self.0.as_ptr()) })
+    }
+
+    /// Get the current symbol of the lookahead iterator.
+    #[doc(alias = "ts_lookahead_iterator_current_symbol")]
+    pub fn current_symbol(&self) -> u16 {
+        unsafe { ffi::ts_lookahead_iterator_current_symbol(self.0.as_ptr()) }
+    }
+
+    /// Get the current symbol name of the lookahead iterator.
+    #[doc(alias = "ts_lookahead_iterator_current_symbol_name")]
+    pub fn current_symbol_name(&self) -> &'static str {
+        unsafe {
+            CStr::from_ptr(ffi::ts_lookahead_iterator_current_symbol_name(
+                self.0.as_ptr(),
+            ))
+            .to_str()
+            .unwrap()
+        }
+    }
+
+    /// Reset the lookahead iterator.
+    ///
+    /// This returns `true` if the language was set successfully and `false`
+    /// otherwise.
+    #[doc(alias = "ts_lookahead_iterator_reset")]
+    pub fn reset(&self, language: Language, state: u16) -> bool {
+        unsafe { ffi::ts_lookahead_iterator_reset(self.0.as_ptr(), language.0, state) }
+    }
+
+    /// Reset the lookahead iterator to another state.
+    ///
+    /// This returns `true` if the iterator was reset to the given state and `false`
+    /// otherwise.
+    #[doc(alias = "ts_lookahead_iterator_reset_state")]
+    pub fn reset_state(&self, state: u16) -> bool {
+        unsafe { ffi::ts_lookahead_iterator_reset_state(self.0.as_ptr(), state) }
+    }
+
+    /// Iterate symbol names.
+    ///
+    /// The returned iterator advances this lookahead iterator as it goes, so
+    /// it continues from the current position rather than starting over.
+    pub fn iter_names<'a>(&'a self) -> impl Iterator<Item = &'static str> + 'a {
+        NameLookaheadIterator(self)
+    }
+}
+
+/// Adapter returned by [`LookaheadIterator::iter_names`] that yields symbol names.
+struct NameLookaheadIterator<'a>(&'a LookaheadIterator);
+
+impl<'a> Iterator for NameLookaheadIterator<'a> {
+    type Item = &'static str;
+
+    #[doc(alias = "ts_lookahead_iterator_advance")]
+    fn next(&mut self) -> Option<Self::Item> {
+        if !(unsafe { ffi::ts_lookahead_iterator_advance(self.0 .0.as_ptr()) }) {
+            None
+        } else {
+            Some(self.0.current_symbol_name())
+        }
+    }
+}
+
+impl Iterator for LookaheadIterator {
+    type Item = u16;
+
+    #[doc(alias = "ts_lookahead_iterator_advance")]
+    fn next(&mut self) -> Option<Self::Item> {
+        // A newly created iterator sits on `ERROR`; advancing before reading skips it.
+        if !(unsafe { ffi::ts_lookahead_iterator_advance(self.0.as_ptr()) }) {
+            None
+        } else {
+            Some(self.current_symbol())
+        }
+    }
+}
+
+impl Drop for LookaheadIterator {
+    #[doc(alias = "ts_lookahead_iterator_delete")]
+    fn drop(&mut self) {
+        unsafe { ffi::ts_lookahead_iterator_delete(self.0.as_ptr()) }
+    }
+}
+
 impl Query {
     /// Create a new query from a string containing one or more S-expression
     /// patterns.
diff --git a/lib/binding_web/binding.c b/lib/binding_web/binding.c index 27292911..5a8e4e34 100644 --- a/lib/binding_web/binding.c +++ b/lib/binding_web/binding.c @@ -243,6 +243,13 @@ void ts_tree_cursor_reset_wasm(const TSTree *tree) { marshal_cursor(&cursor); } +void ts_tree_cursor_reset_to_wasm(const TSTree *_dst, const TSTree *_src) { + TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, _dst); + TSTreeCursor src = unmarshal_cursor(&TRANSFER_BUFFER[3], _src); + ts_tree_cursor_reset_to(&cursor, &src); + marshal_cursor(&cursor); +} + bool ts_tree_cursor_goto_first_child_wasm(const TSTree *tree) { TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); bool result = ts_tree_cursor_goto_first_child(&cursor); @@ -250,6 +257,13 @@ bool ts_tree_cursor_goto_first_child_wasm(const TSTree *tree) { return result; } +bool ts_tree_cursor_goto_last_child_wasm(const TSTree *tree) { + TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); + bool result = ts_tree_cursor_goto_last_child(&cursor); + marshal_cursor(&cursor); + return result; +} + bool ts_tree_cursor_goto_next_sibling_wasm(const TSTree *tree) { TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); bool result = ts_tree_cursor_goto_next_sibling(&cursor); @@ -257,6 +271,13 @@ bool ts_tree_cursor_goto_next_sibling_wasm(const TSTree *tree) { return result; } +bool ts_tree_cursor_goto_previous_sibling_wasm(const TSTree *tree) { + TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); + bool result = ts_tree_cursor_goto_previous_sibling(&cursor); + marshal_cursor(&cursor); + return result; +} + bool ts_tree_cursor_goto_parent_wasm(const TSTree *tree) { TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); bool result = ts_tree_cursor_goto_parent(&cursor); @@ -270,6 +291,12 @@ uint16_t ts_tree_cursor_current_node_type_id_wasm(const TSTree *tree) { return ts_node_symbol(node); } +uint16_t ts_tree_cursor_current_node_state_id_wasm(const TSTree *tree) { + TSTreeCursor cursor = 
unmarshal_cursor(TRANSFER_BUFFER, tree); + TSNode node = ts_tree_cursor_current_node(&cursor); + return ts_node_parse_state(node); +} + bool ts_tree_cursor_current_node_is_named_wasm(const TSTree *tree) { TSTreeCursor cursor = unmarshal_cursor(TRANSFER_BUFFER, tree); TSNode node = ts_tree_cursor_current_node(&cursor); @@ -334,6 +361,11 @@ uint16_t ts_node_symbol_wasm(const TSTree *tree) { return ts_node_symbol(node); } +uint16_t ts_node_grammar_symbol_wasm(const TSTree *tree) { + TSNode node = unmarshal_node(tree); + return ts_node_grammar_symbol(node); +} + uint32_t ts_node_child_count_wasm(const TSTree *tree) { TSNode node = unmarshal_node(tree); return ts_node_child_count(node); @@ -579,11 +611,26 @@ int ts_node_has_error_wasm(const TSTree *tree) { return ts_node_has_error(node); } +int ts_node_is_error_wasm(const TSTree *tree) { + TSNode node = unmarshal_node(tree); + return ts_node_is_error(node); +} + int ts_node_is_missing_wasm(const TSTree *tree) { TSNode node = unmarshal_node(tree); return ts_node_is_missing(node); } +uint16_t ts_node_parse_state_wasm(const TSTree *tree) { + TSNode node = unmarshal_node(tree); + return ts_node_parse_state(node); +} + +uint16_t ts_node_next_parse_state_wasm(const TSTree *tree) { + TSNode node = unmarshal_node(tree); + return ts_node_next_parse_state(node); +} + /******************/ /* Section - Query */ /******************/ diff --git a/lib/binding_web/binding.js b/lib/binding_web/binding.js index 5352cb18..e6c2b27e 100644 --- a/lib/binding_web/binding.js +++ b/lib/binding_web/binding.js @@ -1,6 +1,7 @@ const C = Module; const INTERNAL = {}; const SIZE_OF_INT = 4; +const SIZE_OF_CURSOR = 3 * SIZE_OF_INT; const SIZE_OF_NODE = 5 * SIZE_OF_INT; const SIZE_OF_POINT = 2 * SIZE_OF_INT; const SIZE_OF_RANGE = 2 * SIZE_OF_INT + 2 * SIZE_OF_POINT; @@ -208,10 +209,19 @@ class Node { return C._ts_node_symbol_wasm(this.tree[0]); } + get grammarId() { + marshalNode(this); + return C._ts_node_grammar_symbol_wasm(this.tree[0]); + } + get 
type() { return this.tree.language.types[this.typeId] || 'ERROR'; } + get grammarType() { + return this.tree.language.types[this.grammarId] || 'ERROR'; + } + get endPosition() { marshalNode(this); C._ts_node_end_point_wasm(this.tree[0]); @@ -227,6 +237,16 @@ class Node { return getText(this.tree, this.startIndex, this.endIndex); } + get parseState() { + marshalNode(this); + return C._ts_node_parse_state_wasm(this.tree[0]); + } + + get nextParseState() { + marshalNode(this); + return C._ts_node_next_parse_state_wasm(this.tree[0]); + } + isNamed() { marshalNode(this); return C._ts_node_is_named_wasm(this.tree[0]) === 1; @@ -242,6 +262,11 @@ class Node { return C._ts_node_has_changes_wasm(this.tree[0]) === 1; } + isError() { + marshalNode(this); + return C._ts_node_is_error_wasm(this.tree[0]) === 1; + } + isMissing() { marshalNode(this); return C._ts_node_is_missing_wasm(this.tree[0]) === 1; @@ -505,6 +530,13 @@ class TreeCursor { unmarshalTreeCursor(this); } + resetTo(cursor) { + marshalTreeCursor(this, TRANSFER_BUFFER); + marshalTreeCursor(cursor, TRANSFER_BUFFER + SIZE_OF_CURSOR); + C._ts_tree_cursor_reset_to_wasm(this.tree[0], cursor.tree[0]); + unmarshalTreeCursor(this); + } + get nodeType() { return this.tree.language.types[this.nodeTypeId] || 'ERROR'; } @@ -514,6 +546,11 @@ class TreeCursor { return C._ts_tree_cursor_current_node_type_id_wasm(this.tree[0]); } + get nodeStateId() { + marshalTreeCursor(this); + return C._ts_tree_cursor_current_node_state_id_wasm(this.tree[0]); + } + get nodeId() { marshalTreeCursor(this); return C._ts_tree_cursor_current_node_id_wasm(this.tree[0]); @@ -580,6 +617,13 @@ class TreeCursor { return result === 1; } + gotoLastChild() { + marshalTreeCursor(this); + const result = C._ts_tree_cursor_goto_last_child_wasm(this.tree[0]); + unmarshalTreeCursor(this); + return result === 1; + } + gotoNextSibling() { marshalTreeCursor(this); const result = C._ts_tree_cursor_goto_next_sibling_wasm(this.tree[0]); @@ -587,6 +631,13 @@ class 
TreeCursor { return result === 1; } + gotoPreviousSibling() { + marshalTreeCursor(this); + const result = C._ts_tree_cursor_goto_previous_sibling_wasm(this.tree[0]); + unmarshalTreeCursor(this); + return result === 1; + } + gotoParent() { marshalTreeCursor(this); const result = C._ts_tree_cursor_goto_parent_wasm(this.tree[0]); @@ -624,6 +675,10 @@ class Language { return this.fields.length - 1; } + get stateCount() { + return C._ts_language_state_count(this[0]); + } + fieldIdForName(fieldName) { const result = this.fields.indexOf(fieldName); if (result !== -1) { @@ -663,6 +718,15 @@ class Language { return C._ts_language_type_is_visible_wasm(this[0], typeId) ? true : false; } + nextState(stateId, typeId) { + return C._ts_language_next_state(this[0], stateId, typeId); + } + + lookaheadIterator(stateId) { + const address = C._ts_lookahead_iterator_new(this[0], stateId); + if (address) return new LookaheadIterable(INTERNAL, address, this); + } + query(source) { const sourceLength = lengthBytesUTF8(source); const sourceAddress = C._malloc(sourceLength + 1); @@ -924,6 +988,53 @@ class Language { } } +class LookaheadIterable { + constructor(internal, address, language) { + assertInternal(internal); + this[0] = address; + this.language = language; + } + + get currentTypeId() { + return C._ts_lookahead_iterator_current_symbol(this[0]); + } + + get currentType() { + return this.language.types[this.currentTypeId] || 'ERROR' + } + + delete() { + C._ts_lookahead_iterator_delete(this[0]); + this[0] = 0; + } + + resetState(stateId) { + return C._ts_lookahead_iterator_reset_state(this[0], stateId); + } + + reset(language, stateId) { + if (C._ts_lookahead_iterator_reset(this[0], language[0], stateId)) { + this.language = language; + return true; + } + + return false; + } + + [Symbol.iterator]() { + const self = this; + return { + next() { + if (C._ts_lookahead_iterator_advance(self[0])) { + return { done: false, value: self.currentType }; + } + + return { done: true, value: "" }; 
+ } + }; + } +} + class Query { constructor( internal, address, captureNames, textPredicates, predicates, diff --git a/lib/binding_web/exports.json b/lib/binding_web/exports.json index eb4553dc..9e219d70 100644 --- a/lib/binding_web/exports.json +++ b/lib/binding_web/exports.json @@ -38,10 +38,12 @@ "_ts_language_type_is_named_wasm", "_ts_language_type_is_visible_wasm", "_ts_language_symbol_count", + "_ts_language_state_count", "_ts_language_symbol_for_name", "_ts_language_symbol_name", "_ts_language_symbol_type", "_ts_language_version", + "_ts_language_next_state", "_ts_node_child_by_field_id_wasm", "_ts_node_child_count_wasm", "_ts_node_child_wasm", @@ -53,8 +55,11 @@ "_ts_node_end_point_wasm", "_ts_node_has_changes_wasm", "_ts_node_has_error_wasm", + "_ts_node_is_error_wasm", "_ts_node_is_missing_wasm", "_ts_node_is_named_wasm", + "_ts_node_parse_state_wasm", + "_ts_node_next_parse_state_wasm", "_ts_node_named_child_count_wasm", "_ts_node_named_child_wasm", "_ts_node_named_children_wasm", @@ -68,6 +73,7 @@ "_ts_node_start_index_wasm", "_ts_node_start_point_wasm", "_ts_node_symbol_wasm", + "_ts_node_grammar_symbol_wasm", "_ts_node_to_string_wasm", "_ts_parser_delete", "_ts_parser_enable_logger_wasm", @@ -93,19 +99,29 @@ "_ts_tree_cursor_current_node_is_missing_wasm", "_ts_tree_cursor_current_node_is_named_wasm", "_ts_tree_cursor_current_node_type_id_wasm", + "_ts_tree_cursor_current_node_state_id_wasm", "_ts_tree_cursor_current_node_wasm", "_ts_tree_cursor_delete_wasm", "_ts_tree_cursor_end_index_wasm", "_ts_tree_cursor_end_position_wasm", "_ts_tree_cursor_goto_first_child_wasm", + "_ts_tree_cursor_goto_last_child_wasm", "_ts_tree_cursor_goto_next_sibling_wasm", + "_ts_tree_cursor_goto_previous_sibling_wasm", "_ts_tree_cursor_goto_parent_wasm", "_ts_tree_cursor_new_wasm", "_ts_tree_cursor_reset_wasm", + "_ts_tree_cursor_reset_to_wasm", "_ts_tree_cursor_start_index_wasm", "_ts_tree_cursor_start_position_wasm", "_ts_tree_delete", "_ts_tree_edit_wasm", 
"_ts_tree_get_changed_ranges_wasm", - "_ts_tree_root_node_wasm" + "_ts_tree_root_node_wasm", + "_ts_lookahead_iterator_new", + "_ts_lookahead_iterator_delete", + "_ts_lookahead_iterator_reset_state", + "_ts_lookahead_iterator_reset", + "_ts_lookahead_iterator_advance", + "_ts_lookahead_iterator_current_symbol" ] diff --git a/lib/binding_web/test/language-test.js b/lib/binding_web/test/language-test.js index 385b77ed..394e32b4 100644 --- a/lib/binding_web/test/language-test.js +++ b/lib/binding_web/test/language-test.js @@ -42,3 +42,46 @@ describe("Language", () => { }); }); }); + +describe("Lookahead iterator", () => { + let lookahead; + let state; + before(async () => { + let Parser; + ({ JavaScript, Parser } = await require("./helper")); + const parser = new Parser().setLanguage(JavaScript); + const tree = parser.parse("function fn() {}"); + parser.delete(); + const cursor = tree.walk(); + assert(cursor.gotoFirstChild()); + assert(cursor.gotoFirstChild()); + state = cursor.currentNode().nextParseState; + lookahead = JavaScript.lookaheadIterator(state); + assert.exists(lookahead); + }); + + after(() => { + lookahead.delete(); + }); + + const expected = ["identifier", "comment", "(", "*", "formal_parameters"]; + it("should iterate over valid symbols in the state", () => { + const symbols = Array.from(lookahead); + assert.includeMembers(symbols, expected); + assert.lengthOf(symbols, expected.length); + }); + + it("should reset to the initial state", () => { + assert(lookahead.resetState(state)); + const symbols = Array.from(lookahead); + assert.includeMembers(symbols, expected); + assert.lengthOf(symbols, expected.length); + }); + + it("should reset", () => { + assert(lookahead.reset(JavaScript, state)); + const symbols = Array.from(lookahead); + assert.includeMembers(symbols, expected); + assert.lengthOf(symbols, expected.length); + }); +}); diff --git a/lib/binding_web/test/node-test.js b/lib/binding_web/test/node-test.js index 6bbcafb0..a87658c7 100644 --- 
a/lib/binding_web/test/node-test.js +++ b/lib/binding_web/test/node-test.js @@ -268,6 +268,24 @@ describe("Node", () => { }); }); + describe(".isError()", () => { + it("returns true if the node is an error", () => { + tree = parser.parse("2 * * 3"); + const node = tree.rootNode; + assert.equal( + node.toString(), + '(program (expression_statement (binary_expression left: (number) (ERROR) right: (number))))' + ); + + const multi = node.firstChild.firstChild; + assert(multi.hasError()); + assert(!multi.children[0].isError()); + assert(!multi.children[1].isError()); + assert(multi.children[2].isError()); + assert(!multi.children[3].isError()); + }); + }); + describe(".isMissing()", () => { it("returns true if the node is missing from the source and was inserted via error recovery", () => { tree = parser.parse("(2 ||)"); @@ -308,6 +326,34 @@ describe("Node", () => { ); }); + describe(".parseState, .nextParseState", () => { + const text = "10 / 5"; + + it("returns node parse state ids", async () => { + tree = await parser.parse(text) + const quotientNode = tree.rootNode.firstChild.firstChild; + const [numerator, slash, denominator] = quotientNode.children; + + assert.equal(tree.rootNode.parseState, 0); + // parse states will change on any change to the grammar so test that it + // returns something instead + assert.isAbove(numerator.parseState, 0); + assert.isAbove(slash.parseState, 0); + assert.isAbove(denominator.parseState, 0); + }) + + it("returns next parse state equal to the language", async () => { + tree = await parser.parse(text); + const quotientNode = tree.rootNode.firstChild.firstChild; + quotientNode.children.forEach(node => { + assert.equal( + node.nextParseState, + JavaScript.nextState(node.parseState, node.grammarId) + ); + }); + }); + }); + describe('.descendantsOfType(type, min, max)', () => { it('finds all of the descendants of the given type in the given range', () => { tree = parser.parse("a + 1 * b * 2 + c + 3"); diff --git 
a/lib/binding_web/test/tree-test.js b/lib/binding_web/test/tree-test.js index 8c04e63e..a98d216a 100644 --- a/lib/binding_web/test/tree-test.js +++ b/lib/binding_web/test/tree-test.js @@ -244,6 +244,50 @@ describe("Tree", () => { endIndex: 13 }); + { + const copy = tree.walk(); + copy.resetTo(cursor); + + assert(copy.gotoPreviousSibling()); + assertCursorState(copy, { + nodeType: '+', + nodeIsNamed: false, + startPosition: {row: 0, column: 6}, + endPosition: {row: 0, column: 7}, + startIndex: 6, + endIndex: 7 + }); + + assert(copy.gotoPreviousSibling()); + assertCursorState(copy, { + nodeType: 'binary_expression', + nodeIsNamed: true, + startPosition: {row: 0, column: 0}, + endPosition: {row: 0, column: 5}, + startIndex: 0, + endIndex: 5 + }); + + assert(copy.gotoLastChild()); + assertCursorState(copy, { + nodeType: "identifier", + nodeIsNamed: true, + startPosition: {row: 0, column: 4}, + endPosition: {row: 0, column: 5}, + startIndex: 4, + endIndex: 5 + }) + + assert(copy.gotoParent()); + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'binary_expression') + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'expression_statement') + assert(copy.gotoParent()); + assert.equal(copy.nodeType, 'program') + assert(!copy.gotoParent()); + } + // const childIndex = cursor.gotoFirstChildForIndex(12); // assertCursorState(cursor, { // nodeType: 'identifier', diff --git a/lib/binding_web/tree-sitter-web.d.ts b/lib/binding_web/tree-sitter-web.d.ts index 016af4ae..dfe7766c 100644 --- a/lib/binding_web/tree-sitter-web.d.ts +++ b/lib/binding_web/tree-sitter-web.d.ts @@ -55,10 +55,14 @@ declare module 'web-tree-sitter' { ) => string | null; export interface SyntaxNode { - id: number; + typeId: number; + grammarId: number; tree: Tree; type: string; + grammarType: string; text: string; + parseState: number; + nextParseState: number; startPosition: Point; endPosition: Point; startIndex: number; @@ -80,6 +84,7 @@ declare module 'web-tree-sitter' { hasChanges(): 
boolean; hasError(): boolean; equals(other: SyntaxNode): boolean; + isError(): boolean; isMissing(): boolean; isNamed(): boolean; toString(): string; @@ -104,6 +109,7 @@ declare module 'web-tree-sitter' { export interface TreeCursor { nodeType: string; nodeTypeId: number; + nodeStateId: number; nodeText: string; nodeId: number; nodeIsNamed: boolean; @@ -114,14 +120,17 @@ declare module 'web-tree-sitter' { endIndex: number; reset(node: SyntaxNode): void; + resetTo(cursor: TreeCursor): void; delete(): void; currentNode(): SyntaxNode; currentFieldId(): number; currentFieldName(): string; gotoParent(): boolean; gotoFirstChild(): boolean; + gotoLastChild(): boolean; gotoFirstChildForIndex(index: number): boolean; gotoNextSibling(): boolean; + gotoPreviousSibling(): boolean; } export interface Tree { @@ -141,6 +150,7 @@ declare module 'web-tree-sitter' { readonly version: number; readonly fieldCount: number; + readonly stateCount: number; readonly nodeTypeCount: number; fieldNameForId(fieldId: number): string | null; @@ -149,7 +159,20 @@ declare module 'web-tree-sitter' { nodeTypeForId(typeId: number): string | null; nodeTypeIsNamed(typeId: number): boolean; nodeTypeIsVisible(typeId: number): boolean; + nextState(stateId: number, typeId: number): number; query(source: string): Query; + lookaheadIterator(stateId: number): LookaheadIterable | null; + } + + class LookaheadIterable { + readonly language: Language; + readonly currentTypeId: number; + readonly currentType: string; + + delete(): void; + resetState(stateId: number): boolean; + reset(language: Language, stateId: number): boolean; + [Symbol.iterator](): Iterator; } interface QueryCapture { diff --git a/lib/include/tree_sitter/api.h b/lib/include/tree_sitter/api.h index 1e67f217..d9560f24 100644 --- a/lib/include/tree_sitter/api.h +++ b/lib/include/tree_sitter/api.h @@ -32,6 +32,7 @@ extern "C" { /* Section - Types */ /*******************/ +typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t 
TSFieldId; typedef struct TSLanguage TSLanguage; @@ -39,6 +40,7 @@ typedef struct TSParser TSParser; typedef struct TSTree TSTree; typedef struct TSQuery TSQuery; typedef struct TSQueryCursor TSQueryCursor; +typedef struct TSLookaheadIterator TSLookaheadIterator; typedef enum { TSInputEncodingUTF8, @@ -440,6 +442,19 @@ TSSymbol ts_node_symbol(TSNode); */ const TSLanguage *ts_node_language(TSNode); +/** + * Get the node's type as it appears in the grammar ignoring aliases as a + * null-terminated string. + */ +const char *ts_node_grammar_type(TSNode); + +/** + * Get the node's type as a numerical id as it appears in the grammar ignoring + * aliases. This should be used in `ts_language_next_state` instead of + * `ts_node_symbol`. + */ +TSSymbol ts_node_grammar_symbol(TSNode); + /** * Get the node's start byte. */ @@ -504,6 +519,21 @@ bool ts_node_has_changes(TSNode); */ bool ts_node_has_error(TSNode); +/** + * Check if the node is a syntax error. +*/ +bool ts_node_is_error(TSNode); + +/** + * Get this node's parse state. +*/ +TSStateId ts_node_parse_state(TSNode); + +/** + * Get the parse state after this node. +*/ +TSStateId ts_node_next_parse_state(TSNode); + /** * Get the node's immediate parent. */ @@ -637,6 +667,14 @@ void ts_tree_cursor_delete(TSTreeCursor *); */ void ts_tree_cursor_reset(TSTreeCursor *, TSNode); +/** + * Re-initialize a tree cursor to the same position as another cursor. + * + * Unlike `ts_tree_cursor_reset`, this will not lose parent information and + * allows reusing already created cursors. +*/ +void ts_tree_cursor_reset_to(TSTreeCursor *, const TSTreeCursor *); + /** * Get the tree cursor's current node. */ @@ -674,6 +712,19 @@ bool ts_tree_cursor_goto_parent(TSTreeCursor *); */ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); +/** + * Move the cursor to the previous sibling of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there was no previous sibling node. 
+ * + * Note that this function may be slower than + * `ts_tree_cursor_goto_next_sibling` due to how node positions are stored. In + * the worst case, this will need to iterate through all the children up to the + * previous sibling node to recalculate its position. + */ +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *); + /** * Move the cursor to the first child of its current node. * @@ -682,6 +733,18 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *); */ bool ts_tree_cursor_goto_first_child(TSTreeCursor *); +/** + * Move the cursor to the last child of its current node. + * + * This returns `true` if the cursor successfully moved, and returns `false` if + * there were no children. + * + * Note that this function may be slower than `ts_tree_cursor_goto_first_child` + * because it needs to iterate through all the children to compute the child's + * position. + */ +bool ts_tree_cursor_goto_last_child(TSTreeCursor *); + /** * Move the cursor to the node that is the nth descendant of * the original node that the cursor was constructed with, where @@ -939,6 +1002,11 @@ void ts_query_cursor_set_max_start_depth(TSQueryCursor *, uint32_t); */ uint32_t ts_language_symbol_count(const TSLanguage *); +/** + * Get the number of valid states in this language. +*/ +uint32_t ts_language_state_count(const TSLanguage *); + /** * Get a node type string for the given numerical id. */ @@ -986,6 +1054,78 @@ TSSymbolType ts_language_symbol_type(const TSLanguage *, TSSymbol); */ uint32_t ts_language_version(const TSLanguage *); +/** + * Get the next parse state. Combine this with lookahead iterators to generate + * completion suggestions or valid symbols in error nodes. Use + * `ts_node_grammar_symbol` for valid symbols.
+*/ +TSStateId ts_language_next_state(const TSLanguage *, TSStateId, TSSymbol); + +/********************************/ +/* Section - Lookahead Iterator */ +/********************************/ + +/** + * Create a new lookahead iterator for the given language and parse state. + * + * This returns `NULL` if state is invalid for the language. + * + * Repeatedly using `ts_lookahead_iterator_advance` and + * `ts_lookahead_iterator_current_symbol` will generate valid symbols in the + * given parse state. Newly created lookahead iterators will contain the `ERROR` + * symbol. + * + * Lookahead iterators can be useful to generate suggestions and improve syntax + * error diagnostics. To get symbols valid in an ERROR node, use the lookahead + * iterator on its first leaf node state. For `MISSING` nodes, a lookahead + * iterator created on the previous non-extra leaf node may be appropriate. +*/ +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *, TSStateId); + +/** + * Delete a lookahead iterator freeing all the memory used. +*/ +void ts_lookahead_iterator_delete(TSLookaheadIterator *); + +/** + * Reset the lookahead iterator to another state. + * + * This returns `true` if the iterator was reset to the given state and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator *, TSStateId); + +/** + * Reset the lookahead iterator. + * + * This returns `true` if the language was set successfully and `false` + * otherwise. +*/ +bool ts_lookahead_iterator_reset(TSLookaheadIterator *, const TSLanguage *, TSStateId); + +/** + * Get the current language of the lookahead iterator. +*/ +const TSLanguage * ts_lookahead_iterator_language(const TSLookaheadIterator *); + +/** + * Advance the lookahead iterator to the next symbol. + * + * This returns `true` if there is a new symbol and `false` otherwise. 
+*/ +bool ts_lookahead_iterator_advance(TSLookaheadIterator *); + +/** + * Get the current symbol of the lookahead iterator. +*/ +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *); + +/** + * Get the current symbol type of the lookahead iterator as a null-terminated + * string. +*/ +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *); + /**********************************/ /* Section - Global Configuration */ /**********************************/ diff --git a/lib/include/tree_sitter/parser.h b/lib/include/tree_sitter/parser.h index ac773580..17b4fde9 100644 --- a/lib/include/tree_sitter/parser.h +++ b/lib/include/tree_sitter/parser.h @@ -13,9 +13,8 @@ extern "C" { #define ts_builtin_sym_end 0 #define TREE_SITTER_SERIALIZATION_BUFFER_SIZE 1024 -typedef uint16_t TSStateId; - #ifndef TREE_SITTER_API_H_ +typedef uint16_t TSStateId; typedef uint16_t TSSymbol; typedef uint16_t TSFieldId; typedef struct TSLanguage TSLanguage; diff --git a/lib/src/language.c b/lib/src/language.c index c87a8dc1..df5c6de1 100644 --- a/lib/src/language.c +++ b/lib/src/language.c @@ -7,6 +7,10 @@ uint32_t ts_language_symbol_count(const TSLanguage *self) { return self->symbol_count + self->alias_count; } +uint32_t ts_language_state_count(const TSLanguage *self) { + return self->state_count; +} + uint32_t ts_language_version(const TSLanguage *self) { return self->version; } @@ -56,6 +60,28 @@ TSSymbol ts_language_public_symbol( return self->public_symbol_map[symbol]; } +TSStateId ts_language_next_state( + const TSLanguage *self, + TSStateId state, + TSSymbol symbol +) { + if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { + return 0; + } else if (symbol < self->token_count) { + uint32_t count; + const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); + if (count > 0) { + TSParseAction action = actions[count - 1]; + if (action.type == TSParseActionTypeShift) { + return action.shift.extra ?
state : action.shift.state; + } + } + return 0; + } else { + return ts_language_lookup(self, state, symbol); + } +} + const char *ts_language_symbol_name( const TSLanguage *self, TSSymbol symbol @@ -135,3 +161,48 @@ TSFieldId ts_language_field_id_for_name( } return 0; } + +TSLookaheadIterator *ts_lookahead_iterator_new(const TSLanguage *self, TSStateId state) { + if (state >= self->state_count) return NULL; + LookaheadIterator *iterator = ts_malloc(sizeof(LookaheadIterator)); + *iterator = ts_language_lookaheads(self, state); + return (TSLookaheadIterator *)iterator; +} + +void ts_lookahead_iterator_delete(TSLookaheadIterator *self) { + ts_free(self); +} + +bool ts_lookahead_iterator_reset_state(TSLookaheadIterator * self, TSStateId state) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + if (state >= iterator->language->state_count) return false; + *iterator = ts_language_lookaheads(iterator->language, state); + return true; +} + +const TSLanguage * ts_lookahead_iterator_language(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->language; +} + +bool ts_lookahead_iterator_reset(TSLookaheadIterator *self, const TSLanguage *language, TSStateId state) { + if (state >= language->state_count) return false; + LookaheadIterator *iterator = (LookaheadIterator *)self; + *iterator = ts_language_lookaheads(language, state); + return true; +} + +bool ts_lookahead_iterator_advance(TSLookaheadIterator *self) { + LookaheadIterator *iterator = (LookaheadIterator *)self; + return ts_lookahead_iterator_next(iterator); +} + +TSSymbol ts_lookahead_iterator_current_symbol(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return iterator->symbol; +} + +const char *ts_lookahead_iterator_current_symbol_name(const TSLookaheadIterator *self) { + const LookaheadIterator *iterator = (const LookaheadIterator *)self; + return 
ts_language_symbol_name(iterator->language, iterator->symbol); +} diff --git a/lib/src/language.h b/lib/src/language.h index db61b602..002f564f 100644 --- a/lib/src/language.h +++ b/lib/src/language.h @@ -38,6 +38,8 @@ TSSymbolMetadata ts_language_symbol_metadata(const TSLanguage *, TSSymbol); TSSymbol ts_language_public_symbol(const TSLanguage *, TSSymbol); +TSStateId ts_language_next_state(const TSLanguage *self, TSStateId state, TSSymbol symbol); + static inline bool ts_language_is_symbol_external(const TSLanguage *self, TSSymbol symbol) { return 0 < symbol && symbol < self->external_token_count + 1; } @@ -178,28 +180,6 @@ static inline bool ts_lookahead_iterator_next(LookaheadIterator *self) { return true; } -static inline TSStateId ts_language_next_state( - const TSLanguage *self, - TSStateId state, - TSSymbol symbol -) { - if (symbol == ts_builtin_sym_error || symbol == ts_builtin_sym_error_repeat) { - return 0; - } else if (symbol < self->token_count) { - uint32_t count; - const TSParseAction *actions = ts_language_actions(self, state, symbol, &count); - if (count > 0) { - TSParseAction action = actions[count - 1]; - if (action.type == TSParseActionTypeShift) { - return action.shift.extra ? state : action.shift.state; - } - } - return 0; - } else { - return ts_language_lookup(self, state, symbol); - } -} - // Whether the state is a "primary state". If this returns false, it indicates that there exists // another state that behaves identically to this one with respect to query analysis. 
static inline bool ts_language_state_is_primary( diff --git a/lib/src/node.c b/lib/src/node.c index eafaa7bb..092e96f8 100644 --- a/lib/src/node.c +++ b/lib/src/node.c @@ -429,6 +429,15 @@ const TSLanguage *ts_node_language(TSNode self) { return self.tree->language; } +TSSymbol ts_node_grammar_symbol(TSNode self) { + return ts_subtree_symbol(ts_node__subtree(self)); +} + +const char *ts_node_grammar_type(TSNode self) { + TSSymbol symbol = ts_subtree_symbol(ts_node__subtree(self)); + return ts_language_symbol_name(self.tree->language, symbol); +} + char *ts_node_string(TSNode self) { return ts_subtree_string(ts_node__subtree(self), self.tree->language, false); } @@ -464,10 +473,27 @@ bool ts_node_has_error(TSNode self) { return ts_subtree_error_cost(ts_node__subtree(self)) > 0; } +bool ts_node_is_error(TSNode self) { + TSSymbol symbol = ts_node_symbol(self); + return symbol == ts_builtin_sym_error; +} + uint32_t ts_node_descendant_count(TSNode self) { return ts_subtree_visible_descendant_count(ts_node__subtree(self)) + 1; } +TSStateId ts_node_parse_state(TSNode self) { + return ts_subtree_parse_state(ts_node__subtree(self)); +} + +TSStateId ts_node_next_parse_state(TSNode self) { + const TSLanguage *language = self.tree->language; + uint16_t state = ts_node_parse_state(self); + uint16_t symbol = ts_node_grammar_symbol(self); + + return ts_language_next_state(language, state, symbol); +} + TSNode ts_node_parent(TSNode self) { TSNode node = ts_tree_root_node(self.tree); uint32_t end_byte = ts_node_end_byte(self); diff --git a/lib/src/tree_cursor.c b/lib/src/tree_cursor.c index 97a53152..25eca482 100644 --- a/lib/src/tree_cursor.c +++ b/lib/src/tree_cursor.c @@ -97,6 +97,57 @@ static inline bool ts_tree_cursor_child_iterator_next( return true; } +// Return a position that, when `b` is added to it, yields `a`. This +// can only be computed if `b` has zero rows. 
Otherwise, this function +// returns `LENGTH_UNDEFINED`, and the caller needs to recompute +// the position some other way. +static inline Length length_backtrack(Length a, Length b) { + if (length_is_undefined(a) || b.extent.row != 0) { + return LENGTH_UNDEFINED; + } + + Length result; + result.bytes = a.bytes - b.bytes; + result.extent.row = a.extent.row; + result.extent.column = a.extent.column - b.extent.column; + return result; +} + +static inline bool ts_tree_cursor_child_iterator_previous( + CursorChildIterator *self, + TreeCursorEntry *result, + bool *visible +) { + // this is mostly a reverse `ts_tree_cursor_child_iterator_next`; stepping below + // index 0 wraps the unsigned `child_index` past `child_count`, which ends iteration + if (!self->parent.ptr || self->child_index >= self->parent.ptr->child_count) return false; + const Subtree *child = &ts_subtree_children(self->parent)[self->child_index]; + *result = (TreeCursorEntry) { + .subtree = child, + .position = self->position, + .child_index = self->child_index, + .structural_child_index = self->structural_child_index, + }; + *visible = ts_subtree_visible(*child); + bool extra = ts_subtree_extra(*child); + if (!extra && self->alias_sequence) { + *visible |= self->alias_sequence[self->structural_child_index]; + self->structural_child_index--; + } + + self->position = length_backtrack(self->position, ts_subtree_padding(*child)); + self->child_index--; + + // unsigned can underflow so compare it to child_count + if (self->child_index < self->parent.ptr->child_count) { + Subtree previous_child = ts_subtree_children(self->parent)[self->child_index]; + Length size = ts_subtree_size(previous_child); + self->position = length_backtrack(self->position, size); + } + + return true; +} + // TSTreeCursor - lifecycle TSTreeCursor ts_tree_cursor_new(TSNode node) { @@ -163,6 +214,47 @@ bool ts_tree_cursor_goto_first_child(TSTreeCursor *self) { return false; } +TreeCursorStep ts_tree_cursor_goto_last_child_internal(TSTreeCursor *_self) { + TreeCursor *self = (TreeCursor *)_self; + bool visible; +
TreeCursorEntry entry; + CursorChildIterator iterator = ts_tree_cursor_iterate_children(self); + if (!iterator.parent.ptr || iterator.parent.ptr->child_count == 0) return TreeCursorStepNone; + + TreeCursorEntry last_entry = {0}; // zeroed so the `subtree` check below is well-defined when no child qualifies + TreeCursorStep last_step = TreeCursorStepNone; + while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + if (visible) { + last_entry = entry; + last_step = TreeCursorStepVisible; + } + else if (ts_subtree_visible_child_count(*entry.subtree) > 0) { + last_entry = entry; + last_step = TreeCursorStepHidden; + } + } + if (last_entry.subtree) { + array_push(&self->stack, last_entry); + return last_step; + } + + return TreeCursorStepNone; +} + +bool ts_tree_cursor_goto_last_child(TSTreeCursor *self) { + for (;;) { + switch (ts_tree_cursor_goto_last_child_internal(self)) { + case TreeCursorStepHidden: + continue; + case TreeCursorStepVisible: + return true; + default: + return false; + } + } + return false; +} + static inline int64_t ts_tree_cursor_goto_first_child_for_byte_and_point( TSTreeCursor *_self, uint32_t goal_byte, @@ -213,7 +305,9 @@ int64_t ts_tree_cursor_goto_first_child_for_point(TSTreeCursor *self, TSPoint go return ts_tree_cursor_goto_first_child_for_byte_and_point(self, 0, goal_point); } -TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { +TreeCursorStep ts_tree_cursor_goto_sibling_internal( + TSTreeCursor *_self, + bool (*advance)(CursorChildIterator *, TreeCursorEntry *, bool *)) { TreeCursor *self = (TreeCursor *)_self; uint32_t initial_size = self->stack.size; @@ -226,10 +320,10 @@ TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { iterator.descendant_index = entry.descendant_index; bool visible = false; - ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible); + advance(&iterator, &entry, &visible); if (visible && self->stack.size + 1 < initial_size) break; - while (ts_tree_cursor_child_iterator_next(&iterator, &entry, &visible)) { + while
(advance(&iterator, &entry, &visible)) { if (visible) { array_push(&self->stack, entry); return TreeCursorStepVisible; @@ -246,6 +340,10 @@ TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { return TreeCursorStepNone; } +TreeCursorStep ts_tree_cursor_goto_next_sibling_internal(TSTreeCursor *_self) { + return ts_tree_cursor_goto_sibling_internal(_self, ts_tree_cursor_child_iterator_next); +} + bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { switch (ts_tree_cursor_goto_next_sibling_internal(self)) { case TreeCursorStepHidden: @@ -258,6 +356,50 @@ bool ts_tree_cursor_goto_next_sibling(TSTreeCursor *self) { } } +TreeCursorStep ts_tree_cursor_goto_previous_sibling_internal(TSTreeCursor *_self) { + // since subtracting across row loses column information, we may have to + // restore it + TreeCursor *self = (TreeCursor *)_self; + + // for that, save current position before traversing + Length position = array_back(&self->stack)->position; + TreeCursorStep step = ts_tree_cursor_goto_sibling_internal( + _self, ts_tree_cursor_child_iterator_previous); + if (step == TreeCursorStepNone) + return step; + + // if length is already valid, there's no need to recompute it + if (!length_is_undefined(array_back(&self->stack)->position)) + return step; + + // restore position from the parent node + const TreeCursorEntry *parent = &self->stack.contents[self->stack.size - 2]; + position = parent->position; + uint32_t child_index = array_back(&self->stack)->child_index; + const Subtree *children = ts_subtree_children((*(parent->subtree))); + for (uint32_t i = 0; i < child_index; ++i) { + position = length_add(position, ts_subtree_total_size(children[i])); + } + if (child_index > 0) + position = length_add(position, ts_subtree_padding(children[child_index])); + + array_back(&self->stack)->position = position; + + return step; +} + +bool ts_tree_cursor_goto_previous_sibling(TSTreeCursor *self) { + switch 
(ts_tree_cursor_goto_previous_sibling_internal(self)) { + case TreeCursorStepHidden: + ts_tree_cursor_goto_last_child(self); + return true; + case TreeCursorStepVisible: + return true; + default: + return false; + } +} + bool ts_tree_cursor_goto_parent(TSTreeCursor *_self) { TreeCursor *self = (TreeCursor *)_self; for (unsigned i = self->stack.size - 2; i + 1 > 0; i--) { @@ -556,3 +698,11 @@ TSTreeCursor ts_tree_cursor_copy(const TSTreeCursor *_cursor) { array_push_all(&copy->stack, &cursor->stack); return res; } + +void ts_tree_cursor_reset_to(TSTreeCursor *_dst, const TSTreeCursor *_src) { // make `_dst` mirror `_src`: same tree, same stack entries + const TreeCursor *cursor = (const TreeCursor *)_src; + TreeCursor *copy = (TreeCursor *)_dst; + copy->tree = cursor->tree; + array_clear(&copy->stack); + array_push_all(&copy->stack, &cursor->stack); +} diff --git a/script/generate-bindings b/script/generate-bindings index da1796ba..fb47e247 100755 --- a/script/generate-bindings +++ b/script/generate-bindings @@ -6,6 +6,7 @@ no_derive_copy=( TSInput TSLanguage TSLogger + TSLookaheadIterator TSParser TSTree TSQuery