diff --git a/docs/src/6-contributing.md b/docs/src/6-contributing.md index 862580db..001d6337 100644 --- a/docs/src/6-contributing.md +++ b/docs/src/6-contributing.md @@ -108,6 +108,13 @@ Additionally, if you want to run a particular _example_ from the corpus, you can cargo xtask test -l javascript -e Arrays ``` +If you are using `lldb` to debug the C library, tree-sitter provides custom pretty printers for several of its types. +You can enable these helpers by importing them: + +```sh +(lldb) command script import /path/to/tree-sitter/lib/lldb_pretty_printers/tree_sitter_types.py +``` + ## Published Packages The main [`tree-sitter/tree-sitter`][ts repo] repository contains the source code for diff --git a/lib/lldb_pretty_printers/table_entry.py b/lib/lldb_pretty_printers/table_entry.py new file mode 100644 index 00000000..f46b9d43 --- /dev/null +++ b/lib/lldb_pretty_printers/table_entry.py @@ -0,0 +1,60 @@ +from lldb import SBValue + +# typedef struct { +# const TSParseAction *actions; +# uint32_t action_count; +# bool is_reusable; +# } TableEntry; + +# TODO: Same inline issue as with `TSTreeSyntheticProvider`. 
class TableEntrySyntheticProvider:
    """LLDB synthetic-children provider for the C ``TableEntry`` struct.

    Children are laid out as follows:

    * index 0 -- ``is_reusable``
    * index 1 -- ``action_count``
    * index 2 and up -- the elements of ``actions``, named ``actions[0]``,
      ``actions[1]``, ...  When ``action_count`` is zero, index 2 is the raw
      ``actions`` pointer instead.
    """

    # Number of scalar children that come before the `actions` elements.
    _FIXED_CHILDREN = 2
    # Name of the struct field holding the element pointer.
    _ARRAY_FIELD = "actions"

    def __init__(self, valobj: "SBValue", _dict):
        self.valobj: "SBValue" = valobj
        self.update()

    def num_children(self) -> int:
        """Return the two scalar fields plus one slot per action (at least one)."""
        # is_reusable, action_count, actions
        return self._FIXED_CHILDREN + max(1, self.action_count.GetValueAsUnsigned())

    def get_child_index(self, name: str) -> int:
        """Return the child index for ``name``, or -1 if there is no such child."""
        if name == "is_reusable":
            return 0
        if name == "action_count":
            return 1

        count = self.action_count.GetValueAsUnsigned()
        if count == 0:
            # Without elements the only remaining child is the raw pointer.
            return self._FIXED_CHILDREN if name == self._ARRAY_FIELD else -1

        # Parse "actions[<n>]" explicitly: `str.lstrip`/`str.rstrip` remove a
        # *set of characters*, not a prefix, so they accept unrelated names.
        prefix = self._ARRAY_FIELD + "["
        if name.startswith(prefix) and name.endswith("]"):
            digits = name[len(prefix):-1]
            if digits.isdecimal() and int(digits) < count:
                # Elements are listed after the fixed children.
                return self._FIXED_CHILDREN + int(digits)
        return -1

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at ``index``, or ``None`` when it is out of range."""
        if index == 0:
            return self.is_reusable
        if index == 1:
            return self.action_count

        count = self.action_count.GetValueAsUnsigned()
        if count == 0:
            # Show the (empty) pointer itself in the single extra slot.
            return self.actions if index == self._FIXED_CHILDREN else None

        # Two fixed children precede the elements, so element 0 is child 2.
        offset = index - self._FIXED_CHILDREN
        if not 0 <= offset < count:
            return None
        address = self.actions.GetValueAsUnsigned() + offset * self.element_type_size
        return self.actions.CreateValueFromAddress(
            f"{self._ARRAY_FIELD}[{offset}]", address, self.element_type
        )

    def update(self):
        """Re-read the struct members and cache the element type of ``actions``."""
        self.is_reusable: "SBValue" = self.valobj.GetChildMemberWithName("is_reusable")
        self.action_count: "SBValue" = self.valobj.GetChildMemberWithName("action_count")
        self.actions: "SBValue" = self.valobj.GetChildMemberWithName("actions")

        self.element_type: "SBType" = self.actions.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        return True
class TreeSitterType(object):
    """String names of tree-sitter C types used when registering providers.

    `STACK_ENTRY` and `REUSABLE_NODE` are declared here but are not
    registered by `__lldb_init_module` below.
    """

    TS_TREE: str = "TSTree"
    SUBTREE_ARRAY: str = "SubtreeArray"
    MUTABLE_SUBTREE_ARRAY: str = "MutableSubtreeArray"
    STACK_SLICE_ARRAY: str = "StackSliceArray"
    STACK_SUMMARY: str = "StackSummary"
    STACK_ENTRY: str = "StackEntry"
    REUSABLE_NODE: str = "ReusableNode"
    REDUCE_ACTION_SET: str = "ReduceActionSet"
    TABLE_ENTRY: str = "TableEntry"
    TS_RANGE_ARRAY: str = "TSRangeArray"
    CAPTURE_QUANTIFIERS: str = "CaptureQuantifiers"
    CAPTURE_LIST: str = "CaptureList"
    ANALYSIS_STATE_SET: str = "AnalysisStateSet"
    ANALYSIS_SUBGRAPH_ARRAY: str = "AnalysisSubgraphArray"
    STACK_NODE_ARRAY: str = "StackNodeArray"
    STRING_DATA: str = "StringData"


def ts_type_to_regex(type: str) -> str:
    """Return a regex matching ``type``, ``struct type`` or ``typedef type``.

    Each alternative is anchored with ``^`` and ``$``. ``type`` is inserted
    verbatim (it is not regex-escaped).
    """
    # NOTE: the parameter shadows the builtin `type`; the name is kept so that
    # existing keyword callers keep working.
    return f"^{type}$|^struct {type}$|^typedef {type}$"


# Holds all tree-sitter types defined via the `Array` macro. These types will
# all share the same `ArraySyntheticProvider` synthetic provider
TS_ARRAY_TYPES = [
    TreeSitterType.REDUCE_ACTION_SET,
    TreeSitterType.TS_RANGE_ARRAY,
    TreeSitterType.CAPTURE_QUANTIFIERS,
    TreeSitterType.ANALYSIS_STATE_SET,
    TreeSitterType.CAPTURE_LIST,
    TreeSitterType.ANALYSIS_SUBGRAPH_ARRAY,
    TreeSitterType.STACK_SLICE_ARRAY,
    TreeSitterType.STACK_SUMMARY,
    TreeSitterType.SUBTREE_ARRAY,
    TreeSitterType.MUTABLE_SUBTREE_ARRAY,
    TreeSitterType.STRING_DATA,
    TreeSitterType.STACK_NODE_ARRAY,
]


def __lldb_init_module(debugger: "lldb.SBDebugger", _dict):
    """Register the tree-sitter synthetic providers with ``debugger``.

    Issues one ``type synthetic add`` command for `TSTree`, one for
    `TableEntry`, one recognizer-based command for anonymous arrays, and then
    one per entry of `TS_ARRAY_TYPES`, in that order.
    """
    add = "type synthetic add -l tree_sitter_types"
    commands = [
        f"{add}.TSTreeSyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TS_TREE)}'",
        f"{add}.TableEntrySyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TABLE_ENTRY)}'",
        f"{add}.ArraySyntheticProvider --recognizer-function tree_sitter_types.anon_array_recognizer",
    ]
    # `type_name` rather than `type`, so the builtin is not shadowed here.
    commands.extend(
        f"{add}.ArraySyntheticProvider -x '{ts_type_to_regex(type_name)}'"
        for type_name in TS_ARRAY_TYPES
    )
    for command in commands:
        debugger.HandleCommand(command)
# define Array(T)       \
#   struct {             \
#     T *contents;       \
#     uint32_t size;     \
#     uint32_t capacity; \
#   }


class ArraySyntheticProvider:
    """LLDB synthetic-children provider for structs built by ``Array(T)``.

    Children are laid out as follows:

    * index 0 -- ``size``
    * index 1 -- ``capacity``
    * index 2 and up -- the elements of ``contents``, named ``[0]``, ``[1]``, ...

    When ``size`` is zero only the two scalar children are listed; the raw
    ``contents`` pointer can still be looked up by name (it maps to index 2).
    """

    # Number of scalar children that come before the elements.
    _FIXED_CHILDREN = 2

    def __init__(self, valobj: "SBValue", _dict):
        self.valobj: "SBValue" = valobj
        self.update()

    def num_children(self) -> int:
        """Return the number of listed children."""
        return self._FIXED_CHILDREN + self.size.GetValueAsUnsigned()  # size, capacity, and elements

    def get_child_index(self, name: str) -> int:
        """Return the child index for ``name``, or -1 if there is no such child."""
        if name == "size":
            return 0
        if name == "capacity":
            return 1

        count = self.size.GetValueAsUnsigned()
        if count == 0:
            # No elements: only the raw pointer remains addressable.
            return self._FIXED_CHILDREN if name == "contents" else -1

        # Parse "[<n>]" explicitly: `str.lstrip`/`str.rstrip` remove a *set of
        # characters*, so e.g. "[[1]]" would otherwise be accepted.
        if name.startswith("[") and name.endswith("]"):
            digits = name[1:-1]
            if digits.isdecimal() and int(digits) < count:
                # Elements are listed after `size` and `capacity`, so element
                # i is child 2 + i (returning i would alias `size`/`capacity`).
                return self._FIXED_CHILDREN + int(digits)
        return -1

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at ``index``, or ``None`` when it is out of range."""
        if index == 0:
            return self.size
        if index == 1:
            return self.capacity

        count = self.size.GetValueAsUnsigned()
        if count == 0:
            return self.contents if index == self._FIXED_CHILDREN else None

        offset = index - self._FIXED_CHILDREN
        if not 0 <= offset < count:
            # Never synthesize a value from memory outside the array.
            return None
        address = self.contents.GetValueAsUnsigned() + offset * self.element_type_size
        return self.contents.CreateValueFromAddress(
            f"[{offset}]", address, self.element_type
        )

    def update(self):
        """Re-read the struct members and cache the element type of ``contents``."""
        self.contents: "SBValue" = self.valobj.GetChildMemberWithName("contents")
        self.size: "SBValue" = self.valobj.GetChildMemberWithName("size")
        self.capacity: "SBValue" = self.valobj.GetChildMemberWithName("capacity")

        self.element_type: "SBType" = self.contents.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        return True
anon_re = re.compile(
    r"struct\s*{$\s*\w+ \*contents;$\s*uint32_t size;$\s*uint32_t capacity;$\s*}",
    re.MULTILINE,
)


# Used to recognize "anonymous" `Array(T)` types, i.e.:
# struct Foo {
#   Array(Bar) bars; // Render this field using `ArraySyntheticProvider`
# };
def anon_array_recognizer(valobj: "SBType", _dict) -> bool:
    """Return True when ``valobj`` is an unnamed struct shaped like ``Array(T)``.

    The type must be named exactly ``"(unnamed struct)"`` and its string form
    must contain a struct body matching `anon_re` (a ``contents`` pointer
    followed by ``uint32_t size`` and ``uint32_t capacity``).
    """
    # Cheap name check first; only unnamed structs are rendered to text.
    if valobj.GetName() != "(unnamed struct)":
        return False
    rendered = str(valobj)
    found = anon_re.search(rendered)
    return found is not None


# struct TSTree {
#   Subtree root;
#   const TSLanguage *language;
#   TSRange *included_ranges;
#   unsigned included_range_count;
# };

# TODO: Ideally, we'd display the elements of `included_ranges` as
# children of `included_ranges` rather than separate items, i.e.:

# (TSTree) {
#   root = ...
#   language = ...
#   included_range_count = ...
#   included_ranges = {
#     [0] = {
#       ...
#     }
#     [1] = {
#       ...
#     }
#     ...
#   }
# }
#
# instead of the current behavior:
#
# (TSTree) {
#   root = ...
#   language = ...
#   included_range_count = ...
#   included_ranges[0] = {
#     ...
#   }
#   included_ranges[1] = {
#     ...
#   }
# }
#


class TSTreeSyntheticProvider:
    """LLDB synthetic-children provider for the C ``TSTree`` struct.

    Children are laid out as follows:

    * index 0 -- ``root``
    * index 1 -- ``language``
    * index 2 -- ``included_range_count``
    * index 3 and up -- the elements of ``included_ranges``, named
      ``included_ranges[0]``, ``included_ranges[1]``, ...

    When ``included_range_count`` is zero only the three scalar children are
    listed; the raw ``included_ranges`` pointer can still be looked up by
    name (it maps to index 3).
    """

    # Number of scalar children that come before the range elements.
    _FIXED_CHILDREN = 3
    # Name of the struct field holding the element pointer.
    _ARRAY_FIELD = "included_ranges"

    def __init__(self, valobj: "SBValue", _dict):
        self.valobj: "SBValue" = valobj
        self.update()

    def num_children(self) -> int:
        """Return the number of listed children."""
        # root, language, included_range_count, included_ranges
        return self._FIXED_CHILDREN + self.included_range_count.GetValueAsUnsigned()

    def get_child_index(self, name: str) -> int:
        """Return the child index for ``name``, or -1 if there is no such child."""
        if name == "root":
            return 0
        if name == "language":
            return 1
        if name == "included_range_count":
            return 2

        count = self.included_range_count.GetValueAsUnsigned()
        if count == 0:
            # No elements: only the raw pointer remains addressable.
            return self._FIXED_CHILDREN if name == self._ARRAY_FIELD else -1

        # Parse "included_ranges[<n>]" explicitly: `str.lstrip`/`str.rstrip`
        # remove a *set of characters*, so a name such as "sin1" would
        # otherwise be reduced to "1" and accepted.
        prefix = self._ARRAY_FIELD + "["
        if name.startswith(prefix) and name.endswith("]"):
            digits = name[len(prefix):-1]
            if digits.isdecimal() and int(digits) < count:
                # Element i is child 3 + i; returning i would alias the
                # scalar children.
                return self._FIXED_CHILDREN + int(digits)
        return -1

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at ``index``, or ``None`` when it is out of range."""
        if index == 0:
            return self.root
        if index == 1:
            return self.language
        if index == 2:
            return self.included_range_count

        count = self.included_range_count.GetValueAsUnsigned()
        if count == 0:
            return self.included_ranges if index == self._FIXED_CHILDREN else None

        offset = index - self._FIXED_CHILDREN
        if not 0 <= offset < count:
            # Never synthesize a value from memory outside the array.
            return None
        address = (
            self.included_ranges.GetValueAsUnsigned()
            + offset * self.element_type_size
        )
        return self.included_ranges.CreateValueFromAddress(
            f"{self._ARRAY_FIELD}[{offset}]", address, self.element_type
        )

    def update(self):
        """Re-read the struct members and cache the element type of ``included_ranges``."""
        self.root: "SBValue" = self.valobj.GetChildMemberWithName("root")
        self.language: "SBValue" = self.valobj.GetChildMemberWithName("language")
        self.included_range_count: "SBValue" = self.valobj.GetChildMemberWithName(
            "included_range_count"
        )
        self.included_ranges: "SBValue" = self.valobj.GetChildMemberWithName(
            "included_ranges"
        )

        self.element_type: "SBType" = self.included_ranges.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        return True