feat(lib): add lldb pretty-printers for some C lib types

This commit is contained in:
Will Lillis 2025-08-13 18:04:53 -04:00
parent be888a5fef
commit fee50ad0ce
5 changed files with 310 additions and 0 deletions

View file

@ -108,6 +108,13 @@ Additionally, if you want to run a particular _example_ from the corpus, you can
cargo xtask test -l javascript -e Arrays
```
If you are using `lldb` to debug the C library, tree-sitter provides custom pretty printers for several of its types.
You can enable these helpers by importing them:
```sh
(lldb) command script import /path/to/tree-sitter/lib/lldb_pretty_printers/tree_sitter_types.py
```
## Published Packages
The main [`tree-sitter/tree-sitter`][ts repo] repository contains the source code for

View file

@ -0,0 +1,60 @@
from lldb import SBValue
# typedef struct {
# const TSParseAction *actions;
# uint32_t action_count;
# bool is_reusable;
# } TableEntry;
# TODO: Same inline issue as with `TSTreeSyntheticProvider`.
class TableEntrySyntheticProvider:
    """LLDB synthetic children provider for the C `TableEntry` struct.

    Child layout exposed to the debugger:

    * index 0: `is_reusable`
    * index 1: `action_count`
    * index 2 and up: `actions[0]` .. `actions[action_count - 1]`, or the raw
      `actions` pointer (as a single child named `actions`) when
      `action_count` is zero.
    """

    # Number of scalar members that precede the `actions` elements.
    _FIXED_CHILDREN = 2
    # Element children are named `actions[<n>]`.
    _ELEMENT_PREFIX = "actions["
    _ELEMENT_SUFFIX = "]"

    def __init__(self, valobj: SBValue, _dict):
        self.valobj: SBValue = valobj
        self.update()

    def _element_count(self) -> int:
        """Return the current value of `action_count` as an unsigned int."""
        return self.action_count.GetValueAsUnsigned()

    def num_children(self) -> int:
        """Return the number of synthetic children.

        At least one slot is always reserved after the fixed members so the
        raw `actions` pointer is still shown when the entry has no actions.
        """
        return self._FIXED_CHILDREN + max(1, self._element_count())

    def get_child_index(self, name: str) -> int:
        """Map a child name to its index, or return -1 if it is unknown.

        The returned index is always one that `get_child_at_index` resolves
        to a child of the same name.
        """
        if name == "is_reusable":
            return 0
        if name == "action_count":
            return 1

        count = self._element_count()
        if count == 0:
            # Only the raw pointer is exposed for an empty entry.
            return self._FIXED_CHILDREN if name == "actions" else -1

        if not (
            name.startswith(self._ELEMENT_PREFIX)
            and name.endswith(self._ELEMENT_SUFFIX)
        ):
            return -1
        position = name[len(self._ELEMENT_PREFIX):-len(self._ELEMENT_SUFFIX)]
        if not position.isdecimal():
            return -1
        element = int(position)
        if element >= count:
            return -1
        # Element indices start right after the fixed members.
        return self._FIXED_CHILDREN + element

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at `index`, or None when `index` is out of range."""
        if index < 0 or index >= self.num_children():
            return None
        if index == 0:
            return self.is_reusable
        if index == 1:
            return self.action_count
        if self._element_count() == 0:
            return self.actions

        offset = index - self._FIXED_CHILDREN
        start = self.actions.GetValueAsUnsigned()
        address = start + offset * self.element_type_size
        return self.actions.CreateValueFromAddress(
            "actions[%s]" % (offset), address, self.element_type
        )

    def update(self):
        """Re-read the struct members and the pointee type of `actions`."""
        self.is_reusable: SBValue = self.valobj.GetChildMemberWithName("is_reusable")
        self.action_count: SBValue = self.valobj.GetChildMemberWithName("action_count")
        self.actions: SBValue = self.valobj.GetChildMemberWithName("actions")
        # Pointee type of `actions`; its byte size is the stride between elements.
        self.element_type = self.actions.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        """Always True: the fixed members are always present."""
        return True

View file

@ -0,0 +1,64 @@
import lldb
# Even though these are "unused", we still need them in scope in order for the classes
# to exist when we register them with the debugger
from ts_tree import TSTreeSyntheticProvider
from table_entry import TableEntrySyntheticProvider
from ts_array import ArraySyntheticProvider, anon_array_recognizer
class TreeSitterType:
    """Names of tree-sitter C library types, as plain strings."""

    # Struct types.
    TS_TREE: str = "TSTree"
    TABLE_ENTRY: str = "TableEntry"
    STACK_ENTRY: str = "StackEntry"
    REUSABLE_NODE: str = "ReusableNode"

    # Subtree / stack containers.
    SUBTREE_ARRAY: str = "SubtreeArray"
    MUTABLE_SUBTREE_ARRAY: str = "MutableSubtreeArray"
    STACK_SLICE_ARRAY: str = "StackSliceArray"
    STACK_SUMMARY: str = "StackSummary"
    STACK_NODE_ARRAY: str = "StackNodeArray"

    # Parser, query and analysis containers.
    REDUCE_ACTION_SET: str = "ReduceActionSet"
    TS_RANGE_ARRAY: str = "TSRangeArray"
    CAPTURE_QUANTIFIERS: str = "CaptureQuantifiers"
    CAPTURE_LIST: str = "CaptureList"
    ANALYSIS_STATE_SET: str = "AnalysisStateSet"
    ANALYSIS_SUBGRAPH_ARRAY: str = "AnalysisSubgraphArray"
    STRING_DATA: str = "StringData"
def ts_type_to_regex(type: str) -> str:
    """Build a regex that matches `type` under each of its three spellings.

    The alternatives are the bare name, `struct <name>` and `typedef <name>`,
    each anchored at both ends. `type` is inserted verbatim (it is not
    regex-escaped).
    """
    spellings = (type, f"struct {type}", f"typedef {type}")
    return "|".join(f"^{spelling}$" for spelling in spellings)
# Holds all tree-sitter types defined via the `Array` macro. These types will
# all share the same `ArraySyntheticProvider` synthetic provider; each entry is
# registered by exact name (see `ts_type_to_regex`) in `__lldb_init_module`.
TS_ARRAY_TYPES = [
    TreeSitterType.REDUCE_ACTION_SET,
    TreeSitterType.TS_RANGE_ARRAY,
    TreeSitterType.CAPTURE_QUANTIFIERS,
    TreeSitterType.ANALYSIS_STATE_SET,
    TreeSitterType.CAPTURE_LIST,
    TreeSitterType.ANALYSIS_SUBGRAPH_ARRAY,
    TreeSitterType.STACK_SLICE_ARRAY,
    TreeSitterType.STACK_SUMMARY,
    TreeSitterType.SUBTREE_ARRAY,
    TreeSitterType.MUTABLE_SUBTREE_ARRAY,
    TreeSitterType.STRING_DATA,
    TreeSitterType.STACK_NODE_ARRAY,
]
def __lldb_init_module(debugger: lldb.SBDebugger, _dict):
    """Register the tree-sitter synthetic providers with `debugger`.

    Issues one `type synthetic add` command per provider registration:
    `TSTree`, `TableEntry`, the anonymous-`Array` recognizer, and then every
    name in `TS_ARRAY_TYPES`, in that order.
    """
    commands = [
        f"type synthetic add -l tree_sitter_types.TSTreeSyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TS_TREE)}'",
        f"type synthetic add -l tree_sitter_types.TableEntrySyntheticProvider -x '{ts_type_to_regex(TreeSitterType.TABLE_ENTRY)}'",
        # Anonymous `Array(T)` fields have no name to match on, so they are
        # matched by a recognizer function instead of a regex.
        "type synthetic add -l tree_sitter_types.ArraySyntheticProvider --recognizer-function tree_sitter_types.anon_array_recognizer",
    ]
    commands.extend(
        f"type synthetic add -l tree_sitter_types.ArraySyntheticProvider -x '{ts_type_to_regex(array_type)}'"
        for array_type in TS_ARRAY_TYPES
    )
    for command in commands:
        debugger.HandleCommand(command)

View file

@ -0,0 +1,78 @@
from lldb import SBValue, SBType
import re
# define Array(T) \
# struct { \
# T *contents; \
# uint32_t size; \
# uint32_t capacity; \
# }
class ArraySyntheticProvider:
    """LLDB synthetic children provider for `Array(T)` structs.

    Child layout exposed to the debugger:

    * index 0: `size`
    * index 1: `capacity`
    * index 2 and up: `[0]` .. `[size - 1]`, or the raw `contents` pointer
      (as a single child named `contents`) when `size` is zero.
    """

    # Number of scalar members that precede the array elements.
    _FIXED_CHILDREN = 2

    def __init__(self, valobj: SBValue, _dict):
        self.valobj: SBValue = valobj
        self.update()

    def _element_count(self) -> int:
        """Return the current value of `size` as an unsigned int."""
        return self.size.GetValueAsUnsigned()

    def num_children(self) -> int:
        """Return the number of synthetic children.

        At least one slot is always reserved after the fixed members so the
        raw `contents` pointer is still shown for an empty array.
        """
        return self._FIXED_CHILDREN + max(1, self._element_count())

    def get_child_index(self, name: str) -> int:
        """Map a child name to its index, or return -1 if it is unknown.

        The returned index is always one that `get_child_at_index` resolves
        to a child of the same name.
        """
        if name == "size":
            return 0
        if name == "capacity":
            return 1

        count = self._element_count()
        if count == 0:
            # Only the raw pointer is exposed for an empty array.
            return self._FIXED_CHILDREN if name == "contents" else -1

        if not (name.startswith("[") and name.endswith("]")):
            return -1
        position = name[1:-1]
        if not position.isdecimal():
            return -1
        element = int(position)
        if element >= count:
            return -1
        # Element indices start right after the fixed members; without this
        # offset `[0]` would alias `size`.
        return self._FIXED_CHILDREN + element

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at `index`, or None when `index` is out of range."""
        if index < 0 or index >= self.num_children():
            return None
        if index == 0:
            return self.size
        if index == 1:
            return self.capacity
        if self._element_count() == 0:
            return self.contents

        offset = index - self._FIXED_CHILDREN
        start = self.contents.GetValueAsUnsigned()
        address = start + offset * self.element_type_size
        return self.contents.CreateValueFromAddress(
            "[%s]" % (offset), address, self.element_type
        )

    def update(self):
        """Re-read the struct members and the pointee type of `contents`."""
        self.contents: SBValue = self.valobj.GetChildMemberWithName("contents")
        self.size: SBValue = self.valobj.GetChildMemberWithName("size")
        self.capacity: SBValue = self.valobj.GetChildMemberWithName("capacity")
        # Pointee type of `contents`; its byte size is the stride between elements.
        self.element_type: SBType = self.contents.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        """Always True: `size` and `capacity` are always present."""
        return True
# Matches the textual form of an anonymous struct whose members are exactly
# `<T> *contents; uint32_t size; uint32_t capacity;`, one member per line.
anon_re = re.compile(
    r"struct\s*{$\s*\w+ \*contents;$\s*uint32_t size;$\s*uint32_t capacity;$\s*}",
    flags=re.MULTILINE,
)


# Used to recognize "anonymous" `Array(T)` types, i.e.:
# struct Foo {
#   Array(Bar) bars; // Render this field using `ArraySyntheticProvider`
# };
def anon_array_recognizer(valobj: SBType, _dict) -> bool:
    """Return True when `valobj` is an unnamed struct shaped like `Array(T)`.

    Only types whose name is exactly "(unnamed struct)" are considered; for
    those, the string form of the type is searched with `anon_re`.
    """
    if valobj.GetName() != "(unnamed struct)":
        return False
    return anon_re.search(str(valobj)) is not None

View file

@ -0,0 +1,101 @@
from lldb import SBType, SBValue
# struct TSTree {
# Subtree root;
# const TSLanguage *language;
# TSRange *included_ranges;
# unsigned included_range_count;
# };
# TODO: Ideally, we'd display the elements of `included_ranges` as
# children of `included_ranges` rather than separate items, i.e.:
# (TSTree) {
# root = ...
# language = ...
# included_range_count = ...
# included_ranges = {
# [0] = {
# ...
# }
# [1] = {
# ...
# }
# ...
# }
# }
#
# instead of the current behavior:
#
# (TSTree) {
# root = ...
# language = ...
# included_range_count = ...
# included_ranges[0] = {
# ...
# }
# included_ranges[1] = {
# ...
# }
# }
#
class TSTreeSyntheticProvider:
    """LLDB synthetic children provider for the C `TSTree` struct.

    Child layout exposed to the debugger:

    * index 0: `root`
    * index 1: `language`
    * index 2: `included_range_count`
    * index 3 and up: `included_ranges[0]` ..
      `included_ranges[included_range_count - 1]`, or the raw
      `included_ranges` pointer (as a single child named `included_ranges`)
      when `included_range_count` is zero.
    """

    # Number of scalar members that precede the `included_ranges` elements.
    _FIXED_CHILDREN = 3
    # Element children are named `included_ranges[<n>]`.
    _ELEMENT_PREFIX = "included_ranges["
    _ELEMENT_SUFFIX = "]"

    def __init__(self, valobj: SBValue, _dict):
        self.valobj: SBValue = valobj
        self.update()

    def _element_count(self) -> int:
        """Return the current value of `included_range_count` as an unsigned int."""
        return self.included_range_count.GetValueAsUnsigned()

    def num_children(self) -> int:
        """Return the number of synthetic children.

        At least one slot is always reserved after the fixed members so the
        raw `included_ranges` pointer is still shown when there are no ranges.
        """
        return self._FIXED_CHILDREN + max(1, self._element_count())

    def get_child_index(self, name: str) -> int:
        """Map a child name to its index, or return -1 if it is unknown.

        The returned index is always one that `get_child_at_index` resolves
        to a child of the same name.
        """
        if name == "root":
            return 0
        if name == "language":
            return 1
        if name == "included_range_count":
            return 2

        count = self._element_count()
        if count == 0:
            # Only the raw pointer is exposed when there are no ranges.
            return self._FIXED_CHILDREN if name == "included_ranges" else -1

        if not (
            name.startswith(self._ELEMENT_PREFIX)
            and name.endswith(self._ELEMENT_SUFFIX)
        ):
            return -1
        position = name[len(self._ELEMENT_PREFIX):-len(self._ELEMENT_SUFFIX)]
        if not position.isdecimal():
            return -1
        element = int(position)
        if element >= count:
            return -1
        # Element indices start right after the fixed members; without this
        # offset `included_ranges[0]` would alias `root`.
        return self._FIXED_CHILDREN + element

    def get_child_at_index(self, index: int) -> "SBValue | None":
        """Return the child at `index`, or None when `index` is out of range."""
        if index < 0 or index >= self.num_children():
            return None
        if index == 0:
            return self.root
        if index == 1:
            return self.language
        if index == 2:
            return self.included_range_count
        if self._element_count() == 0:
            return self.included_ranges

        offset = index - self._FIXED_CHILDREN
        start = self.included_ranges.GetValueAsUnsigned()
        address = start + offset * self.element_type_size
        return self.included_ranges.CreateValueFromAddress(
            "included_ranges[%s]" % (offset), address, self.element_type
        )

    def update(self):
        """Re-read the struct members and the pointee type of `included_ranges`."""
        self.root: SBValue = self.valobj.GetChildMemberWithName("root")
        self.language: SBValue = self.valobj.GetChildMemberWithName("language")
        self.included_range_count: SBValue = self.valobj.GetChildMemberWithName(
            "included_range_count"
        )
        self.included_ranges: SBValue = self.valobj.GetChildMemberWithName(
            "included_ranges"
        )
        # Pointee type of `included_ranges`; its byte size is the stride
        # between elements.
        self.element_type: SBType = self.included_ranges.GetType().GetPointeeType()
        self.element_type_size: int = self.element_type.GetByteSize()

    def has_children(self) -> bool:
        """Always True: the fixed members are always present."""
        return True