[ty] Add infrastructure for AST garbage collection (#18445)

## Summary

https://github.com/astral-sh/ty/issues/214 will require a couple
invasive changes that I would like to get merged even before garbage
collection is fully implemented (to avoid rebasing):
- `ParsedModule` can no longer be dereferenced directly. Instead you
need to load a `ParsedModuleRef` to access the AST, which requires a
reference to the salsa database (as it may require re-parsing the AST if
it was collected).
- `AstNodeRef` can only be dereferenced with the `node` method, which
takes a reference to the `ParsedModuleRef`. This allows us to encode the
fact that ASTs do not live as long as the database and may be collected
as soon a given instance of a `ParsedModuleRef` is dropped. There are a
number of places where we currently merge the `'db` and `'ast`
lifetimes, so this requires giving some types/functions two separate
lifetime parameters.
This commit is contained in:
Ibraheem Ahmed 2025-06-05 11:43:18 -04:00 committed by GitHub
parent 55100209c7
commit 8531f4b3ca
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
33 changed files with 886 additions and 689 deletions

View file

@ -1,15 +1,14 @@
use std::hash::Hash;
use std::ops::Deref;
use std::sync::Arc;
use ruff_db::parsed::ParsedModule;
use ruff_db::parsed::ParsedModuleRef;
/// Ref-counted owned reference to an AST node.
///
/// The type holds an owned reference to the node's ref-counted [`ParsedModule`].
/// Holding on to the node's [`ParsedModule`] guarantees that the reference to the
/// The type holds an owned reference to the node's ref-counted [`ParsedModuleRef`].
/// Holding on to the node's [`ParsedModuleRef`] guarantees that the reference to the
/// node must still be valid.
///
/// Holding on to any [`AstNodeRef`] prevents the [`ParsedModule`] from being released.
/// Holding on to any [`AstNodeRef`] prevents the [`ParsedModuleRef`] from being released.
///
/// ## Equality
/// Two `AstNodeRef` are considered equal if their pointer addresses are equal.
@ -33,11 +32,11 @@ use ruff_db::parsed::ParsedModule;
/// run on every AST change. All other queries only run when the expression's identity changes.
#[derive(Clone)]
pub struct AstNodeRef<T> {
/// Owned reference to the node's [`ParsedModule`].
/// Owned reference to the node's [`ParsedModuleRef`].
///
/// The node's reference is guaranteed to remain valid as long as it's enclosing
/// [`ParsedModule`] is alive.
parsed: ParsedModule,
/// [`ParsedModuleRef`] is alive.
parsed: ParsedModuleRef,
/// Pointer to the referenced node.
node: std::ptr::NonNull<T>,
@ -45,15 +44,15 @@ pub struct AstNodeRef<T> {
#[expect(unsafe_code)]
impl<T> AstNodeRef<T> {
/// Creates a new `AstNodeRef` that references `node`. The `parsed` is the [`ParsedModule`] to
/// Creates a new `AstNodeRef` that references `node`. The `parsed` is the [`ParsedModuleRef`] to
/// which the `AstNodeRef` belongs.
///
/// ## Safety
///
/// Dereferencing the `node` can result in undefined behavior if `parsed` isn't the
/// [`ParsedModule`] to which `node` belongs. It's the caller's responsibility to ensure that
/// [`ParsedModuleRef`] to which `node` belongs. It's the caller's responsibility to ensure that
/// the invariant `node belongs to parsed` is upheld.
pub(super) unsafe fn new(parsed: ParsedModule, node: &T) -> Self {
pub(super) unsafe fn new(parsed: ParsedModuleRef, node: &T) -> Self {
Self {
parsed,
node: std::ptr::NonNull::from(node),
@ -61,54 +60,26 @@ impl<T> AstNodeRef<T> {
}
/// Returns a reference to the wrapped node.
pub const fn node(&self) -> &T {
///
/// Note that this method will panic if the provided module is from a different file or Salsa revision
/// than the module this node was created with.
pub fn node<'ast>(&self, parsed: &'ast ParsedModuleRef) -> &'ast T {
debug_assert!(Arc::ptr_eq(self.parsed.as_arc(), parsed.as_arc()));
// SAFETY: Holding on to `parsed` ensures that the AST to which `node` belongs is still
// alive and not moved.
unsafe { self.node.as_ref() }
}
}
impl<T> Deref for AstNodeRef<T> {
type Target = T;
fn deref(&self) -> &Self::Target {
self.node()
}
}
impl<T> std::fmt::Debug for AstNodeRef<T>
where
T: std::fmt::Debug,
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("AstNodeRef").field(&self.node()).finish()
}
}
impl<T> PartialEq for AstNodeRef<T>
where
T: PartialEq,
{
fn eq(&self, other: &Self) -> bool {
if self.parsed == other.parsed {
// Comparing the pointer addresses is sufficient to determine equality
// if the parsed are the same.
self.node.eq(&other.node)
} else {
// Otherwise perform a deep comparison.
self.node().eq(other.node())
}
}
}
impl<T> Eq for AstNodeRef<T> where T: Eq {}
impl<T> Hash for AstNodeRef<T>
where
T: Hash,
{
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.node().hash(state);
f.debug_tuple("AstNodeRef")
.field(self.node(&self.parsed))
.finish()
}
}
@ -117,7 +88,9 @@ unsafe impl<T> salsa::Update for AstNodeRef<T> {
unsafe fn maybe_update(old_pointer: *mut Self, new_value: Self) -> bool {
let old_ref = unsafe { &mut (*old_pointer) };
if old_ref.parsed == new_value.parsed && old_ref.node.eq(&new_value.node) {
if Arc::ptr_eq(old_ref.parsed.as_arc(), new_value.parsed.as_arc())
&& old_ref.node.eq(&new_value.node)
{
false
} else {
*old_ref = new_value;
@ -130,73 +103,3 @@ unsafe impl<T> salsa::Update for AstNodeRef<T> {
unsafe impl<T> Send for AstNodeRef<T> where T: Send {}
#[expect(unsafe_code)]
unsafe impl<T> Sync for AstNodeRef<T> where T: Sync {}
#[cfg(test)]
mod tests {
use crate::ast_node_ref::AstNodeRef;
use ruff_db::parsed::ParsedModule;
use ruff_python_ast::PySourceType;
use ruff_python_parser::parse_unchecked_source;
#[test]
#[expect(unsafe_code)]
fn equality() {
let parsed_raw = parse_unchecked_source("1 + 2", PySourceType::Python);
let parsed = ParsedModule::new(parsed_raw.clone());
let stmt = &parsed.syntax().body[0];
let node1 = unsafe { AstNodeRef::new(parsed.clone(), stmt) };
let node2 = unsafe { AstNodeRef::new(parsed.clone(), stmt) };
assert_eq!(node1, node2);
// Compare from different trees
let cloned = ParsedModule::new(parsed_raw);
let stmt_cloned = &cloned.syntax().body[0];
let cloned_node = unsafe { AstNodeRef::new(cloned.clone(), stmt_cloned) };
assert_eq!(node1, cloned_node);
let other_raw = parse_unchecked_source("2 + 2", PySourceType::Python);
let other = ParsedModule::new(other_raw);
let other_stmt = &other.syntax().body[0];
let other_node = unsafe { AstNodeRef::new(other.clone(), other_stmt) };
assert_ne!(node1, other_node);
}
#[expect(unsafe_code)]
#[test]
fn inequality() {
let parsed_raw = parse_unchecked_source("1 + 2", PySourceType::Python);
let parsed = ParsedModule::new(parsed_raw);
let stmt = &parsed.syntax().body[0];
let node = unsafe { AstNodeRef::new(parsed.clone(), stmt) };
let other_raw = parse_unchecked_source("2 + 2", PySourceType::Python);
let other = ParsedModule::new(other_raw);
let other_stmt = &other.syntax().body[0];
let other_node = unsafe { AstNodeRef::new(other.clone(), other_stmt) };
assert_ne!(node, other_node);
}
#[test]
#[expect(unsafe_code)]
fn debug() {
let parsed_raw = parse_unchecked_source("1 + 2", PySourceType::Python);
let parsed = ParsedModule::new(parsed_raw);
let stmt = &parsed.syntax().body[0];
let stmt_node = unsafe { AstNodeRef::new(parsed.clone(), stmt) };
let debug = format!("{stmt_node:?}");
assert_eq!(debug, format!("AstNodeRef({stmt:?})"));
}
}