[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned whenever the file is parsed. `ParsedModule`
can use this to create a flat index of AST nodes any time the file is
parsed (or reparsed). This allows `AstNodeRef` to simply index into the
current instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query rather than by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -241,9 +241,8 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
) {
let children_start = self.scopes.next_index() + 1;
// SAFETY: `node` is guaranteed to be a child of `self.module`
#[expect(unsafe_code)]
let node_with_kind = unsafe { node.to_kind(self.module.clone()) };
// Note `node` is guaranteed to be a child of `self.module`
let node_with_kind = node.to_kind(self.module);
let scope = Scope::new(
parent,
@ -473,9 +472,8 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
) -> (Definition<'db>, usize) {
let definition_node: DefinitionNodeRef<'ast, 'db> = definition_node.into();
#[expect(unsafe_code)]
// SAFETY: `definition_node` is guaranteed to be a child of `self.module`
let kind = unsafe { definition_node.into_owned(self.module.clone()) };
// Note `definition_node` is guaranteed to be a child of `self.module`
let kind = definition_node.into_owned(self.module);
let category = kind.category(self.source_type.is_stub(), self.module);
let is_reexported = kind.is_reexported();
@ -782,13 +780,8 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.db,
self.file,
self.current_scope(),
#[expect(unsafe_code)]
unsafe {
AstNodeRef::new(self.module.clone(), expression_node)
},
#[expect(unsafe_code)]
assigned_to
.map(|assigned_to| unsafe { AstNodeRef::new(self.module.clone(), assigned_to) }),
AstNodeRef::new(self.module, expression_node),
assigned_to.map(|assigned_to| AstNodeRef::new(self.module, assigned_to)),
expression_kind,
countme::Count::default(),
);
@ -810,6 +803,7 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
let (name, bound, default) = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar {
range: _,
node_index: _,
name,
bound,
default,
@ -989,11 +983,8 @@ impl<'db, 'ast> SemanticIndexBuilder<'db, 'ast> {
self.file,
value_file_scope,
self.current_scope(),
// SAFETY: `target` belongs to the `self.module` tree
#[expect(unsafe_code)]
unsafe {
AstNodeRef::new(self.module.clone(), target)
},
// Note `target` belongs to the `self.module` tree
AstNodeRef::new(self.module, target),
UnpackValue::new(unpackable.kind(), value),
countme::Count::default(),
));
@ -1103,6 +1094,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
body,
is_async: _,
range: _,
node_index: _,
} = function_def;
for decorator in decorator_list {
self.visit_decorator(decorator);
@ -1377,6 +1369,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
test,
msg,
range: _,
node_index: _,
}) => {
// We model an `assert test, msg` statement here. Conceptually, we can think of
// this as being equivalent to the following:
@ -1447,6 +1440,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
ast::Stmt::AugAssign(
aug_assign @ ast::StmtAugAssign {
range: _,
node_index: _,
target,
op,
value,
@ -1553,6 +1547,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
body,
orelse,
range: _,
node_index: _,
}) => {
self.visit_expr(test);
@ -1620,6 +1615,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
}) => {
for item @ ast::WithItem {
range: _,
node_index: _,
context_expr,
optional_vars,
} in items
@ -1643,6 +1639,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
ast::Stmt::For(
for_stmt @ ast::StmtFor {
range: _,
node_index: _,
is_async: _,
target,
iter,
@ -1680,6 +1677,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
subject,
cases,
range: _,
node_index: _,
}) => {
debug_assert_eq!(self.current_match_case, None);
@ -1767,6 +1765,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
finalbody,
is_star,
range: _,
node_index: _,
}) => {
self.record_ambiguous_visibility();
@ -1814,6 +1813,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
type_: handled_exceptions,
body: handler_body,
range: _,
node_index: _,
} = except_handler;
if let Some(handled_exceptions) = handled_exceptions {
@ -1892,7 +1892,11 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
// Everything in the current block after a terminal statement is unreachable.
self.mark_unreachable();
}
ast::Stmt::Global(ast::StmtGlobal { range: _, names }) => {
ast::Stmt::Global(ast::StmtGlobal {
range: _,
node_index: _,
names,
}) => {
for name in names {
let symbol_id = self.add_symbol(name.id.clone());
let symbol_table = self.current_place_table();
@ -1915,7 +1919,11 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
}
walk_stmt(self, stmt);
}
ast::Stmt::Delete(ast::StmtDelete { targets, range: _ }) => {
ast::Stmt::Delete(ast::StmtDelete {
targets,
range: _,
node_index: _,
}) => {
// We will check the target expressions and then delete them.
walk_stmt(self, stmt);
for target in targets {
@ -1926,7 +1934,11 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
}
}
}
ast::Stmt::Expr(ast::StmtExpr { value, range: _ }) if self.in_module_scope() => {
ast::Stmt::Expr(ast::StmtExpr {
value,
range: _,
node_index: _,
}) if self.in_module_scope() => {
if let Some(expr) = dunder_all_extend_argument(value) {
self.add_standalone_expression(expr);
}
@ -2186,6 +2198,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
ast::Expr::BoolOp(ast::ExprBoolOp {
values,
range: _,
node_index: _,
op,
}) => {
let pre_op = self.flow_snapshot();
@ -2273,6 +2286,7 @@ impl<'ast> Visitor<'ast> for SemanticIndexBuilder<'_, 'ast> {
if let ast::Pattern::MatchStar(ast::PatternMatchStar {
name: Some(name),
range: _,
node_index: _,
}) = pattern
{
let symbol = self.add_symbol(name.id().clone());
@ -2556,6 +2570,7 @@ fn dunder_all_extend_argument(value: &ast::Expr) -> Option<&ast::Expr> {
args,
keywords,
range: _,
node_index: _,
},
..
} = value.as_call_expr()?;

View file

@ -333,15 +333,14 @@ pub(crate) struct MatchPatternDefinitionNodeRef<'ast> {
}
impl<'db> DefinitionNodeRef<'_, 'db> {
#[expect(unsafe_code)]
pub(super) unsafe fn into_owned(self, parsed: ParsedModuleRef) -> DefinitionKind<'db> {
pub(super) fn into_owned(self, parsed: &ParsedModuleRef) -> DefinitionKind<'db> {
match self {
DefinitionNodeRef::Import(ImportDefinitionNodeRef {
node,
alias_index,
is_reexported,
}) => DefinitionKind::Import(ImportDefinitionKind {
node: unsafe { AstNodeRef::new(parsed, node) },
node: AstNodeRef::new(parsed, node),
alias_index,
is_reexported,
}),
@ -351,28 +350,28 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
alias_index,
is_reexported,
}) => DefinitionKind::ImportFrom(ImportFromDefinitionKind {
node: unsafe { AstNodeRef::new(parsed, node) },
node: AstNodeRef::new(parsed, node),
alias_index,
is_reexported,
}),
DefinitionNodeRef::ImportStar(star_import) => {
let StarImportDefinitionNodeRef { node, place_id } = star_import;
DefinitionKind::StarImport(StarImportDefinitionKind {
node: unsafe { AstNodeRef::new(parsed, node) },
node: AstNodeRef::new(parsed, node),
place_id,
})
}
DefinitionNodeRef::Function(function) => {
DefinitionKind::Function(unsafe { AstNodeRef::new(parsed, function) })
DefinitionKind::Function(AstNodeRef::new(parsed, function))
}
DefinitionNodeRef::Class(class) => {
DefinitionKind::Class(unsafe { AstNodeRef::new(parsed, class) })
DefinitionKind::Class(AstNodeRef::new(parsed, class))
}
DefinitionNodeRef::TypeAlias(type_alias) => {
DefinitionKind::TypeAlias(unsafe { AstNodeRef::new(parsed, type_alias) })
DefinitionKind::TypeAlias(AstNodeRef::new(parsed, type_alias))
}
DefinitionNodeRef::NamedExpression(named) => {
DefinitionKind::NamedExpression(unsafe { AstNodeRef::new(parsed, named) })
DefinitionKind::NamedExpression(AstNodeRef::new(parsed, named))
}
DefinitionNodeRef::Assignment(AssignmentDefinitionNodeRef {
unpack,
@ -380,8 +379,8 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
target,
}) => DefinitionKind::Assignment(AssignmentDefinitionKind {
target_kind: TargetKind::from(unpack),
value: unsafe { AstNodeRef::new(parsed.clone(), value) },
target: unsafe { AstNodeRef::new(parsed, target) },
value: AstNodeRef::new(parsed, value),
target: AstNodeRef::new(parsed, target),
}),
DefinitionNodeRef::AnnotatedAssignment(AnnotatedAssignmentDefinitionNodeRef {
node: _,
@ -389,14 +388,12 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
value,
target,
}) => DefinitionKind::AnnotatedAssignment(AnnotatedAssignmentDefinitionKind {
target: unsafe { AstNodeRef::new(parsed.clone(), target) },
annotation: unsafe { AstNodeRef::new(parsed.clone(), annotation) },
value: value.map(|v| unsafe { AstNodeRef::new(parsed, v) }),
target: AstNodeRef::new(parsed, target),
annotation: AstNodeRef::new(parsed, annotation),
value: value.map(|v| AstNodeRef::new(parsed, v)),
}),
DefinitionNodeRef::AugmentedAssignment(augmented_assignment) => {
DefinitionKind::AugmentedAssignment(unsafe {
AstNodeRef::new(parsed, augmented_assignment)
})
DefinitionKind::AugmentedAssignment(AstNodeRef::new(parsed, augmented_assignment))
}
DefinitionNodeRef::For(ForStmtDefinitionNodeRef {
unpack,
@ -405,8 +402,8 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
is_async,
}) => DefinitionKind::For(ForStmtDefinitionKind {
target_kind: TargetKind::from(unpack),
iterable: unsafe { AstNodeRef::new(parsed.clone(), iterable) },
target: unsafe { AstNodeRef::new(parsed, target) },
iterable: AstNodeRef::new(parsed, iterable),
target: AstNodeRef::new(parsed, target),
is_async,
}),
DefinitionNodeRef::Comprehension(ComprehensionDefinitionNodeRef {
@ -417,23 +414,19 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
is_async,
}) => DefinitionKind::Comprehension(ComprehensionDefinitionKind {
target_kind: TargetKind::from(unpack),
iterable: unsafe { AstNodeRef::new(parsed.clone(), iterable) },
target: unsafe { AstNodeRef::new(parsed, target) },
iterable: AstNodeRef::new(parsed, iterable),
target: AstNodeRef::new(parsed, target),
first,
is_async,
}),
DefinitionNodeRef::VariadicPositionalParameter(parameter) => {
DefinitionKind::VariadicPositionalParameter(unsafe {
AstNodeRef::new(parsed, parameter)
})
DefinitionKind::VariadicPositionalParameter(AstNodeRef::new(parsed, parameter))
}
DefinitionNodeRef::VariadicKeywordParameter(parameter) => {
DefinitionKind::VariadicKeywordParameter(unsafe {
AstNodeRef::new(parsed, parameter)
})
DefinitionKind::VariadicKeywordParameter(AstNodeRef::new(parsed, parameter))
}
DefinitionNodeRef::Parameter(parameter) => {
DefinitionKind::Parameter(unsafe { AstNodeRef::new(parsed, parameter) })
DefinitionKind::Parameter(AstNodeRef::new(parsed, parameter))
}
DefinitionNodeRef::WithItem(WithItemDefinitionNodeRef {
unpack,
@ -442,8 +435,8 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
is_async,
}) => DefinitionKind::WithItem(WithItemDefinitionKind {
target_kind: TargetKind::from(unpack),
context_expr: unsafe { AstNodeRef::new(parsed.clone(), context_expr) },
target: unsafe { AstNodeRef::new(parsed, target) },
context_expr: AstNodeRef::new(parsed, context_expr),
target: AstNodeRef::new(parsed, target),
is_async,
}),
DefinitionNodeRef::MatchPattern(MatchPatternDefinitionNodeRef {
@ -451,25 +444,25 @@ impl<'db> DefinitionNodeRef<'_, 'db> {
identifier,
index,
}) => DefinitionKind::MatchPattern(MatchPatternDefinitionKind {
pattern: unsafe { AstNodeRef::new(parsed.clone(), pattern) },
identifier: unsafe { AstNodeRef::new(parsed, identifier) },
pattern: AstNodeRef::new(parsed, pattern),
identifier: AstNodeRef::new(parsed, identifier),
index,
}),
DefinitionNodeRef::ExceptHandler(ExceptHandlerDefinitionNodeRef {
handler,
is_star,
}) => DefinitionKind::ExceptHandler(ExceptHandlerDefinitionKind {
handler: unsafe { AstNodeRef::new(parsed, handler) },
handler: AstNodeRef::new(parsed, handler),
is_star,
}),
DefinitionNodeRef::TypeVar(node) => {
DefinitionKind::TypeVar(unsafe { AstNodeRef::new(parsed, node) })
DefinitionKind::TypeVar(AstNodeRef::new(parsed, node))
}
DefinitionNodeRef::ParamSpec(node) => {
DefinitionKind::ParamSpec(unsafe { AstNodeRef::new(parsed, node) })
DefinitionKind::ParamSpec(AstNodeRef::new(parsed, node))
}
DefinitionNodeRef::TypeVarTuple(node) => {
DefinitionKind::TypeVarTuple(unsafe { AstNodeRef::new(parsed, node) })
DefinitionKind::TypeVarTuple(AstNodeRef::new(parsed, node))
}
}
}

View file

@ -778,46 +778,42 @@ pub(crate) enum NodeWithScopeRef<'a> {
impl NodeWithScopeRef<'_> {
/// Converts the unowned reference to an owned [`NodeWithScopeKind`].
///
/// # Safety
/// The node wrapped by `self` must be a child of `module`.
#[expect(unsafe_code)]
pub(super) unsafe fn to_kind(self, module: ParsedModuleRef) -> NodeWithScopeKind {
unsafe {
match self {
NodeWithScopeRef::Module => NodeWithScopeKind::Module,
NodeWithScopeRef::Class(class) => {
NodeWithScopeKind::Class(AstNodeRef::new(module, class))
}
NodeWithScopeRef::Function(function) => {
NodeWithScopeKind::Function(AstNodeRef::new(module, function))
}
NodeWithScopeRef::TypeAlias(type_alias) => {
NodeWithScopeKind::TypeAlias(AstNodeRef::new(module, type_alias))
}
NodeWithScopeRef::TypeAliasTypeParameters(type_alias) => {
NodeWithScopeKind::TypeAliasTypeParameters(AstNodeRef::new(module, type_alias))
}
NodeWithScopeRef::Lambda(lambda) => {
NodeWithScopeKind::Lambda(AstNodeRef::new(module, lambda))
}
NodeWithScopeRef::FunctionTypeParameters(function) => {
NodeWithScopeKind::FunctionTypeParameters(AstNodeRef::new(module, function))
}
NodeWithScopeRef::ClassTypeParameters(class) => {
NodeWithScopeKind::ClassTypeParameters(AstNodeRef::new(module, class))
}
NodeWithScopeRef::ListComprehension(comprehension) => {
NodeWithScopeKind::ListComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::SetComprehension(comprehension) => {
NodeWithScopeKind::SetComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::DictComprehension(comprehension) => {
NodeWithScopeKind::DictComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::GeneratorExpression(generator) => {
NodeWithScopeKind::GeneratorExpression(AstNodeRef::new(module, generator))
}
/// Note that the node wrapped by `self` must be a child of `module`.
pub(super) fn to_kind(self, module: &ParsedModuleRef) -> NodeWithScopeKind {
match self {
NodeWithScopeRef::Module => NodeWithScopeKind::Module,
NodeWithScopeRef::Class(class) => {
NodeWithScopeKind::Class(AstNodeRef::new(module, class))
}
NodeWithScopeRef::Function(function) => {
NodeWithScopeKind::Function(AstNodeRef::new(module, function))
}
NodeWithScopeRef::TypeAlias(type_alias) => {
NodeWithScopeKind::TypeAlias(AstNodeRef::new(module, type_alias))
}
NodeWithScopeRef::TypeAliasTypeParameters(type_alias) => {
NodeWithScopeKind::TypeAliasTypeParameters(AstNodeRef::new(module, type_alias))
}
NodeWithScopeRef::Lambda(lambda) => {
NodeWithScopeKind::Lambda(AstNodeRef::new(module, lambda))
}
NodeWithScopeRef::FunctionTypeParameters(function) => {
NodeWithScopeKind::FunctionTypeParameters(AstNodeRef::new(module, function))
}
NodeWithScopeRef::ClassTypeParameters(class) => {
NodeWithScopeKind::ClassTypeParameters(AstNodeRef::new(module, class))
}
NodeWithScopeRef::ListComprehension(comprehension) => {
NodeWithScopeKind::ListComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::SetComprehension(comprehension) => {
NodeWithScopeKind::SetComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::DictComprehension(comprehension) => {
NodeWithScopeKind::DictComprehension(AstNodeRef::new(module, comprehension))
}
NodeWithScopeRef::GeneratorExpression(generator) => {
NodeWithScopeKind::GeneratorExpression(AstNodeRef::new(module, generator))
}
}
}

View file

@ -104,6 +104,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
name,
asname,
range: _,
node_index: _,
} = alias;
let name = &name.id;
@ -126,6 +127,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
pattern,
name,
range: _,
node_index: _,
}) => {
if let Some(pattern) = pattern {
self.visit_pattern(pattern);
@ -145,6 +147,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
rest,
keys: _,
range: _,
node_index: _,
}) => {
for pattern in patterns {
self.visit_pattern(pattern);
@ -153,7 +156,11 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
self.possibly_add_export(&rest.id, PossibleExportKind::Normal);
}
}
ast::Pattern::MatchStar(ast::PatternMatchStar { name, range: _ }) => {
ast::Pattern::MatchStar(ast::PatternMatchStar {
name,
range: _,
node_index: _,
}) => {
if let Some(name) = name {
self.possibly_add_export(&name.id, PossibleExportKind::Normal);
}
@ -176,6 +183,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
type_params: _, // We don't want to visit the type params of the class
body: _, // We don't want to visit the body of the class
range: _,
node_index: _,
}) => {
self.possibly_add_export(&name.id, PossibleExportKind::Normal);
for decorator in decorator_list {
@ -194,6 +202,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
type_params: _, // We don't want to visit the type params of the function
body: _, // We don't want to visit the body of the function
range: _,
node_index: _,
is_async: _,
}) => {
self.possibly_add_export(&name.id, PossibleExportKind::Normal);
@ -212,6 +221,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
annotation,
simple: _,
range: _,
node_index: _,
}) => {
if value.is_some() || self.visiting_stub_file {
self.visit_expr(target);
@ -227,6 +237,7 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
type_params: _,
value: _,
range: _,
node_index: _,
}) => {
self.visit_expr(name);
// Neither walrus expressions nor statements can appear in type aliases;
@ -286,7 +297,12 @@ impl<'db> Visitor<'db> for ExportFinder<'db> {
fn visit_expr(&mut self, expr: &'db ast::Expr) {
match expr {
ast::Expr::Name(ast::ExprName { id, ctx, range: _ }) => {
ast::Expr::Name(ast::ExprName {
id,
ctx,
range: _,
node_index: _,
}) => {
if ctx.is_store() {
self.possibly_add_export(id, PossibleExportKind::Normal);
}
@ -359,11 +375,13 @@ impl<'db> Visitor<'db> for WalrusFinder<'_, 'db> {
target,
value: _,
range: _,
node_index: _,
}) => {
if let ast::Expr::Name(ast::ExprName {
id,
ctx: ast::ExprContext::Store,
range: _,
node_index: _,
}) = &**target
{
self.export_finder