[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned at parse time. `ParsedModule` can use this to
create a flat index of AST nodes any time the file is parsed (or
reparsed). This allows `AstNodeRef` to simply index into the current
instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source-order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -30,6 +30,7 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
let ExprAttribute {
value,
range: _,
node_index: _,
attr,
ctx: _,
} = item;
@ -188,7 +189,12 @@ impl NeedsParentheses for ExprAttribute {
// Non Hex, octal or binary number literals need parentheses to disambiguate the attribute `.` from
// a decimal point. Floating point numbers don't strictly need parentheses but it reads better (rather than 0.0.test()).
fn is_base_ten_number_literal(expr: &Expr, source: &str) -> bool {
if let Some(ExprNumberLiteral { value, range }) = expr.as_number_literal_expr() {
if let Some(ExprNumberLiteral {
value,
range,
node_index: _,
}) = expr.as_number_literal_expr()
{
match value {
Number::Float(_) => true,
Number::Int(_) => {

View file

@ -13,7 +13,11 @@ pub struct FormatExprAwait;
impl FormatNodeRule<ExprAwait> for FormatExprAwait {
fn fmt_fields(&self, item: &ExprAwait, f: &mut PyFormatter) -> FormatResult<()> {
let ExprAwait { range: _, value } = item;
let ExprAwait {
range: _,
node_index: _,
value,
} = item;
write!(
f,

View file

@ -27,6 +27,7 @@ impl FormatNodeRule<ExprCall> for FormatExprCall {
fn fmt_fields(&self, item: &ExprCall, f: &mut PyFormatter) -> FormatResult<()> {
let ExprCall {
range: _,
node_index: _,
func,
arguments,
} = item;

View file

@ -13,7 +13,11 @@ pub struct FormatExprDict;
impl FormatNodeRule<ExprDict> for FormatExprDict {
fn fmt_fields(&self, item: &ExprDict, f: &mut PyFormatter) -> FormatResult<()> {
let ExprDict { range: _, items } = item;
let ExprDict {
range: _,
node_index: _,
items,
} = item;
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);

View file

@ -14,6 +14,7 @@ impl FormatNodeRule<ExprDictComp> for FormatExprDictComp {
fn fmt_fields(&self, item: &ExprDictComp, f: &mut PyFormatter) -> FormatResult<()> {
let ExprDictComp {
range: _,
node_index: _,
key,
value,
generators,

View file

@ -37,6 +37,7 @@ impl FormatNodeRule<ExprGenerator> for FormatExprGenerator {
fn fmt_fields(&self, item: &ExprGenerator, f: &mut PyFormatter) -> FormatResult<()> {
let ExprGenerator {
range: _,
node_index: _,
elt,
generators,
parenthesized: is_parenthesized,

View file

@ -46,6 +46,7 @@ impl FormatNodeRule<ExprIf> for FormatExprIf {
fn fmt_fields(&self, item: &ExprIf, f: &mut PyFormatter) -> FormatResult<()> {
let ExprIf {
range: _,
node_index: _,
test,
body,
orelse,

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<ExprLambda> for FormatExprLambda {
fn fmt_fields(&self, item: &ExprLambda, f: &mut PyFormatter) -> FormatResult<()> {
let ExprLambda {
range: _,
node_index: _,
parameters,
body,
} = item;

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<ExprList> for FormatExprList {
fn fmt_fields(&self, item: &ExprList, f: &mut PyFormatter) -> FormatResult<()> {
let ExprList {
range: _,
node_index: _,
elts,
ctx: _,
} = item;

View file

@ -12,6 +12,7 @@ impl FormatNodeRule<ExprListComp> for FormatExprListComp {
fn fmt_fields(&self, item: &ExprListComp, f: &mut PyFormatter) -> FormatResult<()> {
let ExprListComp {
range: _,
node_index: _,
elt,
generators,
} = item;

View file

@ -14,6 +14,7 @@ impl FormatNodeRule<ExprName> for FormatExprName {
id: _,
range,
ctx: _,
node_index: _,
} = item;
write!(f, [source_text_slice(*range)])
}

View file

@ -17,6 +17,7 @@ impl FormatNodeRule<ExprNamed> for FormatExprNamed {
target,
value,
range: _,
node_index: _,
} = item;
// This context, a dangling comment is a comment between the `:=` and the value.

View file

@ -10,7 +10,11 @@ pub struct FormatExprSet;
impl FormatNodeRule<ExprSet> for FormatExprSet {
fn fmt_fields(&self, item: &ExprSet, f: &mut PyFormatter) -> FormatResult<()> {
let ExprSet { range: _, elts } = item;
let ExprSet {
range: _,
node_index: _,
elts,
} = item;
// That would be a dict expression
assert!(!elts.is_empty());
// Avoid second mutable borrow of f

View file

@ -12,6 +12,7 @@ impl FormatNodeRule<ExprSetComp> for FormatExprSetComp {
fn fmt_fields(&self, item: &ExprSetComp, f: &mut PyFormatter) -> FormatResult<()> {
let ExprSetComp {
range: _,
node_index: _,
elt,
generators,
} = item;

View file

@ -21,6 +21,7 @@ impl FormatNodeRule<ExprSlice> for FormatExprSlice {
upper,
step,
range,
node_index: _,
} = item;
let (first_colon, second_colon) = find_colons(
@ -232,6 +233,7 @@ pub(crate) fn assign_comment_in_slice(
upper,
step: _,
range,
node_index: _,
} = expr_slice;
let (first_colon, second_colon) =

View file

@ -14,6 +14,7 @@ impl FormatNodeRule<ExprStarred> for FormatExprStarred {
fn fmt_fields(&self, item: &ExprStarred, f: &mut PyFormatter) -> FormatResult<()> {
let ExprStarred {
range: _,
node_index: _,
value,
ctx: _,
} = item;

View file

@ -27,6 +27,7 @@ impl FormatNodeRule<ExprSubscript> for FormatExprSubscript {
fn fmt_fields(&self, item: &ExprSubscript, f: &mut PyFormatter) -> FormatResult<()> {
let ExprSubscript {
range: _,
node_index: _,
value,
slice,
ctx: _,

View file

@ -116,6 +116,7 @@ impl FormatNodeRule<ExprTuple> for FormatExprTuple {
elts,
ctx: _,
range: _,
node_index: _,
parenthesized: is_parenthesized,
} = item;

View file

@ -15,6 +15,7 @@ impl FormatNodeRule<ExprUnaryOp> for FormatExprUnaryOp {
fn fmt_fields(&self, item: &ExprUnaryOp, f: &mut PyFormatter) -> FormatResult<()> {
let ExprUnaryOp {
range: _,
node_index: _,
op,
operand,
} = item;

View file

@ -685,6 +685,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
#[expect(clippy::cast_possible_truncation)]
Expr::BoolOp(ast::ExprBoolOp {
range: _,
node_index: _,
op: _,
values,
}) => self.update_max_precedence_with_count(
@ -696,6 +697,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
left: _,
right: _,
range: _,
node_index: _,
}) => self.update_max_precedence(OperatorPrecedence::from(*op)),
Expr::If(_) => {
@ -708,6 +710,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
#[expect(clippy::cast_possible_truncation)]
Expr::Compare(ast::ExprCompare {
range: _,
node_index: _,
left: _,
ops,
comparators: _,
@ -719,6 +722,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
}
Expr::Call(ast::ExprCall {
range: _,
node_index: _,
func,
arguments: _,
}) => {
@ -740,6 +744,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
// `[a, b].test.test[300].dot`
Expr::Attribute(ast::ExprAttribute {
range: _,
node_index: _,
value,
attr: _,
ctx: _,
@ -760,6 +765,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
// Visit the sub-expressions because the sub expressions may be the end of the entire expression.
Expr::UnaryOp(ast::ExprUnaryOp {
range: _,
node_index: _,
op,
operand: _,
}) => {