[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries with a cross-file dependency on the AST will reparse the file on demand. This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding to every AST node a `node_index` field that is assigned by the parser. `ParsedModule` can use this to create a flat index of AST nodes any time the file is parsed (or reparsed). This allows `AstNodeRef` to simply index into the current instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by the `parsed_module` query instead of by the parser directly. Assigning the indices in source order in the (recursive) parser turns out to be difficult, and collecting the nodes during semantic indexing is impossible, as `SemanticIndex` does not hold onto a specific `ParsedModuleRef`, which the pointers in the flat AST are tied to. This means that we have to do an extra AST traversal to assign and collect the nodes into a flat index, but the small performance impact (~3% on cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
2025-10-08 01:20:29 +00:00 · 2025-06-13 08:40:11 -04:00 · 2025-06-13 08:40:11 -04:00 · c9dff5c7d5
commit c9dff5c7d5
parent 76d9009a6e
824 changed files with 25243 additions and 804 deletions
--- a/crates/ruff_python_formatter/src/expression/expr_attribute.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_attribute.rs
@ -30,6 +30,7 @@ impl FormatNodeRule<ExprAttribute> for FormatExprAttribute {
        let ExprAttribute {
            value,
            range: _,
+            node_index: _,
            attr,
            ctx: _,
        } = item;
@ -188,7 +189,12 @@ impl NeedsParentheses for ExprAttribute {
 // Non Hex, octal or binary number literals need parentheses to disambiguate the attribute `.` from
 // a decimal point. Floating point numbers don't strictly need parentheses but it reads better (rather than 0.0.test()).
 fn is_base_ten_number_literal(expr: &Expr, source: &str) -> bool {
-    if let Some(ExprNumberLiteral { value, range }) = expr.as_number_literal_expr() {
+    if let Some(ExprNumberLiteral {
+        value,
+        range,
+        node_index: _,
+    }) = expr.as_number_literal_expr()
+    {
        match value {
            Number::Float(_) => true,
            Number::Int(_) => {
--- a/crates/ruff_python_formatter/src/expression/expr_await.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_await.rs
@ -13,7 +13,11 @@ pub struct FormatExprAwait;

 impl FormatNodeRule<ExprAwait> for FormatExprAwait {
    fn fmt_fields(&self, item: &ExprAwait, f: &mut PyFormatter) -> FormatResult<()> {
-        let ExprAwait { range: _, value } = item;
+        let ExprAwait {
+            range: _,
+            node_index: _,
+            value,
+        } = item;

        write!(
            f,
--- a/crates/ruff_python_formatter/src/expression/expr_call.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_call.rs
@ -27,6 +27,7 @@ impl FormatNodeRule<ExprCall> for FormatExprCall {
    fn fmt_fields(&self, item: &ExprCall, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprCall {
            range: _,
+            node_index: _,
            func,
            arguments,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_dict.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_dict.rs
@ -13,7 +13,11 @@ pub struct FormatExprDict;

 impl FormatNodeRule<ExprDict> for FormatExprDict {
    fn fmt_fields(&self, item: &ExprDict, f: &mut PyFormatter) -> FormatResult<()> {
-        let ExprDict { range: _, items } = item;
+        let ExprDict {
+            range: _,
+            node_index: _,
+            items,
+        } = item;

        let comments = f.context().comments().clone();
        let dangling = comments.dangling(item);
--- a/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_dict_comp.rs
@ -14,6 +14,7 @@ impl FormatNodeRule<ExprDictComp> for FormatExprDictComp {
    fn fmt_fields(&self, item: &ExprDictComp, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprDictComp {
            range: _,
+            node_index: _,
            key,
            value,
            generators,
--- a/crates/ruff_python_formatter/src/expression/expr_generator.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_generator.rs
@ -37,6 +37,7 @@ impl FormatNodeRule<ExprGenerator> for FormatExprGenerator {
    fn fmt_fields(&self, item: &ExprGenerator, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprGenerator {
            range: _,
+            node_index: _,
            elt,
            generators,
            parenthesized: is_parenthesized,
--- a/crates/ruff_python_formatter/src/expression/expr_if.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_if.rs
@ -46,6 +46,7 @@ impl FormatNodeRule<ExprIf> for FormatExprIf {
    fn fmt_fields(&self, item: &ExprIf, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprIf {
            range: _,
+            node_index: _,
            test,
            body,
            orelse,
--- a/crates/ruff_python_formatter/src/expression/expr_lambda.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_lambda.rs
@ -15,6 +15,7 @@ impl FormatNodeRule<ExprLambda> for FormatExprLambda {
    fn fmt_fields(&self, item: &ExprLambda, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprLambda {
            range: _,
+            node_index: _,
            parameters,
            body,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_list.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_list.rs
@ -15,6 +15,7 @@ impl FormatNodeRule<ExprList> for FormatExprList {
    fn fmt_fields(&self, item: &ExprList, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprList {
            range: _,
+            node_index: _,
            elts,
            ctx: _,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_list_comp.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_list_comp.rs
@ -12,6 +12,7 @@ impl FormatNodeRule<ExprListComp> for FormatExprListComp {
    fn fmt_fields(&self, item: &ExprListComp, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprListComp {
            range: _,
+            node_index: _,
            elt,
            generators,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_name.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_name.rs
@ -14,6 +14,7 @@ impl FormatNodeRule<ExprName> for FormatExprName {
            id: _,
            range,
            ctx: _,
+            node_index: _,
        } = item;
        write!(f, [source_text_slice(*range)])
    }
--- a/crates/ruff_python_formatter/src/expression/expr_named.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_named.rs
@ -17,6 +17,7 @@ impl FormatNodeRule<ExprNamed> for FormatExprNamed {
            target,
            value,
            range: _,
+            node_index: _,
        } = item;

        // This context, a dangling comment is a comment between the `:=` and the value.
--- a/crates/ruff_python_formatter/src/expression/expr_set.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_set.rs
@ -10,7 +10,11 @@ pub struct FormatExprSet;

 impl FormatNodeRule<ExprSet> for FormatExprSet {
    fn fmt_fields(&self, item: &ExprSet, f: &mut PyFormatter) -> FormatResult<()> {
-        let ExprSet { range: _, elts } = item;
+        let ExprSet {
+            range: _,
+            node_index: _,
+            elts,
+        } = item;
        // That would be a dict expression
        assert!(!elts.is_empty());
        // Avoid second mutable borrow of f
--- a/crates/ruff_python_formatter/src/expression/expr_set_comp.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_set_comp.rs
@ -12,6 +12,7 @@ impl FormatNodeRule<ExprSetComp> for FormatExprSetComp {
    fn fmt_fields(&self, item: &ExprSetComp, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprSetComp {
            range: _,
+            node_index: _,
            elt,
            generators,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_slice.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_slice.rs
@ -21,6 +21,7 @@ impl FormatNodeRule<ExprSlice> for FormatExprSlice {
            upper,
            step,
            range,
+            node_index: _,
        } = item;

        let (first_colon, second_colon) = find_colons(
@ -232,6 +233,7 @@ pub(crate) fn assign_comment_in_slice(
        upper,
        step: _,
        range,
+        node_index: _,
    } = expr_slice;

    let (first_colon, second_colon) =
--- a/crates/ruff_python_formatter/src/expression/expr_starred.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_starred.rs
@ -14,6 +14,7 @@ impl FormatNodeRule<ExprStarred> for FormatExprStarred {
    fn fmt_fields(&self, item: &ExprStarred, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprStarred {
            range: _,
+            node_index: _,
            value,
            ctx: _,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/expr_subscript.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_subscript.rs
@ -27,6 +27,7 @@ impl FormatNodeRule<ExprSubscript> for FormatExprSubscript {
    fn fmt_fields(&self, item: &ExprSubscript, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprSubscript {
            range: _,
+            node_index: _,
            value,
            slice,
            ctx: _,
--- a/crates/ruff_python_formatter/src/expression/expr_tuple.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_tuple.rs
@ -116,6 +116,7 @@ impl FormatNodeRule<ExprTuple> for FormatExprTuple {
            elts,
            ctx: _,
            range: _,
+            node_index: _,
            parenthesized: is_parenthesized,
        } = item;

--- a/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_unary_op.rs
@ -15,6 +15,7 @@ impl FormatNodeRule<ExprUnaryOp> for FormatExprUnaryOp {
    fn fmt_fields(&self, item: &ExprUnaryOp, f: &mut PyFormatter) -> FormatResult<()> {
        let ExprUnaryOp {
            range: _,
+            node_index: _,
            op,
            operand,
        } = item;
--- a/crates/ruff_python_formatter/src/expression/mod.rs
+++ b/crates/ruff_python_formatter/src/expression/mod.rs
@ -685,6 +685,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
            #[expect(clippy::cast_possible_truncation)]
            Expr::BoolOp(ast::ExprBoolOp {
                range: _,
+                node_index: _,
                op: _,
                values,
            }) => self.update_max_precedence_with_count(
@ -696,6 +697,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
                left: _,
                right: _,
                range: _,
+                node_index: _,
            }) => self.update_max_precedence(OperatorPrecedence::from(*op)),

            Expr::If(_) => {
@ -708,6 +710,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
            #[expect(clippy::cast_possible_truncation)]
            Expr::Compare(ast::ExprCompare {
                range: _,
+                node_index: _,
                left: _,
                ops,
                comparators: _,
@ -719,6 +722,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
            }
            Expr::Call(ast::ExprCall {
                range: _,
+                node_index: _,
                func,
                arguments: _,
            }) => {
@ -740,6 +744,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
            // `[a, b].test.test[300].dot`
            Expr::Attribute(ast::ExprAttribute {
                range: _,
+                node_index: _,
                value,
                attr: _,
                ctx: _,
@ -760,6 +765,7 @@ impl<'input> CanOmitOptionalParenthesesVisitor<'input> {
            // Visit the sub-expressions because the sub expressions may be the end of the entire expression.
            Expr::UnaryOp(ast::ExprUnaryOp {
                range: _,
+                node_index: _,
                op,
                operand: _,
            }) => {