[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned when the file is parsed. `ParsedModule` can use
this to create a flat index of AST nodes any time the file is parsed (or
reparsed). This allows `AstNodeRef` to simply index into the current
instance of the `ParsedModule` instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query instead of by the parser directly. Assigning
the indices in source order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible because `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign the indices and
collect the nodes into a flat index, but the small performance impact
(~3% on cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -9,6 +9,7 @@ impl FormatNodeRule<PatternKeyword> for FormatPatternKeyword {
fn fmt_fields(&self, item: &PatternKeyword, f: &mut PyFormatter) -> FormatResult<()> {
let PatternKeyword {
range: _,
node_index: _,
attr,
pattern,
} = item;

View file

@ -13,6 +13,7 @@ impl FormatNodeRule<PatternMatchAs> for FormatPatternMatchAs {
fn fmt_fields(&self, item: &PatternMatchAs, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchAs {
range: _,
node_index: _,
pattern,
name,
} = item;

View file

@ -13,6 +13,7 @@ impl FormatNodeRule<PatternMatchClass> for FormatPatternMatchClass {
fn fmt_fields(&self, item: &PatternMatchClass, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchClass {
range: _,
node_index: _,
cls,
arguments,
} = item;

View file

@ -21,6 +21,7 @@ impl FormatNodeRule<PatternMatchMapping> for FormatPatternMatchMapping {
patterns,
rest,
range: _,
node_index: _,
} = item;
debug_assert_eq!(keys.len(), patterns.len());
@ -163,6 +164,7 @@ fn find_double_star(pattern: &PatternMatchMapping, source: &str) -> Option<(Text
patterns,
rest,
range: _,
node_index: _,
} = pattern;
// If there's no `rest` element, there's no `**`.

View file

@ -14,7 +14,11 @@ pub struct FormatPatternMatchOr;
impl FormatNodeRule<PatternMatchOr> for FormatPatternMatchOr {
fn fmt_fields(&self, item: &PatternMatchOr, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchOr { range: _, patterns } = item;
let PatternMatchOr {
range: _,
node_index: _,
patterns,
} = item;
let inner = format_with(|f: &mut PyFormatter| {
let mut patterns = patterns.iter();
let comments = f.context().comments().clone();

View file

@ -14,7 +14,11 @@ pub struct FormatPatternMatchSequence;
impl FormatNodeRule<PatternMatchSequence> for FormatPatternMatchSequence {
fn fmt_fields(&self, item: &PatternMatchSequence, f: &mut PyFormatter) -> FormatResult<()> {
let PatternMatchSequence { patterns, range } = item;
let PatternMatchSequence {
patterns,
range,
node_index: _,
} = item;
let comments = f.context().comments().clone();
let dangling = comments.dangling(item);

View file

@ -9,7 +9,11 @@ pub struct FormatPatternMatchValue;
// `FormatNodeRule` implementation that formats `PatternMatchValue` nodes.
impl FormatNodeRule<PatternMatchValue> for FormatPatternMatchValue {
/// Formats `item` by formatting its `value` with the `Parentheses::Never`
/// option applied, writing the result into `f`.
fn fmt_fields(&self, item: &PatternMatchValue, f: &mut PyFormatter) -> FormatResult<()> {
// NOTE(review): two destructurings of `item` appear back to back here;
// presumably these are the pre-change and post-change lines of the diff.
// As written, the second binding of `value` shadows the first.
let PatternMatchValue { value, range: _ } = item;
// Exhaustive destructuring: `range` and `node_index` are explicitly
// ignored via `_`, so only `value` is used below.
let PatternMatchValue {
value,
range: _,
node_index: _,
} = item;
value.format().with_options(Parentheses::Never).fmt(f)
}
}