[ty] AST garbage collection (#18482)

## Summary

Garbage collect ASTs once we are done checking a given file. Queries
with a cross-file dependency on the AST will reparse the file on demand.
This reduces ty's peak memory usage by ~20-30%.

The primary change of this PR is adding a `node_index` field to every
AST node, which is assigned as part of parsing the file. `ParsedModule`
can use this to create a flat index of AST nodes any time the file is
parsed (or reparsed). This allows `AstNodeRef` to simply index into the
current instance of the `ParsedModule`, instead of storing a pointer directly.

The indices are assigned, somewhat hackily (using an atomic integer), by
the `parsed_module` query rather than by the parser directly. Assigning
the indices in source order in the (recursive) parser turns out to be
difficult, and collecting the nodes during semantic indexing is
impossible as `SemanticIndex` does not hold onto a specific
`ParsedModuleRef`, which the pointers in the flat AST are tied to. This
means that we have to do an extra AST traversal to assign and collect
the nodes into a flat index, but the small performance impact (~3% on
cold runs) seems worth it for the memory savings.

Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
Ibraheem Ahmed 2025-06-13 08:40:11 -04:00 committed by GitHub
parent 76d9009a6e
commit c9dff5c7d5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
824 changed files with 25243 additions and 804 deletions

View file

@ -1,10 +1,10 @@
---
source: crates/ruff_python_parser/src/parser/tests.rs
expression: parsed.expr()
snapshot_kind: text
---
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 0..5,
id: Name("first"),
ctx: Load,

View file

@ -1,20 +1,23 @@
---
source: crates/ruff_python_parser/src/parser/tests.rs
expression: parsed.syntax()
snapshot_kind: text
---
Module(
ModModule {
node_index: AtomicNodeIndex(..),
range: 0..929,
body: [
Expr(
StmtExpr {
node_index: AtomicNodeIndex(..),
range: 21..42,
value: BinOp(
ExprBinOp {
node_index: AtomicNodeIndex(..),
range: 27..40,
left: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 27..28,
id: Name("a"),
ctx: Load,
@ -23,6 +26,7 @@ Module(
op: Mod,
right: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 39..40,
id: Name("b"),
ctx: Load,
@ -34,6 +38,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 66..73,
kind: Help2,
value: "a.foo",
@ -41,6 +46,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 74..80,
kind: Help,
value: "a.foo",
@ -48,6 +54,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 81..88,
kind: Help,
value: "a.foo",
@ -55,6 +62,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 89..100,
kind: Help2,
value: "a.foo()",
@ -62,6 +70,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 115..128,
kind: Magic,
value: "timeit a = b",
@ -69,6 +78,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 129..147,
kind: Magic,
value: "timeit foo(b) % 3",
@ -76,6 +86,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 148..176,
kind: Magic,
value: "alias showPath pwd && ls -a",
@ -83,6 +94,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 177..205,
kind: Magic,
value: "timeit a = foo(b); b = 2",
@ -90,6 +102,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 206..226,
kind: Magic,
value: "matplotlib --inline",
@ -97,6 +110,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 227..253,
kind: Magic,
value: "matplotlib --inline",
@ -104,6 +118,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 277..309,
kind: Shell,
value: "pwd && ls -a | sed 's/^/\\ /'",
@ -111,6 +126,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 310..347,
kind: Shell,
value: "pwd && ls -a | sed 's/^/\\\\ /'",
@ -118,6 +134,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 348..393,
kind: ShCap,
value: "cd /Users/foo/Library/Application\\ Support/",
@ -125,16 +142,21 @@ Module(
),
FunctionDef(
StmtFunctionDef {
node_index: AtomicNodeIndex(..),
range: 566..626,
is_async: false,
decorator_list: [],
name: Identifier {
id: Name("foo"),
range: 570..573,
node_index: AtomicNodeIndex(..),
},
type_params: None,
parameters: Parameters {
range: 573..575,
node_index: AtomicNodeIndex(
0,
),
posonlyargs: [],
args: [],
vararg: None,
@ -145,13 +167,16 @@ Module(
body: [
Return(
StmtReturn {
node_index: AtomicNodeIndex(..),
range: 581..626,
value: Some(
Compare(
ExprCompare {
node_index: AtomicNodeIndex(..),
range: 598..620,
left: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 598..599,
id: Name("a"),
ctx: Load,
@ -163,6 +188,7 @@ Module(
comparators: [
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 619..620,
id: Name("b"),
ctx: Load,
@ -179,6 +205,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 656..664,
kind: Paren,
value: "foo 1 2",
@ -186,6 +213,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 665..673,
kind: Quote2,
value: "foo 1 2",
@ -193,6 +221,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 674..682,
kind: Quote,
value: "foo 1 2",
@ -200,10 +229,12 @@ Module(
),
For(
StmtFor {
node_index: AtomicNodeIndex(..),
range: 711..737,
is_async: false,
target: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 715..716,
id: Name("a"),
ctx: Store,
@ -211,9 +242,11 @@ Module(
),
iter: Call(
ExprCall {
node_index: AtomicNodeIndex(..),
range: 720..728,
func: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 720..725,
id: Name("range"),
ctx: Load,
@ -221,9 +254,11 @@ Module(
),
arguments: Arguments {
range: 725..728,
node_index: AtomicNodeIndex(..),
args: [
NumberLiteral(
ExprNumberLiteral {
node_index: AtomicNodeIndex(..),
range: 726..727,
value: Int(
5,
@ -238,6 +273,7 @@ Module(
body: [
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 734..737,
kind: Shell,
value: "ls",
@ -249,10 +285,12 @@ Module(
),
Assign(
StmtAssign {
node_index: AtomicNodeIndex(..),
range: 739..748,
targets: [
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 739..741,
id: Name("p1"),
ctx: Store,
@ -261,6 +299,7 @@ Module(
],
value: IpyEscapeCommand(
ExprIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 744..748,
kind: Shell,
value: "pwd",
@ -270,9 +309,11 @@ Module(
),
AnnAssign(
StmtAnnAssign {
node_index: AtomicNodeIndex(..),
range: 749..763,
target: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 749..751,
id: Name("p2"),
ctx: Store,
@ -280,6 +321,7 @@ Module(
),
annotation: Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 753..756,
id: Name("str"),
ctx: Load,
@ -288,6 +330,7 @@ Module(
value: Some(
IpyEscapeCommand(
ExprIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 759..763,
kind: Shell,
value: "pwd",
@ -299,10 +342,12 @@ Module(
),
Assign(
StmtAssign {
node_index: AtomicNodeIndex(..),
range: 764..784,
targets: [
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 764..767,
id: Name("foo"),
ctx: Store,
@ -311,6 +356,7 @@ Module(
],
value: IpyEscapeCommand(
ExprIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 770..784,
kind: Magic,
value: "foo bar",
@ -320,6 +366,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 786..791,
kind: Magic,
value: " foo",
@ -327,10 +374,12 @@ Module(
),
Assign(
StmtAssign {
node_index: AtomicNodeIndex(..),
range: 792..813,
targets: [
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 792..795,
id: Name("foo"),
ctx: Store,
@ -339,6 +388,7 @@ Module(
],
value: IpyEscapeCommand(
ExprIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 798..813,
kind: Magic,
value: "foo # comment",
@ -348,6 +398,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 838..842,
kind: Help,
value: "foo",
@ -355,6 +406,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 843..852,
kind: Help2,
value: "foo.bar",
@ -362,6 +414,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 853..865,
kind: Help,
value: "foo.bar.baz",
@ -369,6 +422,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 866..874,
kind: Help2,
value: "foo[0]",
@ -376,6 +430,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 875..885,
kind: Help,
value: "foo[0][1]",
@ -383,6 +438,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 886..905,
kind: Help2,
value: "foo.bar[0].baz[1]",
@ -390,6 +446,7 @@ Module(
),
IpyEscapeCommand(
StmtIpyEscapeCommand {
node_index: AtomicNodeIndex(..),
range: 906..929,
kind: Help2,
value: "foo.bar[0].baz[2].egg",

View file

@ -1,15 +1,16 @@
---
source: crates/ruff_python_parser/src/parser/tests.rs
expression: suite
snapshot_kind: text
---
[
Assign(
StmtAssign {
node_index: AtomicNodeIndex(..),
range: 0..37,
targets: [
Name(
ExprName {
node_index: AtomicNodeIndex(..),
range: 0..1,
id: Name("x"),
ctx: Store,
@ -18,11 +19,13 @@ snapshot_kind: text
],
value: StringLiteral(
ExprStringLiteral {
node_index: AtomicNodeIndex(..),
range: 4..37,
value: StringLiteralValue {
inner: Single(
StringLiteral {
range: 4..37,
node_index: AtomicNodeIndex(..),
value: "\u{8}another cool trick",
flags: StringLiteralFlags {
quote_style: Double,