[ty] Add environment variable to dump Salsa memory usage stats (#18928)

## Summary

Setting `TY_MEMORY_REPORT=full` will generate and print a memory usage
report to the CLI after a `ty check` run:

```
=======SALSA STRUCTS=======
`Definition`                                       metadata=7.24MB   fields=17.38MB  count=181062
`Expression`                                       metadata=4.45MB   fields=5.94MB   count=92804
`member_lookup_with_policy_::interned_arguments`   metadata=1.97MB   fields=2.25MB   count=35176
...
=======SALSA QUERIES=======
`File -> ty_python_semantic::semantic_index::SemanticIndex`
    metadata=11.46MB  fields=88.86MB  count=1638
`Definition -> ty_python_semantic::types::infer::TypeInference`
    metadata=24.52MB  fields=86.68MB  count=146018
`File -> ruff_db::parsed::ParsedModule`
    metadata=0.12MB   fields=69.06MB  count=1642
...
=======SALSA SUMMARY=======
TOTAL MEMORY USAGE: 577.61MB
    struct metadata = 29.00MB
    struct fields = 35.68MB
    memo metadata = 103.87MB
    memo fields = 409.06MB
```

Eventually, we should integrate these numbers into CI in some form. The
one limitation currently is that heap allocations in salsa structs (e.g.
interned values) are not tracked, but memoized values should have full
coverage. We may also want a peak memory usage counter (that accounts
for non-salsa memory), but that is relatively simple to profile manually
(e.g. `/usr/bin/time -v ty check` with GNU time), and a built-in counter
would require a compile-time option to avoid runtime overhead.
This commit is contained in:
Ibraheem Ahmed 2025-06-26 17:27:51 -04:00 committed by GitHub
parent a1579d82d0
commit 6f7b1c9bb3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
79 changed files with 905 additions and 207 deletions

View file

@ -7,7 +7,7 @@ use crate::{TokenKind, string::InterpolatedStringKind};
/// Represents errors that occur during parsing and are
/// returned by the `parse_*` functions.
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, get_size2::GetSize)]
pub struct ParseError {
pub error: ParseErrorType,
pub location: TextRange,
@ -49,7 +49,7 @@ impl ParseError {
}
/// Represents the different types of errors that can occur during parsing of an f-string or t-string.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, get_size2::GetSize)]
pub enum InterpolatedStringErrorType {
/// Expected a right brace after an opened left brace.
UnclosedLbrace,
@ -95,7 +95,7 @@ impl std::fmt::Display for InterpolatedStringErrorType {
}
/// Represents the different types of errors that can occur during parsing.
#[derive(Debug, PartialEq, Eq, Clone)]
#[derive(Debug, PartialEq, Eq, Clone, get_size2::GetSize)]
pub enum ParseErrorType {
/// An unexpected error occurred.
OtherError(String),
@ -384,7 +384,7 @@ impl std::fmt::Display for LexicalError {
}
/// Represents the different types of errors that can occur during lexing.
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, get_size2::GetSize)]
pub enum LexicalErrorType {
// TODO: Can probably be removed, the places it is used seem to be able
// to use the `UnicodeError` variant instead.
@ -468,7 +468,7 @@ impl std::fmt::Display for LexicalErrorType {
///
/// An example of a version-related error is the use of a `match` statement before Python 3.10, when
/// it was first introduced. See [`UnsupportedSyntaxErrorKind`] for other kinds of errors.
#[derive(Debug, PartialEq, Clone)]
#[derive(Debug, PartialEq, Clone, get_size2::GetSize)]
pub struct UnsupportedSyntaxError {
pub kind: UnsupportedSyntaxErrorKind,
pub range: TextRange,
@ -483,28 +483,28 @@ impl Ranged for UnsupportedSyntaxError {
}
/// The type of tuple unpacking for [`UnsupportedSyntaxErrorKind::StarTuple`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, get_size2::GetSize)]
pub enum StarTupleKind {
Return,
Yield,
}
/// The type of PEP 701 f-string error for [`UnsupportedSyntaxErrorKind::Pep701FString`].
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, get_size2::GetSize)]
pub enum FStringKind {
Backslash,
Comment,
NestedQuote,
}
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, get_size2::GetSize)]
pub enum UnparenthesizedNamedExprKind {
SequenceIndex,
SetLiteral,
SetComprehension,
}
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy, get_size2::GetSize)]
pub enum UnsupportedSyntaxErrorKind {
Match,
Walrus,
@ -988,7 +988,7 @@ impl Display for UnsupportedSyntaxError {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum RelaxedDecoratorError {
CallExpression,
Other(&'static str),

View file

@ -304,7 +304,7 @@ pub fn parse_unchecked_source(source: &str, source_type: PySourceType) -> Parsed
}
/// Represents the parsed source code.
#[derive(Debug, PartialEq, Clone)]
#[derive(Debug, PartialEq, Clone, get_size2::GetSize)]
pub struct Parsed<T> {
syntax: T,
tokens: Tokens,
@ -474,7 +474,7 @@ impl Parsed<ModExpression> {
}
/// Tokens represents a vector of lexed [`Token`].
#[derive(Debug, Clone, PartialEq, Eq)]
#[derive(Debug, Clone, PartialEq, Eq, get_size2::GetSize)]
pub struct Tokens {
raw: Vec<Token>,
}

View file

@ -890,7 +890,7 @@ impl SemanticSyntaxChecker {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub struct SemanticSyntaxError {
pub kind: SemanticSyntaxErrorKind,
pub range: TextRange,
@ -981,7 +981,7 @@ impl Display for SemanticSyntaxError {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum SemanticSyntaxErrorKind {
/// Represents the use of a `__future__` import after the beginning of a file.
///
@ -1303,7 +1303,7 @@ pub enum SemanticSyntaxErrorKind {
NonlocalDeclarationAtModuleLevel,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum AwaitOutsideAsyncFunctionKind {
Await,
AsyncFor,
@ -1322,7 +1322,7 @@ impl Display for AwaitOutsideAsyncFunctionKind {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum YieldOutsideFunctionKind {
Yield,
YieldFrom,
@ -1345,7 +1345,7 @@ impl Display for YieldOutsideFunctionKind {
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum InvalidExpressionPosition {
TypeVarBound,
TypeVarDefault,
@ -1370,7 +1370,7 @@ impl Display for InvalidExpressionPosition {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum InvalidExpressionKind {
Yield,
NamedExpr,
@ -1387,7 +1387,7 @@ impl Display for InvalidExpressionKind {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub enum WriteToDebugKind {
Store,
Delete(PythonVersion),

View file

@ -17,7 +17,7 @@ use ruff_python_ast::str_prefix::{
use ruff_python_ast::{AnyStringFlags, BoolOp, Int, IpyEscapeKind, Operator, StringFlags, UnaryOp};
use ruff_text_size::{Ranged, TextRange};
#[derive(Clone, Copy, PartialEq, Eq)]
#[derive(Clone, Copy, PartialEq, Eq, get_size2::GetSize)]
pub struct Token {
/// The kind of the token.
kind: TokenKind,
@ -124,7 +124,7 @@ impl fmt::Debug for Token {
}
/// A kind of a token.
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord)]
#[derive(Copy, Clone, PartialEq, Eq, Hash, Debug, PartialOrd, Ord, get_size2::GetSize)]
pub enum TokenKind {
/// Token kind for a name, commonly known as an identifier.
Name,
@ -754,6 +754,8 @@ bitflags! {
}
}
impl get_size2::GetSize for TokenFlags {}
impl StringFlags for TokenFlags {
fn quote_style(self) -> Quote {
if self.intersects(TokenFlags::DOUBLE_QUOTES) {