mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00
[ty] AST garbage collection (#18482)
## Summary Garbage collect ASTs once we are done checking a given file. Queries with a cross-file dependency on the AST will reparse the file on demand. This reduces ty's peak memory usage by ~20-30%. The primary change of this PR is adding a `node_index` field to every AST node, that is assigned by the parser. `ParsedModule` can use this to create a flat index of AST nodes any time the file is parsed (or reparsed). This allows `AstNodeRef` to simply index into the current instance of the `ParsedModule`, instead of storing a pointer directly. The indices are somewhat hackily (using an atomic integer) assigned by the `parsed_module` query instead of by the parser directly. Assigning the indices in source-order in the (recursive) parser turns out to be difficult, and collecting the nodes during semantic indexing is impossible as `SemanticIndex` does not hold onto a specific `ParsedModuleRef`, which the pointers in the flat AST are tied to. This means that we have to do an extra AST traversal to assign and collect the nodes into a flat index, but the small performance impact (~3% on cold runs) seems worth it for the memory savings. Part of https://github.com/astral-sh/ty/issues/214.
This commit is contained in:
parent
76d9009a6e
commit
c9dff5c7d5
824 changed files with 25243 additions and 804 deletions
|
@ -21,6 +21,7 @@ ruff_source_file = { workspace = true }
|
|||
ruff_text_size = { workspace = true }
|
||||
|
||||
anstyle = { workspace = true }
|
||||
arc-swap = { workspace = true }
|
||||
camino = { workspace = true }
|
||||
countme = { workspace = true }
|
||||
dashmap = { workspace = true }
|
||||
|
|
|
@ -1,7 +1,8 @@
|
|||
use std::fmt::Formatter;
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruff_python_ast::ModModule;
|
||||
use arc_swap::ArcSwapOption;
|
||||
use ruff_python_ast::{AnyRootNodeRef, ModModule, NodeIndex};
|
||||
use ruff_python_parser::{ParseOptions, Parsed, parse_unchecked};
|
||||
|
||||
use crate::Db;
|
||||
|
@ -23,16 +24,20 @@ use crate::source::source_text;
|
|||
pub fn parsed_module(db: &dyn Db, file: File) -> ParsedModule {
|
||||
let _span = tracing::trace_span!("parsed_module", ?file).entered();
|
||||
|
||||
let parsed = parsed_module_impl(db, file);
|
||||
|
||||
ParsedModule::new(file, parsed)
|
||||
}
|
||||
|
||||
pub fn parsed_module_impl(db: &dyn Db, file: File) -> Parsed<ModModule> {
|
||||
let source = source_text(db, file);
|
||||
let ty = file.source_type(db);
|
||||
|
||||
let target_version = db.python_version();
|
||||
let options = ParseOptions::from(ty).with_target_version(target_version);
|
||||
let parsed = parse_unchecked(&source, options)
|
||||
parse_unchecked(&source, options)
|
||||
.try_into_module()
|
||||
.expect("PySourceType always parses into a module");
|
||||
|
||||
ParsedModule::new(parsed)
|
||||
.expect("PySourceType always parses into a module")
|
||||
}
|
||||
|
||||
/// A wrapper around a parsed module.
|
||||
|
@ -41,21 +46,58 @@ pub fn parsed_module(db: &dyn Db, file: File) -> ParsedModule {
|
|||
/// is represented with the [`ParsedModuleRef`] type.
|
||||
#[derive(Clone)]
|
||||
pub struct ParsedModule {
|
||||
inner: Arc<Parsed<ModModule>>,
|
||||
file: File,
|
||||
inner: Arc<ArcSwapOption<indexed::IndexedModule>>,
|
||||
}
|
||||
|
||||
impl ParsedModule {
|
||||
pub fn new(parsed: Parsed<ModModule>) -> Self {
|
||||
pub fn new(file: File, parsed: Parsed<ModModule>) -> Self {
|
||||
Self {
|
||||
inner: Arc::new(parsed),
|
||||
file,
|
||||
inner: Arc::new(ArcSwapOption::new(Some(indexed::IndexedModule::new(
|
||||
parsed,
|
||||
)))),
|
||||
}
|
||||
}
|
||||
/// Loads a reference to the parsed module.
|
||||
///
|
||||
/// Note that holding on to the reference will prevent garbage collection
|
||||
/// of the AST. This method will reparse the module if it has been collected.
|
||||
pub fn load(&self, db: &dyn Db) -> ParsedModuleRef {
|
||||
let parsed = match self.inner.load_full() {
|
||||
Some(parsed) => parsed,
|
||||
None => {
|
||||
// Re-parse the file.
|
||||
let parsed = indexed::IndexedModule::new(parsed_module_impl(db, self.file));
|
||||
tracing::debug!(
|
||||
"File `{}` was reparsed after being collected in the current Salsa revision",
|
||||
self.file.path(db)
|
||||
);
|
||||
|
||||
self.inner.store(Some(parsed.clone()));
|
||||
parsed
|
||||
}
|
||||
};
|
||||
|
||||
ParsedModuleRef {
|
||||
module: self.clone(),
|
||||
indexed: parsed,
|
||||
}
|
||||
}
|
||||
|
||||
/// Loads a reference to the parsed module.
|
||||
pub fn load(&self, _db: &dyn Db) -> ParsedModuleRef {
|
||||
ParsedModuleRef {
|
||||
module_ref: self.inner.clone(),
|
||||
}
|
||||
/// Clear the parsed module, dropping the AST once all references to it are dropped.
|
||||
pub fn clear(&self) {
|
||||
self.inner.store(None);
|
||||
}
|
||||
|
||||
/// Returns a pointer for this [`ParsedModule`].
|
||||
///
|
||||
/// The pointer uniquely identifies the module within the current Salsa revision,
|
||||
/// regardless of whether particular [`ParsedModuleRef`] instances are garbage collected.
|
||||
pub fn as_ptr(&self) -> *const () {
|
||||
// Note that the outer `Arc` in `inner` is stable across garbage collection, while the inner
|
||||
// `Arc` within the `ArcSwap` may change.
|
||||
Arc::as_ptr(&self.inner).cast()
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -76,16 +118,19 @@ impl Eq for ParsedModule {}
|
|||
/// Cheap cloneable wrapper around an instance of a module AST.
|
||||
#[derive(Clone)]
|
||||
pub struct ParsedModuleRef {
|
||||
module_ref: Arc<Parsed<ModModule>>,
|
||||
module: ParsedModule,
|
||||
indexed: Arc<indexed::IndexedModule>,
|
||||
}
|
||||
|
||||
impl ParsedModuleRef {
|
||||
pub fn as_arc(&self) -> &Arc<Parsed<ModModule>> {
|
||||
&self.module_ref
|
||||
/// Returns a reference to the [`ParsedModule`] that this instance was loaded from.
|
||||
pub fn module(&self) -> &ParsedModule {
|
||||
&self.module
|
||||
}
|
||||
|
||||
pub fn into_arc(self) -> Arc<Parsed<ModModule>> {
|
||||
self.module_ref
|
||||
/// Returns a reference to the AST node at the given index.
|
||||
pub fn get_by_index<'ast>(&'ast self, index: NodeIndex) -> AnyRootNodeRef<'ast> {
|
||||
self.indexed.get_by_index(index)
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -93,7 +138,247 @@ impl std::ops::Deref for ParsedModuleRef {
|
|||
type Target = Parsed<ModModule>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.module_ref
|
||||
&self.indexed.parsed
|
||||
}
|
||||
}
|
||||
|
||||
mod indexed {
|
||||
use std::sync::Arc;
|
||||
|
||||
use ruff_python_ast::visitor::source_order::*;
|
||||
use ruff_python_ast::*;
|
||||
use ruff_python_parser::Parsed;
|
||||
|
||||
/// A wrapper around the AST that allows access to AST nodes by index.
|
||||
#[derive(Debug)]
|
||||
pub struct IndexedModule {
|
||||
index: Box<[AnyRootNodeRef<'static>]>,
|
||||
pub parsed: Parsed<ModModule>,
|
||||
}
|
||||
|
||||
impl IndexedModule {
|
||||
/// Create a new [`IndexedModule`] from the given AST.
|
||||
#[allow(clippy::unnecessary_cast)]
|
||||
pub fn new(parsed: Parsed<ModModule>) -> Arc<Self> {
|
||||
let mut visitor = Visitor {
|
||||
nodes: Vec::new(),
|
||||
index: 0,
|
||||
};
|
||||
|
||||
let mut inner = Arc::new(IndexedModule {
|
||||
parsed,
|
||||
index: Box::new([]),
|
||||
});
|
||||
|
||||
AnyNodeRef::from(inner.parsed.syntax()).visit_source_order(&mut visitor);
|
||||
|
||||
let index: Box<[AnyRootNodeRef<'_>]> = visitor.nodes.into_boxed_slice();
|
||||
|
||||
// SAFETY: We cast from `Box<[AnyRootNodeRef<'_>]>` to `Box<[AnyRootNodeRef<'static>]>`,
|
||||
// faking the 'static lifetime to create the self-referential struct. The node references
|
||||
// are into the `Arc<Parsed<ModModule>>`, so are valid for as long as the `IndexedModule`
|
||||
// is alive. We make sure to restore the correct lifetime in `get_by_index`.
|
||||
//
|
||||
// Note that we can never move the data within the `Arc` after this point.
|
||||
Arc::get_mut(&mut inner).unwrap().index =
|
||||
unsafe { Box::from_raw(Box::into_raw(index) as *mut [AnyRootNodeRef<'static>]) };
|
||||
|
||||
inner
|
||||
}
|
||||
|
||||
/// Returns the node at the given index.
|
||||
pub fn get_by_index<'ast>(&'ast self, index: NodeIndex) -> AnyRootNodeRef<'ast> {
|
||||
// Note that this method restores the correct lifetime: the nodes are valid for as
|
||||
// long as the reference to `IndexedModule` is alive.
|
||||
self.index[index.as_usize()]
|
||||
}
|
||||
}
|
||||
|
||||
/// A visitor that collects nodes in source order.
|
||||
pub struct Visitor<'a> {
|
||||
pub index: u32,
|
||||
pub nodes: Vec<AnyRootNodeRef<'a>>,
|
||||
}
|
||||
|
||||
impl<'a> Visitor<'a> {
|
||||
fn visit_node<T>(&mut self, node: &'a T)
|
||||
where
|
||||
T: HasNodeIndex + std::fmt::Debug,
|
||||
AnyRootNodeRef<'a>: From<&'a T>,
|
||||
{
|
||||
node.node_index().set(self.index);
|
||||
self.nodes.push(AnyRootNodeRef::from(node));
|
||||
self.index += 1;
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> SourceOrderVisitor<'a> for Visitor<'a> {
|
||||
#[inline]
|
||||
fn visit_mod(&mut self, module: &'a Mod) {
|
||||
self.visit_node(module);
|
||||
walk_module(self, module);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_stmt(&mut self, stmt: &'a Stmt) {
|
||||
self.visit_node(stmt);
|
||||
walk_stmt(self, stmt);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_annotation(&mut self, expr: &'a Expr) {
|
||||
self.visit_node(expr);
|
||||
walk_annotation(self, expr);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_expr(&mut self, expr: &'a Expr) {
|
||||
self.visit_node(expr);
|
||||
walk_expr(self, expr);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_decorator(&mut self, decorator: &'a Decorator) {
|
||||
self.visit_node(decorator);
|
||||
walk_decorator(self, decorator);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_comprehension(&mut self, comprehension: &'a Comprehension) {
|
||||
self.visit_node(comprehension);
|
||||
walk_comprehension(self, comprehension);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
|
||||
self.visit_node(except_handler);
|
||||
walk_except_handler(self, except_handler);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_arguments(&mut self, arguments: &'a Arguments) {
|
||||
self.visit_node(arguments);
|
||||
walk_arguments(self, arguments);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_parameters(&mut self, parameters: &'a Parameters) {
|
||||
self.visit_node(parameters);
|
||||
walk_parameters(self, parameters);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_parameter(&mut self, arg: &'a Parameter) {
|
||||
self.visit_node(arg);
|
||||
walk_parameter(self, arg);
|
||||
}
|
||||
|
||||
fn visit_parameter_with_default(
|
||||
&mut self,
|
||||
parameter_with_default: &'a ParameterWithDefault,
|
||||
) {
|
||||
self.visit_node(parameter_with_default);
|
||||
walk_parameter_with_default(self, parameter_with_default);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_keyword(&mut self, keyword: &'a Keyword) {
|
||||
self.visit_node(keyword);
|
||||
walk_keyword(self, keyword);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_alias(&mut self, alias: &'a Alias) {
|
||||
self.visit_node(alias);
|
||||
walk_alias(self, alias);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_with_item(&mut self, with_item: &'a WithItem) {
|
||||
self.visit_node(with_item);
|
||||
walk_with_item(self, with_item);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_type_params(&mut self, type_params: &'a TypeParams) {
|
||||
self.visit_node(type_params);
|
||||
walk_type_params(self, type_params);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_type_param(&mut self, type_param: &'a TypeParam) {
|
||||
self.visit_node(type_param);
|
||||
walk_type_param(self, type_param);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_match_case(&mut self, match_case: &'a MatchCase) {
|
||||
self.visit_node(match_case);
|
||||
walk_match_case(self, match_case);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_pattern(&mut self, pattern: &'a Pattern) {
|
||||
self.visit_node(pattern);
|
||||
walk_pattern(self, pattern);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_pattern_arguments(&mut self, pattern_arguments: &'a PatternArguments) {
|
||||
self.visit_node(pattern_arguments);
|
||||
walk_pattern_arguments(self, pattern_arguments);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_pattern_keyword(&mut self, pattern_keyword: &'a PatternKeyword) {
|
||||
self.visit_node(pattern_keyword);
|
||||
walk_pattern_keyword(self, pattern_keyword);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_elif_else_clause(&mut self, elif_else_clause: &'a ElifElseClause) {
|
||||
self.visit_node(elif_else_clause);
|
||||
walk_elif_else_clause(self, elif_else_clause);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_f_string(&mut self, f_string: &'a FString) {
|
||||
self.visit_node(f_string);
|
||||
walk_f_string(self, f_string);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_interpolated_string_element(
|
||||
&mut self,
|
||||
interpolated_string_element: &'a InterpolatedStringElement,
|
||||
) {
|
||||
self.visit_node(interpolated_string_element);
|
||||
walk_interpolated_string_element(self, interpolated_string_element);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_t_string(&mut self, t_string: &'a TString) {
|
||||
self.visit_node(t_string);
|
||||
walk_t_string(self, t_string);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_string_literal(&mut self, string_literal: &'a StringLiteral) {
|
||||
self.visit_node(string_literal);
|
||||
walk_string_literal(self, string_literal);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_bytes_literal(&mut self, bytes_literal: &'a BytesLiteral) {
|
||||
self.visit_node(bytes_literal);
|
||||
walk_bytes_literal(self, bytes_literal);
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn visit_identifier(&mut self, identifier: &'a Identifier) {
|
||||
self.visit_node(identifier);
|
||||
walk_identifier(self, identifier);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue