From de947deee77c85adcd3483dc76a2ceebcb438d24 Mon Sep 17 00:00:00 2001 From: Alex Waygood Date: Fri, 8 Nov 2024 22:17:56 +0000 Subject: [PATCH] [red-knot] Consolidate detection of cyclically defined classes (#14207) --- crates/red_knot_python_semantic/src/types.rs | 231 ++++++++---------- .../src/types/infer.rs | 42 ++-- .../red_knot_python_semantic/src/types/mro.rs | 88 ++----- 3 files changed, 150 insertions(+), 211 deletions(-) diff --git a/crates/red_knot_python_semantic/src/types.rs b/crates/red_knot_python_semantic/src/types.rs index d43fe7a03f..fc7ffc47e3 100644 --- a/crates/red_knot_python_semantic/src/types.rs +++ b/crates/red_knot_python_semantic/src/types.rs @@ -2349,6 +2349,15 @@ impl<'db> Class<'db> { self.explicit_bases_query(db) } + /// Iterate over this class's explicit bases, filtering out any bases that are not class objects. + fn fully_static_explicit_bases(self, db: &'db dyn Db) -> impl Iterator> { + self.explicit_bases(db) + .iter() + .copied() + .filter_map(Type::into_class_literal) + .map(|ClassLiteralType { class }| class) + } + #[salsa::tracked(return_ref)] fn explicit_bases_query(self, db: &'db dyn Db) -> Box<[Type<'db>]> { let class_stmt = self.node(db); @@ -2448,102 +2457,80 @@ impl<'db> Class<'db> { /// Return the metaclass of this class, or `Unknown` if the metaclass cannot be inferred. pub(crate) fn metaclass(self, db: &'db dyn Db) -> Type<'db> { // TODO: `type[Unknown]` would be a more precise fallback - // (needs support for ) self.try_metaclass(db).unwrap_or(Type::Unknown) } /// Return the metaclass of this class, or an error if the metaclass cannot be inferred. #[salsa::tracked] pub(crate) fn try_metaclass(self, db: &'db dyn Db) -> Result, MetaclassError<'db>> { - /// Infer the metaclass of a class, tracking the classes that have been visited to detect - /// cyclic definitions. - fn infer<'db>( - db: &'db dyn Db, - class: Class<'db>, - seen: &mut SeenSet>, - ) -> Result, MetaclassError<'db>> { - // Recursively infer the metaclass of a class, ensuring that cyclic definitions are - // detected. - let mut safe_recurse = |class: Class<'db>| -> Result, MetaclassError<'db>> { - // Each base must be considered in isolation. - let num_seen = seen.len(); - if !seen.insert(class) { - return Err(MetaclassError { - kind: MetaclassErrorKind::CyclicDefinition, - }); - } - let metaclass = infer(db, class, seen)?; - seen.truncate(num_seen); - Ok(metaclass) - }; + // Identify the class's own metaclass (or take the first base class's metaclass). + let mut base_classes = self.fully_static_explicit_bases(db).peekable(); - let mut base_classes = class - .explicit_bases(db) - .iter() - .copied() - .filter_map(Type::into_class_literal); - - // Identify the class's own metaclass (or take the first base class's metaclass). - let explicit_metaclass = class.explicit_metaclass(db); - let (metaclass, class_metaclass_was_from) = if let Some(metaclass) = explicit_metaclass - { - (metaclass, class) - } else if let Some(base_class) = base_classes.next() { - (safe_recurse(base_class.class)?, base_class.class) - } else { - (KnownClass::Type.to_class(db), class) - }; - - let mut candidate = if let Type::ClassLiteral(metaclass_ty) = metaclass { - MetaclassCandidate { - metaclass: metaclass_ty.class, - explicit_metaclass_of: class_metaclass_was_from, - } - } else { - // TODO: If the metaclass is not a class, we should verify that it's a callable - // which accepts the same arguments as `type.__new__` (otherwise error), and return - // the meta-type of its return type. (And validate that is a class type?) - return Ok(Type::Todo); - }; - - // Reconcile all base classes' metaclasses with the candidate metaclass. + if base_classes.peek().is_some() && self.is_cyclically_defined(db) { + // We emit diagnostics for cyclic class definitions elsewhere. + // Avoid attempting to infer the metaclass if the class is cyclically defined: + // it would be easy to enter an infinite loop. // - // See: - // - https://docs.python.org/3/reference/datamodel.html#determining-the-appropriate-metaclass - // - https://github.com/python/cpython/blob/83ba8c2bba834c0b92de669cac16fcda17485e0e/Objects/typeobject.c#L3629-L3663 - for base_class in base_classes { - let metaclass = safe_recurse(base_class.class)?; - let Type::ClassLiteral(metaclass) = metaclass else { - continue; - }; - if metaclass.class.is_subclass_of(db, candidate.metaclass) { - candidate = MetaclassCandidate { - metaclass: metaclass.class, - explicit_metaclass_of: base_class.class, - }; - continue; - } - if candidate.metaclass.is_subclass_of(db, metaclass.class) { - continue; - } - return Err(MetaclassError { - kind: MetaclassErrorKind::Conflict { - candidate1: candidate, - candidate2: MetaclassCandidate { - metaclass: metaclass.class, - explicit_metaclass_of: base_class.class, - }, - candidate1_is_base_class: explicit_metaclass.is_none(), - }, - }); - } - - Ok(Type::ClassLiteral(ClassLiteralType { - class: candidate.metaclass, - })) + // TODO: `type[Unknown]` might be better here? + return Ok(Type::Unknown); } - infer(db, self, &mut SeenSet::new(self)) + let explicit_metaclass = self.explicit_metaclass(db); + let (metaclass, class_metaclass_was_from) = if let Some(metaclass) = explicit_metaclass { + (metaclass, self) + } else if let Some(base_class) = base_classes.next() { + (base_class.metaclass(db), base_class) + } else { + (KnownClass::Type.to_class(db), self) + }; + + let mut candidate = if let Type::ClassLiteral(metaclass_ty) = metaclass { + MetaclassCandidate { + metaclass: metaclass_ty.class, + explicit_metaclass_of: class_metaclass_was_from, + } + } else { + // TODO: If the metaclass is not a class, we should verify that it's a callable + // which accepts the same arguments as `type.__new__` (otherwise error), and return + // the meta-type of its return type. (And validate that is a class type?) + return Ok(Type::Todo); + }; + + // Reconcile all base classes' metaclasses with the candidate metaclass. + // + // See: + // - https://docs.python.org/3/reference/datamodel.html#determining-the-appropriate-metaclass + // - https://github.com/python/cpython/blob/83ba8c2bba834c0b92de669cac16fcda17485e0e/Objects/typeobject.c#L3629-L3663 + for base_class in base_classes { + let metaclass = base_class.metaclass(db); + let Type::ClassLiteral(metaclass) = metaclass else { + continue; + }; + if metaclass.class.is_subclass_of(db, candidate.metaclass) { + candidate = MetaclassCandidate { + metaclass: metaclass.class, + explicit_metaclass_of: base_class, + }; + continue; + } + if candidate.metaclass.is_subclass_of(db, metaclass.class) { + continue; + } + return Err(MetaclassError { + kind: MetaclassErrorKind::Conflict { + candidate1: candidate, + candidate2: MetaclassCandidate { + metaclass: metaclass.class, + explicit_metaclass_of: base_class, + }, + candidate1_is_base_class: explicit_metaclass.is_none(), + }, + }); + } + + Ok(Type::ClassLiteral(ClassLiteralType { + class: candidate.metaclass, + })) } /// Returns the class member of this class named `name`. @@ -2590,6 +2577,39 @@ impl<'db> Class<'db> { let scope = self.body_scope(db); symbol(db, scope, name) } + + /// Return `true` if this class appears to be a cyclic definition, + /// i.e., it inherits either directly or indirectly from itself. + /// + /// A class definition like this will fail at runtime, + /// but we must be resilient to it or we could panic. + #[salsa::tracked] + fn is_cyclically_defined(self, db: &'db dyn Db) -> bool { + fn is_cyclically_defined_recursive<'db>( + db: &'db dyn Db, + class: Class<'db>, + classes_to_watch: &mut IndexSet>, + ) -> bool { + if !classes_to_watch.insert(class) { + return true; + } + for explicit_base_class in class.fully_static_explicit_bases(db) { + // Each base must be considered in isolation. + // This is due to the fact that if a class uses multiple inheritance, + // there could easily be a situation where two bases have the same class in their MROs; + // that isn't enough to constitute the class being cyclically defined. + let classes_to_watch_len = classes_to_watch.len(); + if is_cyclically_defined_recursive(db, explicit_base_class, classes_to_watch) { + return true; + } + classes_to_watch.truncate(classes_to_watch_len); + } + false + } + + self.fully_static_explicit_bases(db) + .any(|base_class| is_cyclically_defined_recursive(db, base_class, &mut IndexSet::new())) + } } /// Either the explicit `metaclass=` keyword of the class, or the inferred metaclass of one of its base classes. @@ -2599,38 +2619,6 @@ pub(super) struct MetaclassCandidate<'db> { explicit_metaclass_of: Class<'db>, } -/// A utility struct for detecting duplicates in class hierarchies while storing the initial -/// entry on the stack. -#[derive(Debug, Clone, PartialEq, Eq)] -struct SeenSet { - initial: T, - visited: IndexSet, -} - -impl SeenSet { - fn new(initial: T) -> SeenSet { - Self { - initial, - visited: IndexSet::new(), - } - } - - fn len(&self) -> usize { - self.visited.len() - } - - fn truncate(&mut self, len: usize) { - self.visited.truncate(len); - } - - fn insert(&mut self, value: T) -> bool { - if value == self.initial { - return false; - } - self.visited.insert(value) - } -} - /// A singleton type representing a single class object at runtime. #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, salsa::Update)] pub struct ClassLiteralType<'db> { @@ -2714,13 +2702,6 @@ pub(super) enum MetaclassErrorKind<'db> { /// inferred metaclass of a base class. This helps us give better error messages in diagnostics. candidate1_is_base_class: bool, }, - - /// The class inherits from itself! - /// - /// This is very unlikely to happen in working real-world code, - /// but it's important to explicitly account for it. - /// If we don't, there's a possibility of an infinite loop and a panic. - CyclicDefinition, } #[salsa::interned] diff --git a/crates/red_knot_python_semantic/src/types/infer.rs b/crates/red_knot_python_semantic/src/types/infer.rs index 30e9965e73..7f6fbdd6bd 100644 --- a/crates/red_knot_python_semantic/src/types/infer.rs +++ b/crates/red_knot_python_semantic/src/types/infer.rs @@ -457,9 +457,13 @@ impl<'db> TypeInferenceBuilder<'db> { self.check_class_definitions(); } - /// Iterate over all class definitions to check that Python will be able to create a - /// consistent "[method resolution order]" and [metaclass] for each class at runtime. If not, - /// issue a diagnostic. + /// Iterate over all class definitions to check that the definition will not cause an exception + /// to be raised at runtime. This needs to be done after most other types in the scope have been + /// inferred, due to the fact that base classes can be deferred. If it looks like a class + /// definition is invalid in some way, issue a diagnostic. + /// + /// Among the things we check for in this method are whether Python will be able to determine a + /// consistent "[method resolution order]" and [metaclass] for each class. /// /// [method resolution order]: https://docs.python.org/3/glossary.html#term-method-resolution-order /// [metaclass]: https://docs.python.org/3/reference/datamodel.html#metaclasses @@ -471,7 +475,25 @@ impl<'db> TypeInferenceBuilder<'db> { .filter_map(|ty| ty.into_class_literal()) .map(|class_ty| class_ty.class); + // Iterate through all class definitions in this scope. for class in class_definitions { + // (1) Check that the class does not have a cyclic definition + if class.is_cyclically_defined(self.db) { + self.diagnostics.add( + class.node(self.db).into(), + "cyclic-class-def", + format_args!( + "Cyclic definition of `{}` or bases of `{}` (class cannot inherit from itself)", + class.name(self.db), + class.name(self.db) + ), + ); + // Attempting to determine the MRO of a class or if the class has a metaclass conflict + // is impossible if the class is cyclically defined; there's nothing more to do here. + continue; + } + + // (2) Check that the class's MRO is resolvable if let Err(mro_error) = class.try_mro(self.db).as_ref() { match mro_error.reason() { MroErrorKind::DuplicateBases(duplicates) => { @@ -484,15 +506,6 @@ impl<'db> TypeInferenceBuilder<'db> { ); } } - MroErrorKind::CyclicClassDefinition => self.diagnostics.add( - class.node(self.db).into(), - "cyclic-class-def", - format_args!( - "Cyclic definition of `{}` or bases of `{}` (class cannot inherit from itself)", - class.name(self.db), - class.name(self.db) - ), - ), MroErrorKind::InvalidBases(bases) => { let base_nodes = class.node(self.db).bases(); for (index, base_ty) in bases { @@ -518,6 +531,7 @@ impl<'db> TypeInferenceBuilder<'db> { } } + // (3) Check that the class's metaclass can be determined without error. if let Err(metaclass_error) = class.try_metaclass(self.db) { match metaclass_error.reason() { MetaclassErrorKind::Conflict { @@ -565,10 +579,6 @@ impl<'db> TypeInferenceBuilder<'db> { ); } } - MetaclassErrorKind::CyclicDefinition => { - // Cyclic class definition diagnostic will already have been emitted above - // in MRO calculation. - } } } } diff --git a/crates/red_knot_python_semantic/src/types/mro.rs b/crates/red_knot_python_semantic/src/types/mro.rs index 254a6f03fd..8ff2279e36 100644 --- a/crates/red_knot_python_semantic/src/types/mro.rs +++ b/crates/red_knot_python_semantic/src/types/mro.rs @@ -1,7 +1,6 @@ use std::collections::VecDeque; use std::ops::Deref; -use indexmap::IndexSet; use itertools::Either; use rustc_hash::FxHashSet; @@ -26,22 +25,30 @@ impl<'db> Mro<'db> { /// (We emit a diagnostic warning about the runtime `TypeError` in /// [`super::infer::TypeInferenceBuilder::infer_region_scope`].) pub(super) fn of_class(db: &'db dyn Db, class: Class<'db>) -> Result> { - Self::of_class_impl(db, class).map_err(|error_kind| { - let fallback_mro = Self::from([ - ClassBase::Class(class), - ClassBase::Unknown, - ClassBase::object(db), - ]); - MroError { - kind: error_kind, - fallback_mro, - } + Self::of_class_impl(db, class).map_err(|error_kind| MroError { + kind: error_kind, + fallback_mro: Self::from_error(db, class), }) } + pub(super) fn from_error(db: &'db dyn Db, class: Class<'db>) -> Self { + Self::from([ + ClassBase::Class(class), + ClassBase::Unknown, + ClassBase::object(db), + ]) + } + fn of_class_impl(db: &'db dyn Db, class: Class<'db>) -> Result> { let class_bases = class.explicit_bases(db); + if !class_bases.is_empty() && class.is_cyclically_defined(db) { + // We emit errors for cyclically defined classes elsewhere. + // It's important that we don't even try to infer the MRO for a cyclically defined class, + // or we'll end up in an infinite loop. + return Ok(Mro::from_error(db, class)); + } + match class_bases { // `builtins.object` is the special case: // the only class in Python that has an MRO with length <2 @@ -77,11 +84,6 @@ impl<'db> Mro<'db> { Err(MroErrorKind::InvalidBases(bases_info)) }, |single_base| { - if let ClassBase::Class(class_base) = single_base { - if class_is_cyclically_defined(db, class_base) { - return Err(MroErrorKind::CyclicClassDefinition); - } - } let mro = std::iter::once(ClassBase::Class(class)) .chain(single_base.mro(db)) .collect(); @@ -96,10 +98,6 @@ impl<'db> Mro<'db> { // what MRO Python will give this class at runtime // (if an MRO is indeed resolvable at all!) multiple_bases => { - if class_is_cyclically_defined(db, class) { - return Err(MroErrorKind::CyclicClassDefinition); - } - let mut valid_bases = vec![]; let mut invalid_bases = vec![]; @@ -282,13 +280,6 @@ pub(super) enum MroErrorKind<'db> { /// Each index is the index of a node representing an invalid base. InvalidBases(Box<[(usize, Type<'db>)]>), - /// The class inherits from itself! - /// - /// This is very unlikely to happen in working real-world code, - /// but it's important to explicitly account for it. - /// If we don't, there's a possibility of an infinite loop and a panic. - CyclicClassDefinition, - /// The class has one or more duplicate bases. /// /// This variant records the indices and [`Class`]es @@ -461,46 +452,3 @@ fn c3_merge(mut sequences: Vec>) -> Option { } } } - -/// Return `true` if this class appears to be a cyclic definition, -/// i.e., it inherits either directly or indirectly from itself. -/// -/// A class definition like this will fail at runtime, -/// but we must be resilient to it or we could panic. -fn class_is_cyclically_defined(db: &dyn Db, class: Class) -> bool { - fn is_cyclically_defined_recursive<'db>( - db: &'db dyn Db, - class: Class<'db>, - classes_to_watch: &mut IndexSet>, - ) -> bool { - if !classes_to_watch.insert(class) { - return true; - } - for explicit_base_class in class - .explicit_bases(db) - .iter() - .copied() - .filter_map(Type::into_class_literal) - .map(|ClassLiteralType { class }| class) - { - // Each base must be considered in isolation. - // This is due to the fact that if a class uses multiple inheritance, - // there could easily be a situation where two bases have the same class in their MROs; - // that isn't enough to constitute the class being cyclically defined. - let classes_to_watch_len = classes_to_watch.len(); - if is_cyclically_defined_recursive(db, explicit_base_class, classes_to_watch) { - return true; - } - classes_to_watch.truncate(classes_to_watch_len); - } - false - } - - class - .explicit_bases(db) - .iter() - .copied() - .filter_map(Type::into_class_literal) - .map(|ClassLiteralType { class }| class) - .any(|base_class| is_cyclically_defined_recursive(db, base_class, &mut IndexSet::default())) -}