diff --git a/Cargo.lock b/Cargo.lock index aafd0c1f86..10429702ab 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1904,6 +1904,14 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "ruff_index" +version = "0.0.0" +dependencies = [ + "ruff_macros", + "static_assertions", +] + [[package]] name = "ruff_macros" version = "0.0.0" @@ -1965,6 +1973,7 @@ dependencies = [ "bitflags 2.3.1", "is-macro", "nohash-hasher", + "ruff_index", "ruff_python_ast", "ruff_python_stdlib", "ruff_text_size", diff --git a/crates/ruff/src/checkers/ast/mod.rs b/crates/ruff/src/checkers/ast/mod.rs index 7fbc9f4d37..cbc8b58095 100644 --- a/crates/ruff/src/checkers/ast/mod.rs +++ b/crates/ruff/src/checkers/ast/mod.rs @@ -5529,7 +5529,7 @@ impl<'a> Checker<'a> { self.semantic_model .scopes .ancestor_ids(*scope_id) - .flat_map(|scope_id| runtime_imports[usize::from(scope_id)].iter()) + .flat_map(|scope_id| runtime_imports[scope_id.as_usize()].iter()) .copied() .collect() }; diff --git a/crates/ruff_index/Cargo.toml b/crates/ruff_index/Cargo.toml new file mode 100644 index 0000000000..77e1c3fb56 --- /dev/null +++ b/crates/ruff_index/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "ruff_index" +version = "0.0.0" +publish = false +edition = { workspace = true } +rust-version = { workspace = true } + +[lib] + +[dependencies] +ruff_macros = { path = "../ruff_macros" } + +[dev-dependencies] +static_assertions = "1.1.0" diff --git a/crates/ruff_index/src/idx.rs b/crates/ruff_index/src/idx.rs new file mode 100644 index 0000000000..aa7b1f6ac4 --- /dev/null +++ b/crates/ruff_index/src/idx.rs @@ -0,0 +1,60 @@ +use std::hash::Hash; + +/// Represents a newtype wrapper used to index into a Vec or a slice. +/// +/// You can use the [`newtype_index`](crate::newtype_index) macro to define your own index. 
+pub trait Idx: Copy + PartialEq + Eq + Hash + std::fmt::Debug + 'static { + fn new(value: usize) -> Self; + + fn index(self) -> usize; +} + +#[cfg(test)] +mod tests { + + use crate::newtype_index; + use static_assertions::{assert_eq_size, assert_impl_all}; + + // The `newtype_index` expansion refers to `ruff_index::Idx`, so alias this crate under that name + use crate as ruff_index; + + #[newtype_index] + #[derive(PartialOrd, Ord)] + struct MyIndex; + + assert_impl_all!(MyIndex: Ord, PartialOrd); + assert_eq_size!(MyIndex, Option); + + #[test] + #[should_panic(expected = "assertion failed: value <= Self::MAX")] + fn from_u32_panics_for_u32_max() { + MyIndex::from_u32(u32::MAX); + } + + #[test] + #[should_panic(expected = "assertion failed: value <= Self::MAX")] + fn from_usize_panics_for_u32_max() { + MyIndex::from_usize(u32::MAX as usize); + } + + #[test] + fn max_value() { + let max_value = MyIndex::from_u32(u32::MAX - 1); + + assert_eq!(max_value.as_u32(), u32::MAX - 1); + } + + #[test] + fn max_value_usize() { + let max_value = MyIndex::from_usize((u32::MAX - 1) as usize); + + assert_eq!(max_value.as_u32(), u32::MAX - 1); + } + + #[test] + fn debug() { + let output = format!("{:?}", MyIndex::from(10u32)); + + assert_eq!(output, "MyIndex(10)"); + } +} diff --git a/crates/ruff_index/src/lib.rs b/crates/ruff_index/src/lib.rs new file mode 100644 index 0000000000..6f7ac59c41 --- /dev/null +++ b/crates/ruff_index/src/lib.rs @@ -0,0 +1,13 @@ +//! Provides new-type wrappers for collections that are indexed by an [`Idx`] rather +//! than `usize`. +//! +//! Inspired by [rustc_index](https://github.com/rust-lang/rust/blob/master/compiler/rustc_index/src/lib.rs). 
+ +mod idx; +mod slice; +mod vec; + +pub use idx::Idx; +pub use ruff_macros::newtype_index; +pub use slice::IndexSlice; +pub use vec::IndexVec; diff --git a/crates/ruff_index/src/slice.rs b/crates/ruff_index/src/slice.rs new file mode 100644 index 0000000000..ddb534ea82 --- /dev/null +++ b/crates/ruff_index/src/slice.rs @@ -0,0 +1,178 @@ +use crate::vec::IndexVec; +use crate::Idx; +use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; +use std::ops::{Index, IndexMut}; + +/// A view into contiguous `T`s, indexed by `I` rather than by `usize`. +#[derive(PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct IndexSlice { + index: PhantomData, + pub raw: [T], +} + +impl IndexSlice { + #[inline] + pub const fn empty() -> &'static Self { + Self::from_raw(&[]) + } + + #[inline] + pub const fn from_raw(raw: &[T]) -> &Self { + let ptr: *const [T] = raw; + + #[allow(unsafe_code)] + // SAFETY: `IndexSlice` is `repr(transparent)` over its `raw: [T]` field (`index` is a zero-sized `PhantomData`), so it has the same layout as `[T]` + unsafe { + &*(ptr as *const Self) + } + } + + #[inline] + pub fn from_raw_mut(raw: &mut [T]) -> &mut Self { + let ptr: *mut [T] = raw; + + #[allow(unsafe_code)] + // SAFETY: `IndexSlice` is `repr(transparent)` over its `raw: [T]` field (`index` is a zero-sized `PhantomData`), so it has the same layout as `[T]` + unsafe { + &mut *(ptr as *mut Self) + } + } + + #[inline] + pub const fn len(&self) -> usize { + self.raw.len() + } + + #[inline] + pub const fn is_empty(&self) -> bool { + self.raw.is_empty() + } + + #[inline] + pub fn iter(&self) -> std::slice::Iter<'_, T> { + self.raw.iter() + } + + /// Returns an iterator over every valid index (`0..self.len()`), each converted with `I::new`. + #[inline] + pub fn indices( + &self, + ) -> impl DoubleEndedIterator + ExactSizeIterator + Clone + 'static { + (0..self.len()).map(|n| I::new(n)) + } + + #[inline] + pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> { + self.raw.iter_mut() + } + + #[inline] + pub fn last_index(&self) -> Option { + self.len().checked_sub(1).map(I::new) + } + + #[inline] + pub fn swap(&mut self, a: I, b: I) { + self.raw.swap(a.index(), b.index()); + } + + #[inline] + pub fn 
get(&self, index: I) -> Option<&T> { + self.raw.get(index.index()) + } + + #[inline] + pub fn get_mut(&mut self, index: I) -> Option<&mut T> { + self.raw.get_mut(index.index()) + } + + #[inline] + pub fn binary_search(&self, value: &T) -> Result + where + T: Ord, + { + match self.raw.binary_search(value) { + Ok(i) => Ok(Idx::new(i)), + Err(i) => Err(Idx::new(i)), + } + } +} + +impl Debug for IndexSlice +where + I: Idx, + T: Debug, +{ + fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.raw, fmt) + } +} + +impl Index for IndexSlice { + type Output = T; + + #[inline] + fn index(&self, index: I) -> &T { + &self.raw[index.index()] + } +} + +impl IndexMut for IndexSlice { + #[inline] + fn index_mut(&mut self, index: I) -> &mut T { + &mut self.raw[index.index()] + } +} + +impl<'a, I: Idx, T> IntoIterator for &'a IndexSlice { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + #[inline] + fn into_iter(self) -> std::slice::Iter<'a, T> { + self.raw.iter() + } +} + +impl<'a, I: Idx, T> IntoIterator for &'a mut IndexSlice { + type Item = &'a mut T; + type IntoIter = std::slice::IterMut<'a, T>; + + #[inline] + fn into_iter(self) -> std::slice::IterMut<'a, T> { + self.raw.iter_mut() + } +} + +impl ToOwned for IndexSlice { + type Owned = IndexVec; + + fn to_owned(&self) -> IndexVec { + IndexVec::from_raw(self.raw.to_owned()) + } + + fn clone_into(&self, target: &mut IndexVec) { + self.raw.clone_into(&mut target.raw); + } +} + +impl Default for &IndexSlice { + #[inline] + fn default() -> Self { + IndexSlice::from_raw(Default::default()) + } +} + +impl Default for &mut IndexSlice { + #[inline] + fn default() -> Self { + IndexSlice::from_raw_mut(Default::default()) + } +} + +// Whether `IndexSlice` is `Send` depends only on the data, +// not the phantom data. 
+#[allow(unsafe_code)] +unsafe impl Send for IndexSlice where T: Send {} diff --git a/crates/ruff_index/src/vec.rs b/crates/ruff_index/src/vec.rs new file mode 100644 index 0000000000..36fa6388ac --- /dev/null +++ b/crates/ruff_index/src/vec.rs @@ -0,0 +1,170 @@ +use crate::slice::IndexSlice; +use crate::Idx; +use std::borrow::{Borrow, BorrowMut}; +use std::fmt::{Debug, Formatter}; +use std::marker::PhantomData; +use std::ops::{Deref, DerefMut, RangeBounds}; + +/// An owned, growable sequence of `T`s, indexed by `I` rather than by `usize`. +#[derive(Clone, PartialEq, Eq, Hash)] +#[repr(transparent)] +pub struct IndexVec { + pub raw: Vec, + index: PhantomData, +} + +impl IndexVec { + #[inline] + pub fn new() -> Self { + Self { + raw: Vec::new(), + index: PhantomData, + } + } + + #[inline] + pub fn with_capacity(capacity: usize) -> Self { + Self { + raw: Vec::with_capacity(capacity), + index: PhantomData, + } + } + + #[inline] + pub fn from_raw(raw: Vec) -> Self { + Self { + raw, + index: PhantomData, + } + } + + #[inline] + pub fn drain>(&mut self, range: R) -> impl Iterator + '_ { + self.raw.drain(range) + } + + #[inline] + pub fn truncate(&mut self, a: usize) { + self.raw.truncate(a); + } + + #[inline] + pub fn as_slice(&self) -> &IndexSlice { + IndexSlice::from_raw(&self.raw) + } + + #[inline] + pub fn as_mut_slice(&mut self) -> &mut IndexSlice { + IndexSlice::from_raw_mut(&mut self.raw) + } + + #[inline] + pub fn push(&mut self, data: T) -> I { + let index = self.next_index(); + self.raw.push(data); + index + } + + #[inline] + pub fn next_index(&self) -> I { + I::new(self.raw.len()) + } +} + +impl Debug for IndexVec +where + T: Debug, +{ + fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { + std::fmt::Debug::fmt(&self.raw, f) + } +} + +impl Deref for IndexVec { + type Target = IndexSlice; + + fn deref(&self) -> &Self::Target { + self.as_slice() + } +} + +impl DerefMut for IndexVec { + fn deref_mut(&mut self) -> &mut Self::Target { + self.as_mut_slice() + } +} + +impl Borrow> for IndexVec { 
+ fn borrow(&self) -> &IndexSlice { + self + } +} + +impl BorrowMut> for IndexVec { + fn borrow_mut(&mut self) -> &mut IndexSlice { + self + } +} + +impl Extend for IndexVec { + #[inline] + fn extend>(&mut self, iter: Iter) { + self.raw.extend(iter); + } +} + +impl FromIterator for IndexVec { + #[inline] + fn from_iter>(iter: Iter) -> Self { + Self::from_raw(Vec::from_iter(iter)) + } +} + +impl IntoIterator for IndexVec { + type Item = T; + type IntoIter = std::vec::IntoIter; + + #[inline] + fn into_iter(self) -> std::vec::IntoIter { + self.raw.into_iter() + } +} + +impl<'a, I: Idx, T> IntoIterator for &'a IndexVec { + type Item = &'a T; + type IntoIter = std::slice::Iter<'a, T>; + + #[inline] + fn into_iter(self) -> std::slice::Iter<'a, T> { + self.iter() + } +} + +impl<'a, I: Idx, T> IntoIterator for &'a mut IndexVec { + type Item = &'a mut T; + type IntoIter = std::slice::IterMut<'a, T>; + + #[inline] + fn into_iter(self) -> std::slice::IterMut<'a, T> { + self.iter_mut() + } +} + +impl Default for IndexVec { + #[inline] + fn default() -> Self { + IndexVec::new() + } +} + +impl From<[T; N]> for IndexVec { + #[inline] + fn from(array: [T; N]) -> Self { + IndexVec::from_raw(array.into()) + } +} + +// Whether `IndexVec` is `Send` depends only on the data, +// not the phantom data. 
+#[allow(unsafe_code)] +unsafe impl Send for IndexVec where T: Send {} diff --git a/crates/ruff_macros/Cargo.toml b/crates/ruff_macros/Cargo.toml index b2c9b7ca99..4d95a74a2e 100644 --- a/crates/ruff_macros/Cargo.toml +++ b/crates/ruff_macros/Cargo.toml @@ -12,6 +12,6 @@ doctest = false [dependencies] proc-macro2 = { workspace = true } quote = { workspace = true } -syn = { workspace = true, features = ["derive", "parsing", "extra-traits"] } +syn = { workspace = true, features = ["derive", "parsing", "extra-traits", "full"] } textwrap = { workspace = true } itertools = { workspace = true } diff --git a/crates/ruff_macros/src/lib.rs b/crates/ruff_macros/src/lib.rs index 22ffb939b3..f02a2d1b16 100644 --- a/crates/ruff_macros/src/lib.rs +++ b/crates/ruff_macros/src/lib.rs @@ -1,6 +1,7 @@ //! This crate implements internal macros for the `ruff` library. use crate::cache_key::derive_cache_key; +use crate::newtype_index::generate_newtype_index; use proc_macro::TokenStream; use syn::{parse_macro_input, DeriveInput, ItemFn, ItemStruct}; @@ -9,6 +10,7 @@ mod combine_options; mod config; mod derive_message_formats; mod map_codes; +mod newtype_index; mod register_rules; mod rule_code_prefix; mod rule_namespace; @@ -79,3 +81,33 @@ pub fn derive_message_formats(_attr: TokenStream, item: TokenStream) -> TokenStr let func = parse_macro_input!(item as ItemFn); derive_message_formats::derive_message_formats(&func).into() } + +/// Derives a newtype wrapper that can be used as an index. +/// The wrapper can represent indices up to `u32::MAX - 1`. +/// +/// The `u32::MAX - 1` is an optimization so that `Option` has the same size as `Index`. 
+/// +/// Can represent `u32::MAX` distinct indices (`0..=u32::MAX - 1`) +/// +/// ## Warning +/// +/// Additional `derive` attributes must come AFTER this attribute: +/// +/// Good: +/// +/// ```rust +/// #[newtype_index] +/// #[derive(Ord, PartialOrd)] +/// struct MyIndex; +/// ``` +#[proc_macro_attribute] +pub fn newtype_index(_metadata: TokenStream, input: TokenStream) -> TokenStream { + let item = parse_macro_input!(input as ItemStruct); + + let output = match generate_newtype_index(item) { + Ok(output) => output, + Err(err) => err.to_compile_error(), + }; + + TokenStream::from(output) +} diff --git a/crates/ruff_macros/src/newtype_index.rs b/crates/ruff_macros/src/newtype_index.rs new file mode 100644 index 0000000000..f6524b48a9 --- /dev/null +++ b/crates/ruff_macros/src/newtype_index.rs @@ -0,0 +1,139 @@ +use quote::quote; +use syn::spanned::Spanned; +use syn::{Error, ItemStruct}; + +pub(super) fn generate_newtype_index(item: ItemStruct) -> syn::Result { + if !item.fields.is_empty() { + return Err(Error::new( + item.span(), + "A new type index cannot have any fields.", + )); + } + + if !item.generics.params.is_empty() { + return Err(Error::new( + item.span(), + "A new type index cannot be generic.", + )); + } + + let ItemStruct { + attrs, + vis, + struct_token, + ident, + generics: _, + fields: _, + semi_token, + } = item; + + let debug_name = ident.to_string(); + + let semi_token = semi_token.unwrap_or_default(); + let output = quote! { + #(#attrs)* + #[derive(Copy, Clone, Eq, PartialEq, Hash)] + #vis #struct_token #ident(std::num::NonZeroU32)#semi_token + + impl #ident { + const MAX: u32 = u32::MAX - 1; + + #vis const fn from_usize(value: usize) -> Self { + assert!(value <= Self::MAX as usize); + + // SAFETY: + // * The assertion above guarantees `value < u32::MAX`, so the cast is lossless and the add doesn't overflow. 
+ // * The `+ 1` guarantees that the index is not zero + #[allow(unsafe_code)] + Self(unsafe { std::num::NonZeroU32::new_unchecked((value as u32) + 1) }) + } + + #vis const fn from_u32(value: u32) -> Self { + assert!(value <= Self::MAX); + + // SAFETY: + // * The assertion above guarantees `value < u32::MAX`, so the add doesn't overflow. + // * The `+ 1` guarantees that the index is larger than zero. + #[allow(unsafe_code)] + Self(unsafe { std::num::NonZeroU32::new_unchecked(value + 1) }) + } + + /// Returns the index as a `u32` value + #[inline] + #vis const fn as_u32(self) -> u32 { + self.0.get() - 1 + } + + /// Returns the index as a `usize` value + #[inline] + #vis const fn as_usize(self) -> usize { + self.as_u32() as usize + } + + #[inline] + #vis const fn index(self) -> usize { + self.as_usize() + } + } + + impl std::ops::Add for #ident { + type Output = #ident; + + fn add(self, rhs: usize) -> Self::Output { + #ident::from_usize(self.index() + rhs) + } + } + + impl std::ops::Add for #ident { + type Output = #ident; + + fn add(self, rhs: Self) -> Self::Output { + #ident::from_usize(self.index() + rhs.index()) + } + } + + impl std::fmt::Debug for #ident { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + f.debug_tuple(#debug_name).field(&self.index()).finish() + } + } + + impl ruff_index::Idx for #ident { + #[inline] + fn new(value: usize) -> Self { + #ident::from_usize(value) + } + + #[inline] + fn index(self) -> usize { + self.index() + } + } + + impl From for #ident { + fn from(value: usize) -> Self { + #ident::from_usize(value) + } + } + + impl From for #ident { + fn from(value: u32) -> Self { + #ident::from_u32(value) + } + } + + impl From<#ident> for usize { + fn from(value: #ident) -> Self { + value.as_usize() + } + } + + impl From<#ident> for u32 { + fn from(value: #ident) -> Self { + value.as_u32() + } + } + }; + + Ok(output) +} diff --git a/crates/ruff_python_semantic/Cargo.toml b/crates/ruff_python_semantic/Cargo.toml index 
87ac6230c0..d0fa45debd 100644 --- a/crates/ruff_python_semantic/Cargo.toml +++ b/crates/ruff_python_semantic/Cargo.toml @@ -11,6 +11,7 @@ rust-version = { workspace = true } ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_stdlib = { path = "../ruff_python_stdlib" } ruff_text_size = { workspace = true } +ruff_index = { path = "../ruff_index" } bitflags = { workspace = true } is-macro = { workspace = true } diff --git a/crates/ruff_python_semantic/src/binding.rs b/crates/ruff_python_semantic/src/binding.rs index b8a8ad24c3..2e20ef7a8e 100644 --- a/crates/ruff_python_semantic/src/binding.rs +++ b/crates/ruff_python_semantic/src/binding.rs @@ -1,8 +1,8 @@ -use std::num::TryFromIntError; -use std::ops::{Deref, Index, IndexMut}; +use std::ops::{Deref, DerefMut}; use crate::model::SemanticModel; use bitflags::bitflags; +use ruff_index::{newtype_index, IndexSlice, IndexVec}; use ruff_python_ast::helpers; use ruff_python_ast::source_code::Locator; use ruff_text_size::TextRange; @@ -130,16 +130,8 @@ impl<'a> Binding<'a> { /// Using a `u32` to identify [Binding]s should is sufficient because Ruff only supports documents with a /// size smaller than or equal to `u32::max`. A document with the size of `u32::max` must have fewer than `u32::max` /// bindings because bindings must be separated by whitespace (and have an assignment). 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] -pub struct BindingId(u32); - -impl TryFrom for BindingId { - type Error = TryFromIntError; - - fn try_from(value: usize) -> Result { - Ok(Self(u32::try_from(value)?)) - } -} +#[newtype_index] +pub struct BindingId; impl nohash_hasher::IsEnabled for BindingId {} @@ -147,53 +139,37 @@ impl nohash_hasher::IsEnabled for BindingId {} /// /// Bindings are indexed by [`BindingId`] #[derive(Debug, Clone, Default)] -pub struct Bindings<'a>(Vec>); +pub struct Bindings<'a>(IndexVec>); impl<'a> Bindings<'a> { /// Pushes a new binding and returns its id pub fn push(&mut self, binding: Binding<'a>) -> BindingId { - let id = self.next_id(); - self.0.push(binding); - id + self.0.push(binding) } /// Returns the id that will be assigned when pushing the next binding pub fn next_id(&self) -> BindingId { - BindingId::try_from(self.0.len()).unwrap() - } -} - -impl<'a> Index for Bindings<'a> { - type Output = Binding<'a>; - - fn index(&self, index: BindingId) -> &Self::Output { - &self.0[usize::from(index)] - } -} - -impl<'a> IndexMut for Bindings<'a> { - fn index_mut(&mut self, index: BindingId) -> &mut Self::Output { - &mut self.0[usize::from(index)] + self.0.next_index() } } impl<'a> Deref for Bindings<'a> { - type Target = [Binding<'a>]; + type Target = IndexSlice>; fn deref(&self) -> &Self::Target { &self.0 } } -impl<'a> FromIterator> for Bindings<'a> { - fn from_iter>>(iter: T) -> Self { - Self(Vec::from_iter(iter)) +impl<'a> DerefMut for Bindings<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 } } -impl From for usize { - fn from(value: BindingId) -> Self { - value.0 as usize +impl<'a> FromIterator> for Bindings<'a> { + fn from_iter>>(iter: T) -> Self { + Self(IndexVec::from_iter(iter)) } } diff --git a/crates/ruff_python_semantic/src/definition.rs b/crates/ruff_python_semantic/src/definition.rs index e9daf9feec..df75490f5e 100644 --- a/crates/ruff_python_semantic/src/definition.rs +++ 
b/crates/ruff_python_semantic/src/definition.rs @@ -2,9 +2,9 @@ //! can be documented, such as a module, class, or function. use std::fmt::Debug; -use std::num::TryFromIntError; -use std::ops::{Deref, Index}; +use std::ops::Deref; +use ruff_index::{newtype_index, IndexSlice, IndexVec}; use rustpython_parser::ast::{self, Stmt}; use crate::analyze::visibility::{ @@ -12,28 +12,14 @@ use crate::analyze::visibility::{ }; /// Id uniquely identifying a definition in a program. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] -pub struct DefinitionId(u32); +#[newtype_index] +pub struct DefinitionId; impl DefinitionId { /// Returns the ID for the module definition. #[inline] pub const fn module() -> Self { - DefinitionId(0) - } -} - -impl TryFrom for DefinitionId { - type Error = TryFromIntError; - - fn try_from(value: usize) -> Result { - Ok(Self(u32::try_from(value)?)) - } -} - -impl From for usize { - fn from(value: DefinitionId) -> Self { - value.0 as usize + DefinitionId::from_u32(0) } } @@ -118,11 +104,11 @@ impl Definition<'_> { /// The definitions within a Python program indexed by [`DefinitionId`]. #[derive(Debug, Default)] -pub struct Definitions<'a>(Vec>); +pub struct Definitions<'a>(IndexVec>); impl<'a> Definitions<'a> { pub fn for_module(definition: Module<'a>) -> Self { - Self(vec![Definition::Module(definition)]) + Self(IndexVec::from_raw(vec![Definition::Module(definition)])) } /// Pushes a new member definition and returns its unique id. @@ -130,14 +116,13 @@ impl<'a> Definitions<'a> { /// Members are assumed to be pushed in traversal order, such that parents are pushed before /// their children. pub fn push_member(&mut self, member: Member<'a>) -> DefinitionId { - let next_id = DefinitionId::try_from(self.0.len()).unwrap(); - self.0.push(Definition::Member(member)); - next_id + self.0.push(Definition::Member(member)) } /// Resolve the visibility of each definition in the collection. 
pub fn resolve(self, exports: Option<&[&str]>) -> ContextualizedDefinitions<'a> { - let mut definitions: Vec> = Vec::with_capacity(self.len()); + let mut definitions: IndexVec> = + IndexVec::with_capacity(self.len()); for definition in self { // Determine the visibility of the next definition, taking into account its parent's @@ -147,7 +132,7 @@ impl<'a> Definitions<'a> { Definition::Module(module) => module.source.to_visibility(), Definition::Member(member) => match member.kind { MemberKind::Class => { - let parent = &definitions[usize::from(member.parent)]; + let parent = &definitions[member.parent]; if parent.visibility.is_private() || exports .map_or(false, |exports| !exports.contains(&member.name())) @@ -158,7 +143,7 @@ impl<'a> Definitions<'a> { } } MemberKind::NestedClass => { - let parent = &definitions[usize::from(member.parent)]; + let parent = &definitions[member.parent]; if parent.visibility.is_private() || matches!( parent.definition, @@ -176,7 +161,7 @@ impl<'a> Definitions<'a> { } } MemberKind::Function => { - let parent = &definitions[usize::from(member.parent)]; + let parent = &definitions[member.parent]; if parent.visibility.is_private() || exports .map_or(false, |exports| !exports.contains(&member.name())) @@ -188,7 +173,7 @@ impl<'a> Definitions<'a> { } MemberKind::NestedFunction => Visibility::Private, MemberKind::Method => { - let parent = &definitions[usize::from(member.parent)]; + let parent = &definitions[member.parent]; if parent.visibility.is_private() { Visibility::Private } else { @@ -204,20 +189,13 @@ impl<'a> Definitions<'a> { }); } - ContextualizedDefinitions(definitions) - } -} - -impl<'a> Index for Definitions<'a> { - type Output = Definition<'a>; - - fn index(&self, index: DefinitionId) -> &Self::Output { - &self.0[usize::from(index)] + ContextualizedDefinitions(definitions.raw) } } impl<'a> Deref for Definitions<'a> { - type Target = [Definition<'a>]; + type Target = IndexSlice>; + fn deref(&self) -> &Self::Target { &self.0 } 
diff --git a/crates/ruff_python_semantic/src/node.rs b/crates/ruff_python_semantic/src/node.rs index bc3dbba7d5..dd71fd5f1d 100644 --- a/crates/ruff_python_semantic/src/node.rs +++ b/crates/ruff_python_semantic/src/node.rs @@ -1,6 +1,6 @@ -use std::num::{NonZeroU32, TryFromIntError}; use std::ops::{Index, IndexMut}; +use ruff_index::{newtype_index, IndexVec}; use rustc_hash::FxHashMap; use rustpython_parser::ast::Stmt; @@ -11,24 +11,9 @@ use ruff_python_ast::types::RefEquality; /// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max` /// and it is impossible to have more statements than characters in the file. We use a `NonZeroU32` to /// take advantage of memory layout optimizations. -#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] -pub struct NodeId(NonZeroU32); - -/// Convert a `usize` to a `NodeId` (by adding 1 to the value, and casting to `NonZeroU32`). -impl TryFrom for NodeId { - type Error = TryFromIntError; - - fn try_from(value: usize) -> Result { - Ok(Self(NonZeroU32::try_from(u32::try_from(value)? + 1)?)) - } -} - -/// Convert a `NodeId` to a `usize` (by subtracting 1 from the value, and casting to `usize`). -impl From for usize { - fn from(value: NodeId) -> Self { - value.0.get() as usize - 1 - } -} +#[newtype_index] +#[derive(Ord, PartialOrd)] +pub struct NodeId; /// A [`Node`] represents a statement in a program, along with a pointer to its parent (if any). #[derive(Debug)] @@ -44,7 +29,7 @@ struct Node<'a> { /// The nodes of a program indexed by [`NodeId`] #[derive(Debug, Default)] pub struct Nodes<'a> { - nodes: Vec>, + nodes: IndexVec>, node_to_id: FxHashMap, NodeId>, } @@ -53,16 +38,15 @@ impl<'a> Nodes<'a> { /// /// Panics if a node with the same pointer already exists. 
pub fn insert(&mut self, stmt: &'a Stmt, parent: Option) -> NodeId { - let next_id = NodeId::try_from(self.nodes.len()).unwrap(); + let next_id = self.nodes.next_index(); if let Some(existing_id) = self.node_to_id.insert(RefEquality(stmt), next_id) { panic!("Node already exists with id {existing_id:?}"); } self.nodes.push(Node { stmt, parent, - depth: parent.map_or(0, |parent| self.nodes[usize::from(parent)].depth + 1), - }); - next_id + depth: parent.map_or(0, |parent| self.nodes[parent].depth + 1), + }) } /// Returns the [`NodeId`] of the given node. @@ -74,26 +58,24 @@ impl<'a> Nodes<'a> { /// Return the [`NodeId`] of the parent node. #[inline] pub fn parent_id(&self, node_id: NodeId) -> Option { - self.nodes[usize::from(node_id)].parent + self.nodes[node_id].parent } /// Return the depth of the node. #[inline] pub fn depth(&self, node_id: NodeId) -> u32 { - self.nodes[usize::from(node_id)].depth + self.nodes[node_id].depth } /// Returns an iterator over all [`NodeId`] ancestors, starting from the given [`NodeId`]. pub fn ancestor_ids(&self, node_id: NodeId) -> impl Iterator + '_ { - std::iter::successors(Some(node_id), |&node_id| { - self.nodes[usize::from(node_id)].parent - }) + std::iter::successors(Some(node_id), |&node_id| self.nodes[node_id].parent) } /// Return the parent of the given node. 
pub fn parent(&self, node: &'a Stmt) -> Option<&'a Stmt> { let node_id = self.node_to_id.get(&RefEquality(node))?; - let parent_id = self.nodes[usize::from(*node_id)].parent?; + let parent_id = self.nodes[*node_id].parent?; Some(self[parent_id]) } } @@ -101,13 +83,15 @@ impl<'a> Nodes<'a> { impl<'a> Index for Nodes<'a> { type Output = &'a Stmt; + #[inline] fn index(&self, index: NodeId) -> &Self::Output { - &self.nodes[usize::from(index)].stmt + &self.nodes[index].stmt } } impl<'a> IndexMut for Nodes<'a> { + #[inline] fn index_mut(&mut self, index: NodeId) -> &mut Self::Output { - &mut self.nodes[usize::from(index)].stmt + &mut self.nodes[index].stmt } } diff --git a/crates/ruff_python_semantic/src/scope.rs b/crates/ruff_python_semantic/src/scope.rs index 6a9f192619..bd71596ce6 100644 --- a/crates/ruff_python_semantic/src/scope.rs +++ b/crates/ruff_python_semantic/src/scope.rs @@ -1,6 +1,6 @@ -use std::num::TryFromIntError; -use std::ops::{Deref, Index, IndexMut}; +use std::ops::{Deref, DerefMut}; +use ruff_index::{newtype_index, Idx, IndexSlice, IndexVec}; use rustc_hash::FxHashMap; use rustpython_parser::ast::{Arguments, Expr, Keyword, Stmt}; @@ -151,39 +151,25 @@ pub struct Lambda<'a> { /// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max` /// and it is impossible to have more scopes than characters in the file (because defining a function or class /// requires more than one character). 
-#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] -pub struct ScopeId(u32); +#[newtype_index] +pub struct ScopeId; impl ScopeId { /// Returns the ID for the global scope #[inline] pub const fn global() -> Self { - ScopeId(0) + ScopeId::from_u32(0) } /// Returns `true` if this is the id of the global scope pub const fn is_global(&self) -> bool { - self.0 == 0 - } -} - -impl TryFrom for ScopeId { - type Error = TryFromIntError; - - fn try_from(value: usize) -> Result { - Ok(Self(u32::try_from(value)?)) - } -} - -impl From for usize { - fn from(value: ScopeId) -> Self { - value.0 as usize + self.index() == 0 } } /// The scopes of a program indexed by [`ScopeId`] #[derive(Debug)] -pub struct Scopes<'a>(Vec>); +pub struct Scopes<'a>(IndexVec>); impl<'a> Scopes<'a> { /// Returns a reference to the global scope @@ -198,7 +184,7 @@ impl<'a> Scopes<'a> { /// Pushes a new scope and returns its unique id pub fn push_scope(&mut self, kind: ScopeKind<'a>, parent: ScopeId) -> ScopeId { - let next_id = ScopeId::try_from(self.0.len()).unwrap(); + let next_id = ScopeId::new(self.0.len()); self.0.push(Scope::local(kind, parent)); next_id } @@ -218,27 +204,19 @@ impl<'a> Scopes<'a> { impl Default for Scopes<'_> { fn default() -> Self { - Self(vec![Scope::global()]) - } -} - -impl<'a> Index for Scopes<'a> { - type Output = Scope<'a>; - - fn index(&self, index: ScopeId) -> &Self::Output { - &self.0[usize::from(index)] - } -} - -impl<'a> IndexMut for Scopes<'a> { - fn index_mut(&mut self, index: ScopeId) -> &mut Self::Output { - &mut self.0[usize::from(index)] + Self(IndexVec::from_raw(vec![Scope::global()])) } } impl<'a> Deref for Scopes<'a> { - type Target = [Scope<'a>]; + type Target = IndexSlice>; fn deref(&self) -> &Self::Target { &self.0 } } + +impl<'a> DerefMut for Scopes<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +}