Introduce ruff_index crate (#4597)

This commit is contained in:
Micha Reiser 2023-05-23 17:40:35 +02:00 committed by GitHub
parent 04d273bcc7
commit 652c644c2a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 681 additions and 149 deletions

9
Cargo.lock generated
View file

@ -1904,6 +1904,14 @@ dependencies = [
"unicode-width", "unicode-width",
] ]
[[package]]
name = "ruff_index"
version = "0.0.0"
dependencies = [
"ruff_macros",
"static_assertions",
]
[[package]] [[package]]
name = "ruff_macros" name = "ruff_macros"
version = "0.0.0" version = "0.0.0"
@ -1965,6 +1973,7 @@ dependencies = [
"bitflags 2.3.1", "bitflags 2.3.1",
"is-macro", "is-macro",
"nohash-hasher", "nohash-hasher",
"ruff_index",
"ruff_python_ast", "ruff_python_ast",
"ruff_python_stdlib", "ruff_python_stdlib",
"ruff_text_size", "ruff_text_size",

View file

@ -5529,7 +5529,7 @@ impl<'a> Checker<'a> {
self.semantic_model self.semantic_model
.scopes .scopes
.ancestor_ids(*scope_id) .ancestor_ids(*scope_id)
.flat_map(|scope_id| runtime_imports[usize::from(scope_id)].iter()) .flat_map(|scope_id| runtime_imports[scope_id.as_usize()].iter())
.copied() .copied()
.collect() .collect()
}; };

View file

@ -0,0 +1,14 @@
[package]
name = "ruff_index"
version = "0.0.0"
publish = false
edition = { workspace = true }
rust-version = { workspace = true }
[lib]
[dependencies]
ruff_macros = { path = "../ruff_macros" }
[dev-dependencies]
static_assertions = "1.1.0"

View file

@ -0,0 +1,60 @@
use std::hash::Hash;
/// A typed index: a newtype that stands in for `usize` when addressing the elements of a
/// `Vec` or a slice.
///
/// The [`newtype_index`](crate::newtype_index) macro can be used to define such a type.
pub trait Idx: 'static + Copy + Eq + PartialEq + Hash + std::fmt::Debug {
    /// Builds the index that corresponds to the raw position `value`.
    fn new(value: usize) -> Self;

    /// Returns the raw position this index refers to.
    fn index(self) -> usize;
}
#[cfg(test)]
mod tests {
    use crate::newtype_index;
    use static_assertions::{assert_eq_size, assert_impl_all};

    // `#[newtype_index]` emits paths that start with `ruff_index`; aliasing the current
    // crate under that name lets the expansion resolve from inside the crate itself.
    use crate as ruff_index;

    #[newtype_index]
    #[derive(PartialOrd, Ord)]
    struct MyIndex;

    // Derives listed after the macro attribute are applied to the generated type.
    assert_impl_all!(MyIndex: Ord, PartialOrd);
    // Wrapping the index in `Option` does not change its size.
    assert_eq_size!(MyIndex, Option<MyIndex>);

    /// Largest raw value accepted by the generated constructors.
    const LARGEST: u32 = u32::MAX - 1;

    #[test]
    #[should_panic(expected = "assertion failed: value <= Self::MAX")]
    fn from_u32_panics_for_u32_max() {
        let _ = MyIndex::from_u32(u32::MAX);
    }

    #[test]
    #[should_panic(expected = "assertion failed: value <= Self::MAX")]
    fn from_usize_panics_for_u32_max() {
        let too_large = u32::MAX as usize;
        let _ = MyIndex::from_usize(too_large);
    }

    #[test]
    fn max_value() {
        let index = MyIndex::from_u32(LARGEST);
        assert_eq!(index.as_u32(), LARGEST);
    }

    #[test]
    fn max_value_usize() {
        let index = MyIndex::from_usize(LARGEST as usize);
        assert_eq!(index.as_u32(), LARGEST);
    }

    #[test]
    fn debug() {
        let index = MyIndex::from(10u32);
        assert_eq!(format!("{index:?}"), "MyIndex(10)");
    }
}

View file

@ -0,0 +1,13 @@
//! Provides new-type wrappers for collections that are indexed by an [`Idx`] rather
//! than `usize`.
//!
//! Inspired by [rustc_index](https://github.com/rust-lang/rust/blob/master/compiler/rustc_index/src/lib.rs).
mod idx;
mod slice;
mod vec;
pub use idx::Idx;
pub use ruff_macros::newtype_index;
pub use slice::IndexSlice;
pub use vec::IndexVec;

View file

@ -0,0 +1,178 @@
use crate::vec::IndexVec;
use crate::Idx;
use std::fmt::{Debug, Formatter};
use std::marker::PhantomData;
use std::ops::{Index, IndexMut};
/// A contiguous run of `T` values that is addressed with the typed index `I` instead of a
/// plain `usize`.
#[derive(Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct IndexSlice<I, T> {
    // Zero-sized marker that ties the slice to its index type.
    index: PhantomData<I>,
    /// The underlying elements.
    pub raw: [T],
}
impl<I: Idx, T> IndexSlice<I, T> {
    /// Returns a view over zero elements.
    #[inline]
    pub const fn empty() -> &'static Self {
        Self::from_raw(&[])
    }

    /// Reinterprets a plain slice as an `IndexSlice` over the same elements.
    #[inline]
    pub const fn from_raw(raw: &[T]) -> &Self {
        let slice_ptr: *const [T] = raw;
        #[allow(unsafe_code)]
        // SAFETY: `IndexSlice` is `repr(transparent)` over a normal slice
        unsafe {
            &*(slice_ptr as *const Self)
        }
    }

    /// Reinterprets a mutable plain slice as a mutable `IndexSlice` over the same elements.
    #[inline]
    pub fn from_raw_mut(raw: &mut [T]) -> &mut Self {
        let slice_ptr: *mut [T] = raw;
        #[allow(unsafe_code)]
        // SAFETY: `IndexSlice` is `repr(transparent)` over a normal slice
        unsafe {
            &mut *(slice_ptr as *mut Self)
        }
    }

    /// Returns the number of elements.
    #[inline]
    pub const fn len(&self) -> usize {
        self.raw.len()
    }

    /// Returns `true` when there are no elements.
    #[inline]
    pub const fn is_empty(&self) -> bool {
        self.raw.is_empty()
    }

    /// Returns an iterator over shared references to the elements.
    #[inline]
    pub fn iter(&self) -> std::slice::Iter<'_, T> {
        self.raw.iter()
    }

    /// Returns an iterator over every valid index, in ascending order.
    #[inline]
    pub fn indices(
        &self,
    ) -> impl DoubleEndedIterator<Item = I> + ExactSizeIterator + Clone + 'static {
        let positions = 0..self.len();
        positions.map(I::new)
    }

    /// Returns an iterator over mutable references to the elements.
    #[inline]
    pub fn iter_mut(&mut self) -> std::slice::IterMut<'_, T> {
        self.raw.iter_mut()
    }

    /// Returns the index of the final element, or `None` when the slice is empty.
    #[inline]
    pub fn last_index(&self) -> Option<I> {
        match self.len() {
            0 => None,
            count => Some(I::new(count - 1)),
        }
    }

    /// Exchanges the elements stored at `a` and `b`.
    #[inline]
    pub fn swap(&mut self, a: I, b: I) {
        let (first, second) = (a.index(), b.index());
        self.raw.swap(first, second);
    }

    /// Returns the element at `index`, or `None` when `index` is out of bounds.
    #[inline]
    pub fn get(&self, index: I) -> Option<&T> {
        let position = index.index();
        self.raw.get(position)
    }

    /// Returns the element at `index` mutably, or `None` when `index` is out of bounds.
    #[inline]
    pub fn get_mut(&mut self, index: I) -> Option<&mut T> {
        let position = index.index();
        self.raw.get_mut(position)
    }

    /// Binary-searches the underlying slice for `value` and converts the resulting
    /// position (found or insertion point) into the typed index.
    #[inline]
    pub fn binary_search(&self, value: &T) -> Result<I, I>
    where
        T: Ord,
    {
        self.raw.binary_search(value).map(I::new).map_err(I::new)
    }
}
/// Formats exactly like the underlying slice; the index type does not appear in the output.
impl<I: Idx, T: Debug> Debug for IndexSlice<I, T> {
    fn fmt(&self, fmt: &mut Formatter<'_>) -> std::fmt::Result {
        let elements: &[T] = &self.raw;
        Debug::fmt(elements, fmt)
    }
}
/// `slice[idx]` access using the typed index; panics when the position is out of bounds,
/// as plain slice indexing does.
impl<I: Idx, T> Index<I> for IndexSlice<I, T> {
    type Output = T;

    #[inline]
    fn index(&self, index: I) -> &Self::Output {
        let position = index.index();
        &self.raw[position]
    }
}
/// Mutable `slice[idx]` access using the typed index; panics when the position is out of
/// bounds, as plain slice indexing does.
impl<I: Idx, T> IndexMut<I> for IndexSlice<I, T> {
    #[inline]
    fn index_mut(&mut self, index: I) -> &mut Self::Output {
        let position = index.index();
        &mut self.raw[position]
    }
}
/// Lets `for item in &slice` iterate over shared references to the elements.
impl<'a, I, T> IntoIterator for &'a IndexSlice<I, T>
where
    I: Idx,
{
    type Item = &'a T;
    type IntoIter = std::slice::Iter<'a, T>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.raw.iter()
    }
}
/// Lets `for item in &mut slice` iterate over mutable references to the elements.
impl<'a, I, T> IntoIterator for &'a mut IndexSlice<I, T>
where
    I: Idx,
{
    type Item = &'a mut T;
    type IntoIter = std::slice::IterMut<'a, T>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.raw.iter_mut()
    }
}
/// The owned counterpart of an `IndexSlice` is an `IndexVec` with the same index type.
impl<I, T> ToOwned for IndexSlice<I, T>
where
    I: Idx,
    T: Clone,
{
    type Owned = IndexVec<I, T>;

    /// Clones every element into a freshly allocated `IndexVec`.
    fn to_owned(&self) -> Self::Owned {
        let cloned: Vec<T> = self.raw.to_vec();
        IndexVec::from_raw(cloned)
    }

    /// Clones the elements into `target`'s existing vector.
    fn clone_into(&self, target: &mut Self::Owned) {
        let destination = &mut target.raw;
        self.raw.clone_into(destination);
    }
}
/// The default shared slice is a view over the default (empty) plain slice.
impl<I: Idx, T> Default for &IndexSlice<I, T> {
    #[inline]
    fn default() -> Self {
        let nothing = <&[T]>::default();
        IndexSlice::from_raw(nothing)
    }
}
/// The default mutable slice is a view over the default (empty) plain mutable slice.
impl<I: Idx, T> Default for &mut IndexSlice<I, T> {
    #[inline]
    fn default() -> Self {
        let nothing = <&mut [T]>::default();
        IndexSlice::from_raw_mut(nothing)
    }
}
// `PhantomData<I>` is only a marker, so whether an `IndexSlice` may be sent to another
// thread is decided by the element type alone.
#[allow(unsafe_code)]
unsafe impl<I, T> Send for IndexSlice<I, T>
where
    I: Idx,
    T: Send,
{
}

View file

@ -0,0 +1,170 @@
use crate::slice::IndexSlice;
use crate::Idx;
use std::borrow::{Borrow, BorrowMut};
use std::fmt::{Debug, Formatter};
use std::marker::PhantomData;
use std::ops::{Deref, DerefMut, RangeBounds};
/// A growable, owned sequence of `T` values addressed with the typed index `I`.
#[derive(Clone, Eq, PartialEq, Hash)]
#[repr(transparent)]
pub struct IndexVec<I, T> {
    /// The underlying elements.
    pub raw: Vec<T>,
    // Zero-sized marker that ties the vector to its index type.
    index: PhantomData<I>,
}
impl<I: Idx, T> IndexVec<I, T> {
    /// Creates an empty vector.
    #[inline]
    pub fn new() -> Self {
        Self::from_raw(Vec::new())
    }

    /// Creates an empty vector whose backing `Vec` is built with `Vec::with_capacity(capacity)`.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        Self::from_raw(Vec::with_capacity(capacity))
    }

    /// Wraps an existing `Vec`, keeping its elements in place.
    #[inline]
    pub fn from_raw(raw: Vec<T>) -> Self {
        let index = PhantomData;
        Self { raw, index }
    }

    /// Removes the elements in `range` (expressed in raw `usize` positions) and yields them.
    #[inline]
    pub fn drain<R: RangeBounds<usize>>(&mut self, range: R) -> impl Iterator<Item = T> + '_ {
        let elements = &mut self.raw;
        elements.drain(range)
    }

    /// Shortens the vector to at most `a` elements.
    #[inline]
    pub fn truncate(&mut self, a: usize) {
        let elements = &mut self.raw;
        elements.truncate(a);
    }

    /// Borrows the contents as an [`IndexSlice`].
    #[inline]
    pub fn as_slice(&self) -> &IndexSlice<I, T> {
        let elements = &self.raw;
        IndexSlice::from_raw(elements)
    }

    /// Mutably borrows the contents as an [`IndexSlice`].
    #[inline]
    pub fn as_mut_slice(&mut self) -> &mut IndexSlice<I, T> {
        let elements = &mut self.raw;
        IndexSlice::from_raw_mut(elements)
    }

    /// Appends `data` and returns the index at which it was stored.
    #[inline]
    pub fn push(&mut self, data: T) -> I {
        // Compute the index before pushing: it is the current length.
        let assigned = self.next_index();
        self.raw.push(data);
        assigned
    }

    /// Returns the index that the next pushed element will receive.
    #[inline]
    pub fn next_index(&self) -> I {
        let count = self.raw.len();
        I::new(count)
    }
}
/// Formats exactly like the underlying `Vec`; the index type does not appear in the output.
impl<I, T: Debug> Debug for IndexVec<I, T> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let elements: &Vec<T> = &self.raw;
        Debug::fmt(elements, f)
    }
}
impl<I: Idx, T> Deref for IndexVec<I, T> {
type Target = IndexSlice<I, T>;
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl<I: Idx, T> DerefMut for IndexVec<I, T> {
fn deref_mut(&mut self) -> &mut Self::Target {
self.as_mut_slice()
}
}
/// Allows an `IndexVec` to be borrowed as its slice form.
impl<I: Idx, T> Borrow<IndexSlice<I, T>> for IndexVec<I, T> {
    fn borrow(&self) -> &IndexSlice<I, T> {
        self.as_slice()
    }
}
/// Allows an `IndexVec` to be mutably borrowed as its slice form.
impl<I: Idx, T> BorrowMut<IndexSlice<I, T>> for IndexVec<I, T> {
    fn borrow_mut(&mut self) -> &mut IndexSlice<I, T> {
        self.as_mut_slice()
    }
}
/// Appends every item produced by an iterator to the end of the vector.
impl<I, T> Extend<T> for IndexVec<I, T> {
    #[inline]
    fn extend<Iter>(&mut self, iter: Iter)
    where
        Iter: IntoIterator<Item = T>,
    {
        let elements = &mut self.raw;
        elements.extend(iter);
    }
}
/// Enables `.collect()` into an `IndexVec`; elements keep the iterator's order.
impl<I: Idx, T> FromIterator<T> for IndexVec<I, T> {
    #[inline]
    fn from_iter<Iter>(iter: Iter) -> Self
    where
        Iter: IntoIterator<Item = T>,
    {
        let collected: Vec<T> = iter.into_iter().collect();
        Self::from_raw(collected)
    }
}
/// Consumes the vector and yields its elements by value.
impl<I, T> IntoIterator for IndexVec<I, T>
where
    I: Idx,
{
    type Item = T;
    type IntoIter = std::vec::IntoIter<T>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        let Self { raw, .. } = self;
        raw.into_iter()
    }
}
/// Lets `for item in &vec` iterate over shared references to the elements.
impl<'a, I, T> IntoIterator for &'a IndexVec<I, T>
where
    I: Idx,
{
    type Item = &'a T;
    type IntoIter = std::slice::Iter<'a, T>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.raw.iter()
    }
}
/// Lets `for item in &mut vec` iterate over mutable references to the elements.
impl<'a, I, T> IntoIterator for &'a mut IndexVec<I, T>
where
    I: Idx,
{
    type Item = &'a mut T;
    type IntoIter = std::slice::IterMut<'a, T>;

    #[inline]
    fn into_iter(self) -> Self::IntoIter {
        self.raw.iter_mut()
    }
}
/// The default `IndexVec` is empty; no `Default` bound is placed on `T`.
impl<I: Idx, T> Default for IndexVec<I, T> {
    #[inline]
    fn default() -> Self {
        Self::new()
    }
}
/// Builds an `IndexVec` from a fixed-size array, preserving element order.
impl<I: Idx, T, const N: usize> From<[T; N]> for IndexVec<I, T> {
    #[inline]
    fn from(array: [T; N]) -> Self {
        let elements = Vec::from(array);
        Self::from_raw(elements)
    }
}
// `PhantomData<I>` is only a marker, so whether an `IndexVec` may be sent to another
// thread is decided by the element type alone.
#[allow(unsafe_code)]
unsafe impl<I, T> Send for IndexVec<I, T>
where
    I: Idx,
    T: Send,
{
}

View file

@ -12,6 +12,6 @@ doctest = false
[dependencies] [dependencies]
proc-macro2 = { workspace = true } proc-macro2 = { workspace = true }
quote = { workspace = true } quote = { workspace = true }
syn = { workspace = true, features = ["derive", "parsing", "extra-traits"] } syn = { workspace = true, features = ["derive", "parsing", "extra-traits", "full"] }
textwrap = { workspace = true } textwrap = { workspace = true }
itertools = { workspace = true } itertools = { workspace = true }

View file

@ -1,6 +1,7 @@
//! This crate implements internal macros for the `ruff` library. //! This crate implements internal macros for the `ruff` library.
use crate::cache_key::derive_cache_key; use crate::cache_key::derive_cache_key;
use crate::newtype_index::generate_newtype_index;
use proc_macro::TokenStream; use proc_macro::TokenStream;
use syn::{parse_macro_input, DeriveInput, ItemFn, ItemStruct}; use syn::{parse_macro_input, DeriveInput, ItemFn, ItemStruct};
@ -9,6 +10,7 @@ mod combine_options;
mod config; mod config;
mod derive_message_formats; mod derive_message_formats;
mod map_codes; mod map_codes;
mod newtype_index;
mod register_rules; mod register_rules;
mod rule_code_prefix; mod rule_code_prefix;
mod rule_namespace; mod rule_namespace;
@ -79,3 +81,33 @@ pub fn derive_message_formats(_attr: TokenStream, item: TokenStream) -> TokenStr
let func = parse_macro_input!(item as ItemFn); let func = parse_macro_input!(item as ItemFn);
derive_message_formats::derive_message_formats(&func).into() derive_message_formats::derive_message_formats(&func).into()
} }
/// Turns a field-less struct declaration into a newtype that can be used as an index.
///
/// Valid raw values range from `0` to `u32::MAX - 1` inclusive. Excluding `u32::MAX` is an
/// optimization: it lets `Option<Index>` have the same size as `Index`.
///
/// ## Warning
///
/// Any additional `derive` attributes must be written AFTER this attribute:
///
/// ```rust
/// #[newtype_index]
/// #[derive(Ord, PartialOrd)]
/// struct MyIndex;
/// ```
#[proc_macro_attribute]
pub fn newtype_index(_metadata: TokenStream, input: TokenStream) -> TokenStream {
    let item = parse_macro_input!(input as ItemStruct);

    // A failed expansion is reported to the user as a compile error at the offending item.
    generate_newtype_index(item)
        .unwrap_or_else(|err| err.to_compile_error())
        .into()
}

View file

@ -0,0 +1,139 @@
use quote::quote;
use syn::spanned::Spanned;
use syn::{Error, ItemStruct};
/// Expands `#[newtype_index]` on a field-less, non-generic struct into a newtype over
/// `std::num::NonZeroU32`, together with its constructors, accessors and trait
/// implementations (`Add`, `Debug`, `ruff_index::Idx` and the `From` conversions).
///
/// The generated type stores `value + 1`, so the wrapped integer is never zero; the
/// accessors subtract the `1` again.
///
/// # Errors
///
/// Returns an error spanning the whole item when the struct declares fields or generic
/// parameters.
pub(super) fn generate_newtype_index(item: ItemStruct) -> syn::Result<proc_macro2::TokenStream> {
    if !item.fields.is_empty() {
        return Err(Error::new(
            item.span(),
            "A new type index cannot have any fields.",
        ));
    }

    if !item.generics.params.is_empty() {
        return Err(Error::new(
            item.span(),
            "A new type index cannot be generic.",
        ));
    }

    let ItemStruct {
        attrs,
        vis,
        struct_token,
        ident,
        generics: _,
        fields: _,
        semi_token,
    } = item;

    // The `Debug` output uses the type's own name, e.g. `MyIndex(10)`.
    let debug_name = ident.to_string();

    // A tuple struct needs a trailing `;` even if the input was written without one.
    let semi_token = semi_token.unwrap_or_default();

    let output = quote! {
        #(#attrs)*
        #[derive(Copy, Clone, Eq, PartialEq, Hash)]
        #vis #struct_token #ident(std::num::NonZeroU32)#semi_token

        impl #ident {
            const MAX: u32 = u32::MAX - 1;

            /// Creates an index from a `usize` value.
            ///
            /// Panics if `value` is larger than `u32::MAX - 1`.
            #vis const fn from_usize(value: usize) -> Self {
                assert!(value <= Self::MAX as usize);

                // SAFETY:
                // * The `value < u32::MAX` guarantees that the add doesn't overflow.
                // * The `+ 1` guarantees that the index is not zero
                #[allow(unsafe_code)]
                Self(unsafe { std::num::NonZeroU32::new_unchecked((value as u32) + 1) })
            }

            /// Creates an index from a `u32` value.
            ///
            /// Panics if `value` is larger than `u32::MAX - 1`.
            #vis const fn from_u32(value: u32) -> Self {
                assert!(value <= Self::MAX);

                // SAFETY:
                // * The `value < u32::MAX` guarantees that the add doesn't overflow.
                // * The `+ 1` guarantees that the index is larger than zero.
                #[allow(unsafe_code)]
                Self(unsafe { std::num::NonZeroU32::new_unchecked(value + 1) })
            }

            /// Returns the index as a `u32` value
            #[inline]
            #vis const fn as_u32(self) -> u32 {
                self.0.get() - 1
            }

            /// Returns the index as a `usize` value
            #[inline]
            #vis const fn as_usize(self) -> usize {
                self.as_u32() as usize
            }

            /// Returns the index as a `usize` value (same as `as_usize`)
            #[inline]
            #vis const fn index(self) -> usize {
                self.as_usize()
            }
        }

        impl std::ops::Add<usize> for #ident {
            type Output = #ident;

            fn add(self, rhs: usize) -> Self::Output {
                // Checked so that an overflowing sum can never wrap around to a small,
                // seemingly valid index.
                let sum = self
                    .index()
                    .checked_add(rhs)
                    .expect("index addition overflowed `usize`");
                #ident::from_usize(sum)
            }
        }

        impl std::ops::Add for #ident {
            type Output = #ident;

            fn add(self, rhs: Self) -> Self::Output {
                // Checked for the same reason as `Add<usize>`; the sum of two indices can
                // exceed `usize::MAX` when `usize` is 32 bits wide.
                let sum = self
                    .index()
                    .checked_add(rhs.index())
                    .expect("index addition overflowed `usize`");
                #ident::from_usize(sum)
            }
        }

        impl std::fmt::Debug for #ident {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                f.debug_tuple(#debug_name).field(&self.index()).finish()
            }
        }

        impl ruff_index::Idx for #ident {
            #[inline]
            fn new(value: usize) -> Self {
                #ident::from_usize(value)
            }

            #[inline]
            fn index(self) -> usize {
                // Calls the inherent accessor by its unambiguous name rather than
                // `self.index()`, which reads like a recursive call.
                self.as_usize()
            }
        }

        impl From<usize> for #ident {
            fn from(value: usize) -> Self {
                #ident::from_usize(value)
            }
        }

        impl From<u32> for #ident {
            fn from(value: u32) -> Self {
                #ident::from_u32(value)
            }
        }

        impl From<#ident> for usize {
            fn from(value: #ident) -> Self {
                value.as_usize()
            }
        }

        impl From<#ident> for u32 {
            fn from(value: #ident) -> Self {
                value.as_u32()
            }
        }
    };

    Ok(output)
}

View file

@ -11,6 +11,7 @@ rust-version = { workspace = true }
ruff_python_ast = { path = "../ruff_python_ast" } ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_stdlib = { path = "../ruff_python_stdlib" } ruff_python_stdlib = { path = "../ruff_python_stdlib" }
ruff_text_size = { workspace = true } ruff_text_size = { workspace = true }
ruff_index = { path = "../ruff_index" }
bitflags = { workspace = true } bitflags = { workspace = true }
is-macro = { workspace = true } is-macro = { workspace = true }

View file

@ -1,8 +1,8 @@
use std::num::TryFromIntError; use std::ops::{Deref, DerefMut};
use std::ops::{Deref, Index, IndexMut};
use crate::model::SemanticModel; use crate::model::SemanticModel;
use bitflags::bitflags; use bitflags::bitflags;
use ruff_index::{newtype_index, IndexSlice, IndexVec};
use ruff_python_ast::helpers; use ruff_python_ast::helpers;
use ruff_python_ast::source_code::Locator; use ruff_python_ast::source_code::Locator;
use ruff_text_size::TextRange; use ruff_text_size::TextRange;
@ -130,16 +130,8 @@ impl<'a> Binding<'a> {
/// Using a `u32` to identify [Binding]s should is sufficient because Ruff only supports documents with a /// Using a `u32` to identify [Binding]s should is sufficient because Ruff only supports documents with a
/// size smaller than or equal to `u32::max`. A document with the size of `u32::max` must have fewer than `u32::max` /// size smaller than or equal to `u32::max`. A document with the size of `u32::max` must have fewer than `u32::max`
/// bindings because bindings must be separated by whitespace (and have an assignment). /// bindings because bindings must be separated by whitespace (and have an assignment).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)] #[newtype_index]
pub struct BindingId(u32); pub struct BindingId;
impl TryFrom<usize> for BindingId {
type Error = TryFromIntError;
fn try_from(value: usize) -> Result<Self, Self::Error> {
Ok(Self(u32::try_from(value)?))
}
}
impl nohash_hasher::IsEnabled for BindingId {} impl nohash_hasher::IsEnabled for BindingId {}
@ -147,53 +139,37 @@ impl nohash_hasher::IsEnabled for BindingId {}
/// ///
/// Bindings are indexed by [`BindingId`] /// Bindings are indexed by [`BindingId`]
#[derive(Debug, Clone, Default)] #[derive(Debug, Clone, Default)]
pub struct Bindings<'a>(Vec<Binding<'a>>); pub struct Bindings<'a>(IndexVec<BindingId, Binding<'a>>);
impl<'a> Bindings<'a> { impl<'a> Bindings<'a> {
/// Pushes a new binding and returns its id /// Pushes a new binding and returns its id
pub fn push(&mut self, binding: Binding<'a>) -> BindingId { pub fn push(&mut self, binding: Binding<'a>) -> BindingId {
let id = self.next_id(); self.0.push(binding)
self.0.push(binding);
id
} }
/// Returns the id that will be assigned when pushing the next binding /// Returns the id that will be assigned when pushing the next binding
pub fn next_id(&self) -> BindingId { pub fn next_id(&self) -> BindingId {
BindingId::try_from(self.0.len()).unwrap() self.0.next_index()
}
}
impl<'a> Index<BindingId> for Bindings<'a> {
type Output = Binding<'a>;
fn index(&self, index: BindingId) -> &Self::Output {
&self.0[usize::from(index)]
}
}
impl<'a> IndexMut<BindingId> for Bindings<'a> {
fn index_mut(&mut self, index: BindingId) -> &mut Self::Output {
&mut self.0[usize::from(index)]
} }
} }
impl<'a> Deref for Bindings<'a> { impl<'a> Deref for Bindings<'a> {
type Target = [Binding<'a>]; type Target = IndexSlice<BindingId, Binding<'a>>;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.0 &self.0
} }
} }
impl<'a> FromIterator<Binding<'a>> for Bindings<'a> { impl<'a> DerefMut for Bindings<'a> {
fn from_iter<T: IntoIterator<Item = Binding<'a>>>(iter: T) -> Self { fn deref_mut(&mut self) -> &mut Self::Target {
Self(Vec::from_iter(iter)) &mut self.0
} }
} }
impl From<BindingId> for usize { impl<'a> FromIterator<Binding<'a>> for Bindings<'a> {
fn from(value: BindingId) -> Self { fn from_iter<T: IntoIterator<Item = Binding<'a>>>(iter: T) -> Self {
value.0 as usize Self(IndexVec::from_iter(iter))
} }
} }

View file

@ -2,9 +2,9 @@
//! can be documented, such as a module, class, or function. //! can be documented, such as a module, class, or function.
use std::fmt::Debug; use std::fmt::Debug;
use std::num::TryFromIntError; use std::ops::Deref;
use std::ops::{Deref, Index};
use ruff_index::{newtype_index, IndexSlice, IndexVec};
use rustpython_parser::ast::{self, Stmt}; use rustpython_parser::ast::{self, Stmt};
use crate::analyze::visibility::{ use crate::analyze::visibility::{
@ -12,28 +12,14 @@ use crate::analyze::visibility::{
}; };
/// Id uniquely identifying a definition in a program. /// Id uniquely identifying a definition in a program.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[newtype_index]
pub struct DefinitionId(u32); pub struct DefinitionId;
impl DefinitionId { impl DefinitionId {
/// Returns the ID for the module definition. /// Returns the ID for the module definition.
#[inline] #[inline]
pub const fn module() -> Self { pub const fn module() -> Self {
DefinitionId(0) DefinitionId::from_u32(0)
}
}
impl TryFrom<usize> for DefinitionId {
type Error = TryFromIntError;
fn try_from(value: usize) -> Result<Self, Self::Error> {
Ok(Self(u32::try_from(value)?))
}
}
impl From<DefinitionId> for usize {
fn from(value: DefinitionId) -> Self {
value.0 as usize
} }
} }
@ -118,11 +104,11 @@ impl Definition<'_> {
/// The definitions within a Python program indexed by [`DefinitionId`]. /// The definitions within a Python program indexed by [`DefinitionId`].
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct Definitions<'a>(Vec<Definition<'a>>); pub struct Definitions<'a>(IndexVec<DefinitionId, Definition<'a>>);
impl<'a> Definitions<'a> { impl<'a> Definitions<'a> {
pub fn for_module(definition: Module<'a>) -> Self { pub fn for_module(definition: Module<'a>) -> Self {
Self(vec![Definition::Module(definition)]) Self(IndexVec::from_raw(vec![Definition::Module(definition)]))
} }
/// Pushes a new member definition and returns its unique id. /// Pushes a new member definition and returns its unique id.
@ -130,14 +116,13 @@ impl<'a> Definitions<'a> {
/// Members are assumed to be pushed in traversal order, such that parents are pushed before /// Members are assumed to be pushed in traversal order, such that parents are pushed before
/// their children. /// their children.
pub fn push_member(&mut self, member: Member<'a>) -> DefinitionId { pub fn push_member(&mut self, member: Member<'a>) -> DefinitionId {
let next_id = DefinitionId::try_from(self.0.len()).unwrap(); self.0.push(Definition::Member(member))
self.0.push(Definition::Member(member));
next_id
} }
/// Resolve the visibility of each definition in the collection. /// Resolve the visibility of each definition in the collection.
pub fn resolve(self, exports: Option<&[&str]>) -> ContextualizedDefinitions<'a> { pub fn resolve(self, exports: Option<&[&str]>) -> ContextualizedDefinitions<'a> {
let mut definitions: Vec<ContextualizedDefinition<'a>> = Vec::with_capacity(self.len()); let mut definitions: IndexVec<DefinitionId, ContextualizedDefinition<'a>> =
IndexVec::with_capacity(self.len());
for definition in self { for definition in self {
// Determine the visibility of the next definition, taking into account its parent's // Determine the visibility of the next definition, taking into account its parent's
@ -147,7 +132,7 @@ impl<'a> Definitions<'a> {
Definition::Module(module) => module.source.to_visibility(), Definition::Module(module) => module.source.to_visibility(),
Definition::Member(member) => match member.kind { Definition::Member(member) => match member.kind {
MemberKind::Class => { MemberKind::Class => {
let parent = &definitions[usize::from(member.parent)]; let parent = &definitions[member.parent];
if parent.visibility.is_private() if parent.visibility.is_private()
|| exports || exports
.map_or(false, |exports| !exports.contains(&member.name())) .map_or(false, |exports| !exports.contains(&member.name()))
@ -158,7 +143,7 @@ impl<'a> Definitions<'a> {
} }
} }
MemberKind::NestedClass => { MemberKind::NestedClass => {
let parent = &definitions[usize::from(member.parent)]; let parent = &definitions[member.parent];
if parent.visibility.is_private() if parent.visibility.is_private()
|| matches!( || matches!(
parent.definition, parent.definition,
@ -176,7 +161,7 @@ impl<'a> Definitions<'a> {
} }
} }
MemberKind::Function => { MemberKind::Function => {
let parent = &definitions[usize::from(member.parent)]; let parent = &definitions[member.parent];
if parent.visibility.is_private() if parent.visibility.is_private()
|| exports || exports
.map_or(false, |exports| !exports.contains(&member.name())) .map_or(false, |exports| !exports.contains(&member.name()))
@ -188,7 +173,7 @@ impl<'a> Definitions<'a> {
} }
MemberKind::NestedFunction => Visibility::Private, MemberKind::NestedFunction => Visibility::Private,
MemberKind::Method => { MemberKind::Method => {
let parent = &definitions[usize::from(member.parent)]; let parent = &definitions[member.parent];
if parent.visibility.is_private() { if parent.visibility.is_private() {
Visibility::Private Visibility::Private
} else { } else {
@ -204,20 +189,13 @@ impl<'a> Definitions<'a> {
}); });
} }
ContextualizedDefinitions(definitions) ContextualizedDefinitions(definitions.raw)
}
}
impl<'a> Index<DefinitionId> for Definitions<'a> {
type Output = Definition<'a>;
fn index(&self, index: DefinitionId) -> &Self::Output {
&self.0[usize::from(index)]
} }
} }
impl<'a> Deref for Definitions<'a> { impl<'a> Deref for Definitions<'a> {
type Target = [Definition<'a>]; type Target = IndexSlice<DefinitionId, Definition<'a>>;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.0 &self.0
} }

View file

@ -1,6 +1,6 @@
use std::num::{NonZeroU32, TryFromIntError};
use std::ops::{Index, IndexMut}; use std::ops::{Index, IndexMut};
use ruff_index::{newtype_index, IndexVec};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use rustpython_parser::ast::Stmt; use rustpython_parser::ast::Stmt;
@ -11,24 +11,9 @@ use ruff_python_ast::types::RefEquality;
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max` /// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max`
/// and it is impossible to have more statements than characters in the file. We use a `NonZeroU32` to /// and it is impossible to have more statements than characters in the file. We use a `NonZeroU32` to
/// take advantage of memory layout optimizations. /// take advantage of memory layout optimizations.
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[newtype_index]
pub struct NodeId(NonZeroU32); #[derive(Ord, PartialOrd)]
pub struct NodeId;
/// Convert a `usize` to a `NodeId` (by adding 1 to the value, and casting to `NonZeroU32`).
impl TryFrom<usize> for NodeId {
type Error = TryFromIntError;
fn try_from(value: usize) -> Result<Self, Self::Error> {
Ok(Self(NonZeroU32::try_from(u32::try_from(value)? + 1)?))
}
}
/// Convert a `NodeId` to a `usize` (by subtracting 1 from the value, and casting to `usize`).
impl From<NodeId> for usize {
fn from(value: NodeId) -> Self {
value.0.get() as usize - 1
}
}
/// A [`Node`] represents a statement in a program, along with a pointer to its parent (if any). /// A [`Node`] represents a statement in a program, along with a pointer to its parent (if any).
#[derive(Debug)] #[derive(Debug)]
@ -44,7 +29,7 @@ struct Node<'a> {
/// The nodes of a program indexed by [`NodeId`] /// The nodes of a program indexed by [`NodeId`]
#[derive(Debug, Default)] #[derive(Debug, Default)]
pub struct Nodes<'a> { pub struct Nodes<'a> {
nodes: Vec<Node<'a>>, nodes: IndexVec<NodeId, Node<'a>>,
node_to_id: FxHashMap<RefEquality<'a, Stmt>, NodeId>, node_to_id: FxHashMap<RefEquality<'a, Stmt>, NodeId>,
} }
@ -53,16 +38,15 @@ impl<'a> Nodes<'a> {
/// ///
/// Panics if a node with the same pointer already exists. /// Panics if a node with the same pointer already exists.
pub fn insert(&mut self, stmt: &'a Stmt, parent: Option<NodeId>) -> NodeId { pub fn insert(&mut self, stmt: &'a Stmt, parent: Option<NodeId>) -> NodeId {
let next_id = NodeId::try_from(self.nodes.len()).unwrap(); let next_id = self.nodes.next_index();
if let Some(existing_id) = self.node_to_id.insert(RefEquality(stmt), next_id) { if let Some(existing_id) = self.node_to_id.insert(RefEquality(stmt), next_id) {
panic!("Node already exists with id {existing_id:?}"); panic!("Node already exists with id {existing_id:?}");
} }
self.nodes.push(Node { self.nodes.push(Node {
stmt, stmt,
parent, parent,
depth: parent.map_or(0, |parent| self.nodes[usize::from(parent)].depth + 1), depth: parent.map_or(0, |parent| self.nodes[parent].depth + 1),
}); })
next_id
} }
/// Returns the [`NodeId`] of the given node. /// Returns the [`NodeId`] of the given node.
@ -74,26 +58,24 @@ impl<'a> Nodes<'a> {
/// Return the [`NodeId`] of the parent node. /// Return the [`NodeId`] of the parent node.
#[inline] #[inline]
pub fn parent_id(&self, node_id: NodeId) -> Option<NodeId> { pub fn parent_id(&self, node_id: NodeId) -> Option<NodeId> {
self.nodes[usize::from(node_id)].parent self.nodes[node_id].parent
} }
/// Return the depth of the node. /// Return the depth of the node.
#[inline] #[inline]
pub fn depth(&self, node_id: NodeId) -> u32 { pub fn depth(&self, node_id: NodeId) -> u32 {
self.nodes[usize::from(node_id)].depth self.nodes[node_id].depth
} }
/// Returns an iterator over all [`NodeId`] ancestors, starting from the given [`NodeId`]. /// Returns an iterator over all [`NodeId`] ancestors, starting from the given [`NodeId`].
pub fn ancestor_ids(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ { pub fn ancestor_ids(&self, node_id: NodeId) -> impl Iterator<Item = NodeId> + '_ {
std::iter::successors(Some(node_id), |&node_id| { std::iter::successors(Some(node_id), |&node_id| self.nodes[node_id].parent)
self.nodes[usize::from(node_id)].parent
})
} }
/// Return the parent of the given node. /// Return the parent of the given node.
pub fn parent(&self, node: &'a Stmt) -> Option<&'a Stmt> { pub fn parent(&self, node: &'a Stmt) -> Option<&'a Stmt> {
let node_id = self.node_to_id.get(&RefEquality(node))?; let node_id = self.node_to_id.get(&RefEquality(node))?;
let parent_id = self.nodes[usize::from(*node_id)].parent?; let parent_id = self.nodes[*node_id].parent?;
Some(self[parent_id]) Some(self[parent_id])
} }
} }
@ -101,13 +83,15 @@ impl<'a> Nodes<'a> {
impl<'a> Index<NodeId> for Nodes<'a> { impl<'a> Index<NodeId> for Nodes<'a> {
type Output = &'a Stmt; type Output = &'a Stmt;
#[inline]
fn index(&self, index: NodeId) -> &Self::Output { fn index(&self, index: NodeId) -> &Self::Output {
&self.nodes[usize::from(index)].stmt &self.nodes[index].stmt
} }
} }
impl<'a> IndexMut<NodeId> for Nodes<'a> { impl<'a> IndexMut<NodeId> for Nodes<'a> {
#[inline]
fn index_mut(&mut self, index: NodeId) -> &mut Self::Output { fn index_mut(&mut self, index: NodeId) -> &mut Self::Output {
&mut self.nodes[usize::from(index)].stmt &mut self.nodes[index].stmt
} }
} }

View file

@ -1,6 +1,6 @@
use std::num::TryFromIntError; use std::ops::{Deref, DerefMut};
use std::ops::{Deref, Index, IndexMut};
use ruff_index::{newtype_index, Idx, IndexSlice, IndexVec};
use rustc_hash::FxHashMap; use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Arguments, Expr, Keyword, Stmt}; use rustpython_parser::ast::{Arguments, Expr, Keyword, Stmt};
@ -151,39 +151,25 @@ pub struct Lambda<'a> {
/// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max` /// Using a `u32` is sufficient because Ruff only supports parsing documents with a size of max `u32::max`
/// and it is impossible to have more scopes than characters in the file (because defining a function or class /// and it is impossible to have more scopes than characters in the file (because defining a function or class
/// requires more than one character). /// requires more than one character).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)] #[newtype_index]
pub struct ScopeId(u32); pub struct ScopeId;
impl ScopeId { impl ScopeId {
/// Returns the ID for the global scope /// Returns the ID for the global scope
#[inline] #[inline]
pub const fn global() -> Self { pub const fn global() -> Self {
ScopeId(0) ScopeId::from_u32(0)
} }
/// Returns `true` if this is the id of the global scope /// Returns `true` if this is the id of the global scope
pub const fn is_global(&self) -> bool { pub const fn is_global(&self) -> bool {
self.0 == 0 self.index() == 0
}
}
impl TryFrom<usize> for ScopeId {
type Error = TryFromIntError;
fn try_from(value: usize) -> Result<Self, Self::Error> {
Ok(Self(u32::try_from(value)?))
}
}
impl From<ScopeId> for usize {
fn from(value: ScopeId) -> Self {
value.0 as usize
} }
} }
/// The scopes of a program indexed by [`ScopeId`] /// The scopes of a program indexed by [`ScopeId`]
#[derive(Debug)] #[derive(Debug)]
pub struct Scopes<'a>(Vec<Scope<'a>>); pub struct Scopes<'a>(IndexVec<ScopeId, Scope<'a>>);
impl<'a> Scopes<'a> { impl<'a> Scopes<'a> {
/// Returns a reference to the global scope /// Returns a reference to the global scope
@ -198,7 +184,7 @@ impl<'a> Scopes<'a> {
/// Pushes a new scope and returns its unique id /// Pushes a new scope and returns its unique id
pub fn push_scope(&mut self, kind: ScopeKind<'a>, parent: ScopeId) -> ScopeId { pub fn push_scope(&mut self, kind: ScopeKind<'a>, parent: ScopeId) -> ScopeId {
let next_id = ScopeId::try_from(self.0.len()).unwrap(); let next_id = ScopeId::new(self.0.len());
self.0.push(Scope::local(kind, parent)); self.0.push(Scope::local(kind, parent));
next_id next_id
} }
@ -218,27 +204,19 @@ impl<'a> Scopes<'a> {
impl Default for Scopes<'_> { impl Default for Scopes<'_> {
fn default() -> Self { fn default() -> Self {
Self(vec![Scope::global()]) Self(IndexVec::from_raw(vec![Scope::global()]))
}
}
impl<'a> Index<ScopeId> for Scopes<'a> {
type Output = Scope<'a>;
fn index(&self, index: ScopeId) -> &Self::Output {
&self.0[usize::from(index)]
}
}
impl<'a> IndexMut<ScopeId> for Scopes<'a> {
fn index_mut(&mut self, index: ScopeId) -> &mut Self::Output {
&mut self.0[usize::from(index)]
} }
} }
impl<'a> Deref for Scopes<'a> { impl<'a> Deref for Scopes<'a> {
type Target = [Scope<'a>]; type Target = IndexSlice<ScopeId, Scope<'a>>;
fn deref(&self) -> &Self::Target { fn deref(&self) -> &Self::Target {
&self.0 &self.0
} }
} }
impl<'a> DerefMut for Scopes<'a> {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}