[ty] Reduce size of member table (#19572)

This commit is contained in:
Micha Reiser 2025-08-07 11:16:04 +02:00 committed by GitHub
parent cc97579c3b
commit b96aa4605b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 722 additions and 134 deletions

View file

@ -2,9 +2,10 @@ use bitflags::bitflags;
use hashbrown::hash_table::Entry;
use ruff_index::{IndexVec, newtype_index};
use ruff_python_ast::{self as ast, name::Name};
use ruff_text_size::{TextLen as _, TextRange, TextSize};
use rustc_hash::FxHasher;
use smallvec::SmallVec;
use std::hash::{Hash as _, Hasher as _};
use std::hash::{Hash, Hasher as _};
use std::ops::{Deref, DerefMut};
/// A member access, e.g. `x.y` or `x[1]` or `x["foo"]`.
@ -25,7 +26,7 @@ impl Member {
/// Returns the left most part of the member expression, e.g. `x` in `x.y.z`.
///
/// This is the symbol on which the member access is performed.
pub(crate) fn symbol_name(&self) -> &Name {
pub(crate) fn symbol_name(&self) -> &str {
self.expression.symbol_name()
}
@ -78,10 +79,14 @@ impl Member {
/// a method context, or whether the `<NAME>` actually refers to the first
/// parameter of the method (i.e. `self`). To answer those questions,
/// use [`Self::as_instance_attribute`].
pub(super) fn as_instance_attribute_candidate(&self) -> Option<&Name> {
match &*self.expression.segments {
[MemberSegment::Attribute(name)] => Some(name),
_ => None,
pub(super) fn as_instance_attribute_candidate(&self) -> Option<&str> {
let mut segments = self.expression().segments();
let first_segment = segments.next()?;
if first_segment.kind == SegmentKind::Attribute && segments.next().is_none() {
Some(first_segment.text)
} else {
None
}
}
@ -100,11 +105,11 @@ impl Member {
/// Does the place expression have the form `self.{name}` (`self` is the first parameter of the method)?
pub(super) fn is_instance_attribute_named(&self, name: &str) -> bool {
self.as_instance_attribute().map(Name::as_str) == Some(name)
self.as_instance_attribute() == Some(name)
}
/// Return `Some(<ATTRIBUTE>)` if the place expression is an instance attribute.
pub(crate) fn as_instance_attribute(&self) -> Option<&Name> {
pub(crate) fn as_instance_attribute(&self) -> Option<&str> {
if self.is_instance_attribute() {
debug_assert!(self.as_instance_attribute_candidate().is_some());
self.as_instance_attribute_candidate()
@ -142,63 +147,119 @@ impl get_size2::GetSize for MemberFlags {}
/// * An integer-based subscript, e.g. `[1]` in `x[1]`
/// * A string-based subscript, e.g. `["foo"]` in `x["foo"]`
///
/// Internally, the segments are stored in reverse order. This allows constructing
/// a `MemberExpr` from an ast expression without having to reverse the segments.
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize, Hash)]
/// Uses a compact representation where the entire expression is stored as a single path.
/// For example, `foo.bar[0]["baz"]` is stored as:
/// - path: `foobar0baz`
/// - segments: stores where each segment starts and its kind (attribute, int subscript, string subscript)
///
/// The symbol name can be extracted from the path by taking the text up to the first segment's start offset.
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
pub(crate) struct MemberExpr {
symbol_name: Name,
segments: SmallVec<[MemberSegment; 1]>,
/// The entire path as a single Name
path: Name,
/// Metadata for each segment (in forward order)
segments: Segments,
}
impl MemberExpr {
pub(super) fn new(symbol_name: Name, segments: SmallVec<[MemberSegment; 1]>) -> Self {
debug_assert!(
!segments.is_empty(),
"A member without segments is a symbol."
);
pub(super) fn try_from_expr(expression: ast::ExprRef<'_>) -> Option<Self> {
fn visit(expr: ast::ExprRef) -> Option<(Name, SmallVec<[SegmentInfo; 8]>)> {
use std::fmt::Write as _;
Self {
symbol_name,
segments,
match expr {
ast::ExprRef::Name(name) => {
Some((name.id.clone(), smallvec::SmallVec::new_const()))
}
ast::ExprRef::Attribute(attribute) => {
let (mut path, mut segments) = visit(ast::ExprRef::from(&attribute.value))?;
let start_offset = path.text_len();
let _ = write!(path, "{}", attribute.attr.id);
segments.push(SegmentInfo::new(SegmentKind::Attribute, start_offset));
Some((path, segments))
}
ast::ExprRef::Subscript(subscript) => {
let (mut path, mut segments) = visit((&subscript.value).into())?;
let start_offset = path.text_len();
match &*subscript.slice {
ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(index),
..
}) => {
let _ = write!(path, "{index}");
segments
.push(SegmentInfo::new(SegmentKind::IntSubscript, start_offset));
}
ast::Expr::StringLiteral(string) => {
let _ = write!(path, "{}", string.value);
segments
.push(SegmentInfo::new(SegmentKind::StringSubscript, start_offset));
}
_ => {
return None;
}
}
Some((path, segments))
}
_ => None,
}
}
let (path, segments) = visit(expression)?;
if segments.is_empty() {
None
} else {
Some(Self {
path,
segments: Segments::from_vec(segments),
})
}
}
fn segment_infos(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
self.segments.iter()
}
fn segments(&self) -> impl Iterator<Item = Segment<'_>> + '_ {
SegmentsIterator::new(self.path.as_str(), self.segment_infos())
}
fn shrink_to_fit(&mut self) {
self.segments.shrink_to_fit();
self.segments.shrink_to_fit();
self.path.shrink_to_fit();
}
/// Returns the left most part of the member expression, e.g. `x` in `x.y.z`.
///
/// This is the symbol on which the member access is performed.
pub(crate) fn symbol_name(&self) -> &Name {
&self.symbol_name
pub(crate) fn symbol_name(&self) -> &str {
self.as_ref().symbol_name()
}
/// Returns the segments of the member expression, e.g. `[MemberSegment::Attribute("y"), MemberSegment::IntSubscript(1)]` for `x.y[1]`.
pub(crate) fn member_segments(
&self,
) -> impl ExactSizeIterator<Item = &MemberSegment> + DoubleEndedIterator {
self.segments.iter().rev()
pub(super) fn num_segments(&self) -> usize {
self.segments.len()
}
pub(crate) fn as_ref(&self) -> MemberExprRef {
MemberExprRef {
name: self.symbol_name.as_str(),
segments: self.segments.as_slice(),
path: self.path.as_str(),
segments: SegmentsRef::from(&self.segments),
}
}
}
impl std::fmt::Display for MemberExpr {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.symbol_name.as_str())?;
f.write_str(self.symbol_name())?;
for segment in self.member_segments() {
match segment {
MemberSegment::Attribute(name) => write!(f, ".{name}")?,
MemberSegment::IntSubscript(int) => write!(f, "[{int}]")?,
MemberSegment::StringSubscript(string) => write!(f, "[\"{string}\"]")?,
for segment in self.segments() {
match segment.kind {
SegmentKind::Attribute => write!(f, ".{}", segment.text)?,
SegmentKind::IntSubscript => write!(f, "[{}]", segment.text)?,
SegmentKind::StringSubscript => write!(f, "[\"{}\"]", segment.text)?,
}
}
@ -230,42 +291,43 @@ impl PartialEq<&MemberExpr> for MemberExprRef<'_> {
}
}
#[derive(Debug, Clone, PartialEq, Eq, Hash, get_size2::GetSize)]
pub(crate) enum MemberSegment {
/// An attribute access, e.g. `.y` in `x.y`
Attribute(Name),
/// An integer-based index access, e.g. `[1]` in `x[1]`
IntSubscript(ast::Int),
/// A string-based index access, e.g. `["foo"]` in `x["foo"]`
StringSubscript(String),
}
/// Reference to a member expression.
#[derive(Debug, PartialEq, Eq, Hash, Copy, Clone)]
#[derive(Debug, Clone, PartialEq, Eq)]
pub(crate) struct MemberExprRef<'a> {
name: &'a str,
segments: &'a [MemberSegment],
path: &'a str,
segments: SegmentsRef<'a>,
}
impl<'a> MemberExprRef<'a> {
pub(super) fn symbol_name(&self) -> &'a str {
self.name
let end = self
.segments
.iter()
.next()
.map(SegmentInfo::offset)
.unwrap_or(self.path.text_len());
let range = TextRange::new(TextSize::default(), end);
&self.path[range]
}
/// Create a new `MemberExprRef` from a name and segments.
///
/// Note that the segments are expected to be in reverse order, i.e. the last segment is the first one in the expression.
pub(super) fn from_raw(name: &'a str, segments: &'a [MemberSegment]) -> Self {
debug_assert!(
!segments.is_empty(),
"A member without segments is a symbol."
);
Self { name, segments }
#[cfg(test)]
fn segments(&self) -> impl Iterator<Item = Segment<'_>> + '_ {
SegmentsIterator::new(self.path, self.segments.iter())
}
/// Returns a slice over the member segments. The segments are in reverse order,
pub(super) fn rev_member_segments(&self) -> &'a [MemberSegment] {
self.segments
pub(super) fn parent(&self) -> Option<MemberExprRef<'a>> {
let parent_segments = self.segments.parent()?;
// The removed segment is always the last one. Find its start offset.
let last_segment = self.segments.iter().last()?;
let path_end = last_segment.offset();
Some(MemberExprRef {
path: &self.path[TextRange::new(TextSize::default(), path_end)],
segments: parent_segments,
})
}
}
@ -275,6 +337,14 @@ impl<'a> From<&'a MemberExpr> for MemberExprRef<'a> {
}
}
impl Hash for MemberExprRef<'_> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
// Path on its own isn't 100% unique, but it should avoid
// most collisions and avoids iterating all segments.
self.path.hash(state);
}
}
/// Uniquely identifies a member in a scope.
#[newtype_index]
#[derive(get_size2::GetSize, salsa::Update)]
@ -315,10 +385,8 @@ impl MemberTable {
self.members.iter()
}
fn hash_member_expression_ref(member: MemberExprRef) -> u64 {
let mut h = FxHasher::default();
member.hash(&mut h);
h.finish()
fn hash_member_expression_ref(member: &MemberExprRef) -> u64 {
hash_single(member)
}
/// Returns the ID of the member with the given expression, if it exists.
@ -327,7 +395,7 @@ impl MemberTable {
member: impl Into<MemberExprRef<'a>>,
) -> Option<ScopedMemberId> {
let member = member.into();
let hash = Self::hash_member_expression_ref(member);
let hash = Self::hash_member_expression_ref(&member);
self.map
.find(hash, |id| self.members[*id].expression == member)
.copied()
@ -369,12 +437,14 @@ impl MemberTableBuilder {
///
/// Members are identified by their expression, which is hashed to find the entry in the table.
pub(super) fn add(&mut self, mut member: Member) -> (ScopedMemberId, bool) {
let hash = MemberTable::hash_member_expression_ref(member.expression.as_ref());
let member_ref = member.expression.as_ref();
let hash = MemberTable::hash_member_expression_ref(&member_ref);
let entry = self.table.map.entry(
hash,
|id| self.table.members[*id].expression == member.expression,
|id| self.table.members[*id].expression.as_ref() == member.expression.as_ref(),
|id| {
MemberTable::hash_member_expression_ref(self.table.members[*id].expression.as_ref())
let ref_expr = self.table.members[*id].expression.as_ref();
MemberTable::hash_member_expression_ref(&ref_expr)
},
);
@ -402,7 +472,8 @@ impl MemberTableBuilder {
let mut table = self.table;
table.members.shrink_to_fit();
table.map.shrink_to_fit(|id| {
MemberTable::hash_member_expression_ref(table.members[*id].expression.as_ref())
let ref_expr = table.members[*id].expression.as_ref();
MemberTable::hash_member_expression_ref(&ref_expr)
});
table
}
@ -421,3 +492,545 @@ impl DerefMut for MemberTableBuilder {
&mut self.table
}
}
/// Representation of segments that can be either inline or heap-allocated.
///
/// Design choices:
/// - Uses `Box<[SegmentInfo]>` instead of `ThinVec` because even with a `ThinVec`, the size of `Segments` is still 128 bytes.
/// - Uses u64 for inline storage. That's the largest size without increasing the overall size of `Segments` and allows to encode up to 7 segments.
#[derive(Clone, Debug, PartialEq, Eq, get_size2::GetSize)]
enum Segments {
/// Inline storage for up to 7 segments with 6-bit relative offsets (max 63 bytes per segment)
Small(SmallSegments),
/// Heap storage for expressions that don't fit inline
Heap(Box<[SegmentInfo]>),
}
static_assertions::assert_eq_size!(SmallSegments, u64);
static_assertions::assert_eq_size!(Segments, [u64; 2]);
impl Segments {
fn from_vec(segments: SmallVec<[SegmentInfo; 8]>) -> Self {
debug_assert!(
!segments.is_empty(),
"Segments cannot be empty. A member without segments is a symbol"
);
if let Some(small) = SmallSegments::try_from_slice(&segments) {
Self::Small(small)
} else {
Self::Heap(segments.into_vec().into_boxed_slice())
}
}
fn len(&self) -> usize {
match self {
Self::Small(small) => small.len(),
Self::Heap(segments) => segments.len(),
}
}
fn iter(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
match self {
Self::Small(small) => itertools::Either::Left(small.iter()),
Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()),
}
}
}
/// Segment metadata - packed into a single u32
/// Layout: [kind: 2 bits][offset: 30 bits]
/// - Bits 0-1: `SegmentKind` (0=Attribute, 1=IntSubscript, 2=StringSubscript)
/// - Bits 2-31: Absolute offset from start of path (up to 1,073,741,823 bytes)
#[derive(Clone, Copy, PartialEq, Eq, Hash, get_size2::GetSize)]
struct SegmentInfo(u32);
const KIND_MASK: u32 = 0b11;
const OFFSET_SHIFT: u32 = 2;
const MAX_OFFSET: u32 = (1 << 30) - 1; // 2^30 - 1
impl SegmentInfo {
const fn new(kind: SegmentKind, offset: TextSize) -> Self {
assert!(offset.to_u32() < MAX_OFFSET);
let value = (offset.to_u32() << OFFSET_SHIFT) | (kind as u32);
Self(value)
}
const fn kind(self) -> SegmentKind {
match self.0 & KIND_MASK {
0 => SegmentKind::Attribute,
1 => SegmentKind::IntSubscript,
2 => SegmentKind::StringSubscript,
_ => panic!("Invalid SegmentKind bits"),
}
}
const fn offset(self) -> TextSize {
TextSize::new(self.0 >> OFFSET_SHIFT)
}
}
impl std::fmt::Debug for SegmentInfo {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_struct("SegmentInfo")
.field("kind", &self.kind())
.field("offset", &self.offset())
.finish()
}
}
struct Segment<'a> {
kind: SegmentKind,
text: &'a str,
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, get_size2::GetSize)]
#[repr(u8)]
enum SegmentKind {
Attribute = 0,
IntSubscript = 1,
StringSubscript = 2,
}
/// Iterator over segments that converts `SegmentInfo` to `Segment` with text slices.
struct SegmentsIterator<'a, I> {
path: &'a str,
segment_infos: I,
current: Option<SegmentInfo>,
next: Option<SegmentInfo>,
}
impl<'a, I> SegmentsIterator<'a, I>
where
I: Iterator<Item = SegmentInfo>,
{
fn new(path: &'a str, mut segment_infos: I) -> Self {
let current = segment_infos.next();
let next = segment_infos.next();
Self {
path,
segment_infos,
current,
next,
}
}
}
impl<'a, I> Iterator for SegmentsIterator<'a, I>
where
I: Iterator<Item = SegmentInfo>,
{
type Item = Segment<'a>;
fn next(&mut self) -> Option<Self::Item> {
let info = self.current.take()?;
let end = self
.next
.map(SegmentInfo::offset)
.unwrap_or(self.path.text_len());
self.current = self.next;
self.next = self.segment_infos.next();
Some(Segment {
kind: info.kind(),
text: &self.path[TextRange::new(info.offset(), end)],
})
}
}
const INLINE_COUNT_BITS: u32 = 3;
const INLINE_COUNT_MASK: u64 = (1 << INLINE_COUNT_BITS) - 1;
const INLINE_SEGMENT_BITS: u32 = 8;
const INLINE_SEGMENT_MASK: u64 = (1 << INLINE_SEGMENT_BITS) - 1;
const INLINE_KIND_BITS: u32 = 2;
const INLINE_KIND_MASK: u64 = (1 << INLINE_KIND_BITS) - 1;
const INLINE_PREV_LEN_BITS: u32 = 6;
const INLINE_PREV_LEN_MASK: u64 = (1 << INLINE_PREV_LEN_BITS) - 1;
const INLINE_MAX_SEGMENTS: usize = 7;
const INLINE_MAX_RELATIVE_OFFSET: u32 = (1 << INLINE_PREV_LEN_BITS) - 1; // 63
/// Compact representation that can store up to 7 segments inline in a u64.
///
/// Layout:
/// - Bits 0-2: Number of segments minus 1 (0-6, representing 1-7 segments)
/// - Bits 3-10: Segment 0 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 11-18: Segment 1 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 19-26: Segment 2 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 27-34: Segment 3 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 35-42: Segment 4 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 43-50: Segment 5 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 51-58: Segment 6 (2 bits kind + 6 bits relative offset, max 63 bytes)
/// - Bits 59-63: Unused (5 bits)
///
/// Constraints:
/// - Maximum 7 segments (realistic limit for member access chains)
/// - Maximum 63-byte relative offset per segment (sufficient for most identifiers)
/// - Never empty (`segments.len()` >= 1)
///
#[derive(Clone, Copy, PartialEq, Eq, get_size2::GetSize)]
#[repr(transparent)]
struct SmallSegments(u64);
impl SmallSegments {
fn try_from_slice(segments: &[SegmentInfo]) -> Option<Self> {
if segments.is_empty() || segments.len() > INLINE_MAX_SEGMENTS {
return None;
}
// Pack into inline representation
// Store count minus 1 (since segments are never empty, range 0-6 represents 1-7 segments)
let mut packed = (segments.len() - 1) as u64;
let mut prev_offset = TextSize::new(0);
for (i, segment) in segments.iter().enumerate() {
// Compute relative offset on-the-fly
let relative_offset = segment.offset() - prev_offset;
if relative_offset > TextSize::from(INLINE_MAX_RELATIVE_OFFSET) {
return None;
}
let kind = segment.kind() as u64;
let relative_offset_val = u64::from(relative_offset.to_u32());
let segment_data = (relative_offset_val << INLINE_KIND_BITS) | kind;
let shift = INLINE_COUNT_BITS
+ (u32::try_from(i).expect("i is bounded by INLINE_MAX_SEGMENTS")
* INLINE_SEGMENT_BITS);
packed |= segment_data << shift;
prev_offset = segment.offset();
}
Some(Self(packed))
}
#[expect(
clippy::cast_possible_truncation,
reason = "INLINE_COUNT_MASK ensures value is at most 7"
)]
const fn len(self) -> usize {
// Add 1 because we store count minus 1
((self.0 & INLINE_COUNT_MASK) + 1) as usize
}
fn iter(self) -> SmallSegmentsInfoIterator {
SmallSegmentsInfoIterator {
segments: self,
index: 0,
next_offset: TextSize::new(0),
}
}
/// Returns the parent member expression, e.g. `x.b` from `x.b.c`, or `None` if the parent is
/// the `symbol` itself (e, g. parent of `x.a` is just `x`).
const fn parent(self) -> Option<Self> {
let len = self.len();
if len <= 1 {
return None;
}
// Simply copy the packed value but update the count
let mut new_packed = self.0;
// Clear the count bits and set the new count (len - 2, since we store count - 1)
new_packed &= !INLINE_COUNT_MASK;
new_packed |= (len - 2) as u64;
Some(Self(new_packed))
}
}
impl std::fmt::Debug for SmallSegments {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.debug_list().entries(self.iter()).finish()
}
}
struct SmallSegmentsInfoIterator {
segments: SmallSegments,
index: usize,
next_offset: TextSize,
}
impl Iterator for SmallSegmentsInfoIterator {
type Item = SegmentInfo;
fn next(&mut self) -> Option<Self::Item> {
let count = self.segments.len();
if self.index >= count {
return None;
}
// Extract the relative offset and kind for the current segment
let shift = INLINE_COUNT_BITS
+ (u32::try_from(self.index).expect("index is bounded by INLINE_MAX_SEGMENTS")
* INLINE_SEGMENT_BITS);
let segment_data = (self.segments.0 >> shift) & INLINE_SEGMENT_MASK;
let kind = (segment_data & INLINE_KIND_MASK) as u8;
let relative_offset = ((segment_data >> INLINE_KIND_BITS) & INLINE_PREV_LEN_MASK) as u32;
// Update the running absolute offset
self.next_offset += TextSize::new(relative_offset);
let kind = match kind {
0 => SegmentKind::Attribute,
1 => SegmentKind::IntSubscript,
2 => SegmentKind::StringSubscript,
_ => panic!("Invalid SegmentKind bits"),
};
self.index += 1;
Some(SegmentInfo::new(kind, self.next_offset))
}
}
/// Reference view of segments, can be either small (inline) or heap-allocated.
#[derive(Clone, Copy, Debug)]
enum SegmentsRef<'a> {
Small(SmallSegments),
Heap(&'a [SegmentInfo]),
}
impl<'a> SegmentsRef<'a> {
fn len(&self) -> usize {
match self {
Self::Small(small) => small.len(),
Self::Heap(segments) => segments.len(),
}
}
fn iter(&self) -> impl Iterator<Item = SegmentInfo> + '_ {
match self {
Self::Small(small) => itertools::Either::Left(small.iter()),
Self::Heap(heap) => itertools::Either::Right(heap.iter().copied()),
}
}
/// Returns a parent view with one fewer segment, or None if <= 1 segment
fn parent(&self) -> Option<SegmentsRef<'a>> {
match self {
Self::Small(small) => small.parent().map(SegmentsRef::Small),
Self::Heap(segments) => {
let len = segments.len();
if len <= 1 {
None
} else {
Some(SegmentsRef::Heap(&segments[..len - 1]))
}
}
}
}
}
impl<'a> From<&'a Segments> for SegmentsRef<'a> {
fn from(segments: &'a Segments) -> Self {
match segments {
Segments::Small(small) => SegmentsRef::Small(*small),
Segments::Heap(heap) => SegmentsRef::Heap(heap),
}
}
}
impl PartialEq for SegmentsRef<'_> {
fn eq(&self, other: &Self) -> bool {
let len = self.len();
if len != other.len() {
return false;
}
self.iter().eq(other.iter())
}
}
impl Eq for SegmentsRef<'_> {}
/// Helper function to hash a single value and return the hash.
fn hash_single<T: Hash>(value: &T) -> u64 {
let mut hasher = FxHasher::default();
value.hash(&mut hasher);
hasher.finish()
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_member_expr_ref_hash_and_eq_small_heap() {
// For expression: foo.bar[0]["baz"]
// The path would be: "foobar0baz" (no dots or brackets in the path)
let path = "foobar0baz";
let segments = vec![
SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6
SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(7)), // ["baz"] at offset 7
];
// Create Small version.
let small_segments = SmallSegments::try_from_slice(&segments).unwrap();
let member_ref_small = MemberExprRef {
path,
segments: SegmentsRef::Small(small_segments),
};
// Create Heap version with the same data.
let heap_segments: Box<[SegmentInfo]> = segments.into_boxed_slice();
let member_ref_heap = MemberExprRef {
path,
segments: SegmentsRef::Heap(&heap_segments),
};
// Test hash equality (MemberExprRef only hashes the path).
assert_eq!(
hash_single(&member_ref_small),
hash_single(&member_ref_heap)
);
// Test equality in both directions.
assert_eq!(member_ref_small, member_ref_heap);
assert_eq!(member_ref_heap, member_ref_small);
}
#[test]
fn test_member_expr_ref_different_segments() {
// For expressions: foo.bar[0] vs foo.bar["0"]
// Both have the same path "foobar0" but different segment types
let path = "foobar0";
// First expression: foo.bar[0]
let segments1 = vec![
SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
SegmentInfo::new(SegmentKind::IntSubscript, TextSize::new(6)), // [0] at offset 6
];
// Second expression: foo.bar["0"]
let segments2 = vec![
SegmentInfo::new(SegmentKind::Attribute, TextSize::new(3)), // .bar at offset 3
SegmentInfo::new(SegmentKind::StringSubscript, TextSize::new(6)), // ["0"] at offset 6
];
// Create MemberExprRef instances
let small1 = SmallSegments::try_from_slice(&segments1).unwrap();
let member_ref1 = MemberExprRef {
path,
segments: SegmentsRef::Small(small1),
};
let small2 = SmallSegments::try_from_slice(&segments2).unwrap();
let member_ref2 = MemberExprRef {
path,
segments: SegmentsRef::Small(small2),
};
// Test inequality
assert_ne!(member_ref1, member_ref2);
assert_ne!(member_ref2, member_ref1);
// Test hash equality (MemberExprRef only hashes the path, not segments)
assert_eq!(hash_single(&member_ref1), hash_single(&member_ref2));
}
#[test]
fn test_member_expr_ref_parent() {
use ruff_python_parser::parse_expression;
// Parse a real Python expression
let parsed = parse_expression(r#"foo.bar[0]["baz"]"#).unwrap();
let expr = parsed.expr();
// Convert to MemberExpr
let member_expr = MemberExpr::try_from_expr(ast::ExprRef::from(expr)).unwrap();
let member_ref = member_expr.as_ref();
// Verify the initial state: foo.bar[0]["baz"]
assert_eq!(member_ref.symbol_name(), "foo");
let segments: Vec<_> = member_ref.segments().map(|s| (s.kind, s.text)).collect();
assert_eq!(
segments,
vec![
(SegmentKind::Attribute, "bar"),
(SegmentKind::IntSubscript, "0"),
(SegmentKind::StringSubscript, "baz")
]
);
// Test parent() removes the last segment ["baz"] -> foo.bar[0]
let parent1 = member_ref.parent().unwrap();
assert_eq!(parent1.symbol_name(), "foo");
let parent1_segments: Vec<_> = parent1.segments().map(|s| (s.kind, s.text)).collect();
assert_eq!(
parent1_segments,
vec![
(SegmentKind::Attribute, "bar"),
(SegmentKind::IntSubscript, "0")
]
);
// Test parent of parent removes [0] -> foo.bar
let parent2 = parent1.parent().unwrap();
assert_eq!(parent2.symbol_name(), "foo");
let parent2_segments: Vec<_> = parent2.segments().map(|s| (s.kind, s.text)).collect();
assert_eq!(parent2_segments, vec![(SegmentKind::Attribute, "bar")]);
// Test parent of single segment is a symbol and not a member.
let parent3 = parent2.parent();
assert!(parent3.is_none());
}
#[test]
fn test_member_expr_small_vs_heap_allocation() {
use ruff_python_parser::parse_expression;
// Test Small allocation: 7 segments (maximum for inline storage)
// Create expression with exactly 7 segments: x.a.b.c.d.e.f.g
let small_expr = parse_expression("x.a.b.c.d.e.f.g").unwrap();
let small_member =
MemberExpr::try_from_expr(ast::ExprRef::from(small_expr.expr())).unwrap();
// Should use Small allocation
assert!(matches!(small_member.segments, Segments::Small(_)));
assert_eq!(small_member.num_segments(), 7);
// Test Heap allocation: 8 segments (exceeds inline capacity)
// Create expression with 8 segments: x.a.b.c.d.e.f.g.h
let heap_expr = parse_expression("x.a.b.c.d.e.f.g.h").unwrap();
let heap_member = MemberExpr::try_from_expr(ast::ExprRef::from(heap_expr.expr())).unwrap();
// Should use Heap allocation
assert!(matches!(heap_member.segments, Segments::Heap(_)));
assert_eq!(heap_member.num_segments(), 8);
// Test Small allocation with relative offset limit
// Create expression where relative offsets are small enough: a.b[0]["c"]
let small_offset_expr = parse_expression(r#"a.b[0]["c"]"#).unwrap();
let small_offset_member =
MemberExpr::try_from_expr(ast::ExprRef::from(small_offset_expr.expr())).unwrap();
// Should use Small allocation (3 segments, small offsets)
assert!(matches!(small_offset_member.segments, Segments::Small(_)));
assert_eq!(small_offset_member.num_segments(), 3);
// Test Small allocation with maximum 63-byte relative offset limit
// Create expression where one segment has exactly 63 bytes (the limit)
let segment_63_bytes = "a".repeat(63);
let max_offset_expr_code = format!("x.{segment_63_bytes}.y");
let max_offset_expr = parse_expression(&max_offset_expr_code).unwrap();
let max_offset_member =
MemberExpr::try_from_expr(ast::ExprRef::from(max_offset_expr.expr())).unwrap();
// Should still use Small allocation (exactly at the limit)
assert!(matches!(max_offset_member.segments, Segments::Small(_)));
assert_eq!(max_offset_member.num_segments(), 2);
// Test that heap allocation works for segment content that would exceed relative offset limits
// This would require very long identifiers (>63 bytes between segments), which is uncommon
// but we can test by creating an expression with long attribute names
let long_name = "a".repeat(64); // 64 bytes (exceeds 63-byte limit)
let long_expr_code = format!("x.{long_name}.y");
let long_expr = parse_expression(&long_expr_code).unwrap();
let long_member = MemberExpr::try_from_expr(ast::ExprRef::from(long_expr.expr())).unwrap();
// Should use Heap allocation due to large relative offset
assert!(matches!(long_member.segments, Segments::Heap(_)));
assert_eq!(long_member.num_segments(), 2);
}
}

View file

@ -1,6 +1,5 @@
use crate::semantic_index::member::{
Member, MemberExpr, MemberExprRef, MemberSegment, MemberTable, MemberTableBuilder,
ScopedMemberId,
Member, MemberExpr, MemberExprRef, MemberTable, MemberTableBuilder, ScopedMemberId,
};
use crate::semantic_index::scope::FileScopeId;
use crate::semantic_index::symbol::{ScopedSymbolId, Symbol, SymbolTable, SymbolTableBuilder};
@ -37,48 +36,14 @@ impl PlaceExpr {
/// * attribute: `x.y`
/// * subscripts with integer or string literals: `x[0]`, `x['key']`
pub(crate) fn try_from_expr<'e>(expr: impl Into<ast::ExprRef<'e>>) -> Option<Self> {
let mut current = expr.into();
let mut segments = smallvec::SmallVec::new_const();
let expr = expr.into();
loop {
match current {
ast::ExprRef::Name(name) => {
if segments.is_empty() {
return Some(PlaceExpr::Symbol(Symbol::new(name.id.clone())));
}
return Some(PlaceExpr::Member(Member::new(MemberExpr::new(
name.id.clone(),
segments,
))));
}
ast::ExprRef::Attribute(attribute) => {
segments.push(MemberSegment::Attribute(attribute.attr.id.clone()));
current = ast::ExprRef::from(&attribute.value);
}
ast::ExprRef::Subscript(subscript) => {
match &*subscript.slice {
ast::Expr::NumberLiteral(ast::ExprNumberLiteral {
value: ast::Number::Int(index),
..
}) => {
segments.push(MemberSegment::IntSubscript(index.clone()));
}
ast::Expr::StringLiteral(string) => {
segments.push(MemberSegment::StringSubscript(string.value.to_string()));
}
_ => {
return None;
}
}
current = ast::ExprRef::from(&subscript.value);
}
_ => {
return None;
}
}
if let ast::ExprRef::Name(name) = expr {
return Some(PlaceExpr::Symbol(Symbol::new(name.id.clone())));
}
let member_expression = MemberExpr::try_from_expr(expr)?;
Some(Self::Member(Member::new(member_expression)))
}
}
@ -132,7 +97,7 @@ impl<'a> PlaceExprRef<'a> {
pub(crate) fn num_member_segments(self) -> usize {
match self {
PlaceExprRef::Symbol(_) => 0,
PlaceExprRef::Member(member) => member.expression().member_segments().len(),
PlaceExprRef::Member(member) => member.expression().num_segments(),
}
}
}
@ -517,24 +482,20 @@ enum ParentPlaceIterState<'a> {
impl<'a> ParentPlaceIterState<'a> {
fn parent_state(
expression: MemberExprRef<'a>,
expression: &MemberExprRef<'a>,
symbols: &'a SymbolTable,
members: &'a MemberTable,
) -> Self {
let segments = expression.rev_member_segments();
let segments = &segments[1..];
if segments.is_empty() {
Self::Symbol {
symbol_name: expression.symbol_name(),
symbols,
}
} else {
Self::Member {
next_member: MemberExprRef::from_raw(expression.symbol_name(), segments),
match expression.parent() {
Some(parent) => Self::Member {
next_member: parent,
symbols,
members,
}
},
None => Self::Symbol {
symbol_name: expression.symbol_name(),
symbols,
},
}
}
}
@ -549,9 +510,10 @@ impl<'a> ParentPlaceIter<'a> {
symbol_table: &'a SymbolTable,
member_table: &'a MemberTable,
) -> Self {
let expr_ref = expression.as_ref();
ParentPlaceIter {
state: Some(ParentPlaceIterState::parent_state(
expression.as_ref(),
&expr_ref,
symbol_table,
member_table,
)),
@ -578,7 +540,7 @@ impl Iterator for ParentPlaceIter<'_> {
next_member,
} => {
self.state = Some(ParentPlaceIterState::parent_state(
next_member,
&next_member,
symbols,
members,
));

View file

@ -309,12 +309,12 @@ impl<'db> AllMembers<'db> {
let Some(name) = place_expr.as_instance_attribute() else {
continue;
};
let result = ty.member(db, name.as_str());
let result = ty.member(db, name);
let Some(ty) = result.place.ignore_possibly_unbound() else {
continue;
};
self.members.insert(Member {
name: name.clone(),
name: Name::new(name),
ty,
});
}