ruff/crates/ruff_python_ast/src/call_path.rs
Charlie Marsh 76148ddb76
Store call paths rather than stringified names (#6102)
## Summary

Historically, we've stored "qualified names" on our
`BindingKind::Import`, `BindingKind::SubmoduleImport`, and
`BindingKind::ImportFrom` structs. In Ruff, a "qualified name" is a
dot-separated path to a symbol. For example, given `import foo.bar`, the
"qualified name" would be `"foo.bar"`; and given `from foo.bar import
baz`, the "qualified name" would be `foo.bar.baz`.

This PR modifies the `BindingKind` structs to instead store _call paths_
rather than qualified names. So in the examples above, we'd store
`["foo", "bar"]` and `["foo", "bar", "baz"]`. It turns out that this
is more efficient given our data access patterns. Namely, we frequently
need to convert the qualified name to a call path (whenever we call
`resolve_call_path`), and it turns out that we do this operation enough
that those conversions show up on benchmarks.

There are a few other advantages to using call paths, rather than
qualified names:

1. The size of `BindingKind` is reduced from 32 to 24 bytes, since we no
longer need to store a `String` (only a boxed slice).
2. All three import types are more consistent, since they now all store
a boxed slice, rather than some storing an `&str` and some storing a
`String` (for `BindingKind::ImportFrom`, we needed to allocate a
`String` to create the qualified name, but the call path is a slice of
static elements that don't require that allocation).
3. A lot of code gets simpler, in part because we now do call path
resolution "earlier". Most notably, for relative imports (`from .foo
import bar`), we store the _resolved_ call path rather than the relative
call path, so the semantic model doesn't have to deal with that
resolution. (See that `resolve_call_path` is simpler, fewer branches,
etc.)

In my testing, this change improves the all-rules benchmark by another
4-5% on top of the improvements mentioned in #6047.
2023-08-05 15:21:50 +00:00

204 lines
6.4 KiB
Rust

use smallvec::{smallvec, SmallVec};
use crate::{nodes, Expr};
/// A representation of a qualified name, like `typing.List`.
///
/// Each element is one dot-separated segment, borrowed as a `&str` with lifetime `'a`
/// (e.g., `typing.List` is represented as `["typing", "List"]`). The backing
/// [`SmallVec`] is declared with an inline array of eight segments.
pub type CallPath<'a> = SmallVec<[&'a str; 8]>;
/// Convert an `Expr` to its [`CallPath`] segments (like `["typing", "List"]`).
///
/// Returns `Some` when `expr` is a bare name (`Expr::Name`) or a chain of attribute
/// accesses (`Expr::Attribute`) whose innermost value is a name. The segments are
/// ordered as written in the source: the root name first, followed by each attribute
/// from innermost to outermost.
///
/// Returns `None` as soon as any link in the chain is neither an attribute access nor
/// a name.
pub fn collect_call_path(expr: &Expr) -> Option<CallPath> {
// Unroll the loop up to eight times, to match the maximum number of expected attributes.
// In practice, unrolling appears to give about a 4x speed-up on this hot path.
//
// Each step peels one attribute access off the outside of the expression: `attr1` is
// the outermost (rightmost) attribute, `attr2` the one inside it, and so on. As soon as
// the remaining value is a name, the segments are emitted in source order.
let attr1 = match expr {
Expr::Attribute(attr1) => attr1,
// Ex) `foo`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[id.as_str()]))
}
_ => return None,
};
let attr2 = match attr1.value.as_ref() {
Expr::Attribute(attr2) => attr2,
// Ex) `foo.bar`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[id.as_str(), attr1.attr.as_str()]))
}
_ => return None,
};
let attr3 = match attr2.value.as_ref() {
Expr::Attribute(attr3) => attr3,
// Ex) `foo.bar.baz`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
let attr4 = match attr3.value.as_ref() {
Expr::Attribute(attr4) => attr4,
// Ex) `foo.bar.baz.bop`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
let attr5 = match attr4.value.as_ref() {
Expr::Attribute(attr5) => attr5,
// Ex) `foo.bar.baz.bop.bap`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr4.attr.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
let attr6 = match attr5.value.as_ref() {
Expr::Attribute(attr6) => attr6,
// Ex) `foo.bar.baz.bop.bap.bab`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr5.attr.as_str(),
attr4.attr.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
let attr7 = match attr6.value.as_ref() {
Expr::Attribute(attr7) => attr7,
// Ex) `foo.bar.baz.bop.bap.bab.bob`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr6.attr.as_str(),
attr5.attr.as_str(),
attr4.attr.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
let attr8 = match attr7.value.as_ref() {
Expr::Attribute(attr8) => attr8,
// Ex) `foo.bar.baz.bop.bap.bab.bob.bib`
Expr::Name(nodes::ExprName { id, .. }) => {
return Some(CallPath::from_slice(&[
id.as_str(),
attr7.attr.as_str(),
attr6.attr.as_str(),
attr5.attr.as_str(),
attr4.attr.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]));
}
_ => return None,
};
// More than eight attribute accesses: resolve everything inside `attr8` recursively,
// then append the eight attributes peeled off above, innermost (`attr8`) first so the
// result stays in source order.
collect_call_path(&attr8.value).map(|mut segments| {
segments.extend([
attr8.attr.as_str(),
attr7.attr.as_str(),
attr6.attr.as_str(),
attr5.attr.as_str(),
attr4.attr.as_str(),
attr3.attr.as_str(),
attr2.attr.as_str(),
attr1.attr.as_str(),
]);
segments
})
}
/// Render an `Expr` as a dotted call path string (like `List`, or `typing.List`).
///
/// Returns `None` when [`collect_call_path`] cannot extract a call path from `expr`;
/// otherwise the collected segments are passed through [`format_call_path`].
pub fn compose_call_path(expr: &Expr) -> Option<String> {
    let segments = collect_call_path(expr)?;
    Some(format_call_path(&segments))
}
/// Format a call path for display.
///
/// Three shapes are distinguished by the first segment:
///
/// * An empty first segment marks a builtin; it is dropped and the rest is dot-joined.
///   Ex) `["", "bool"]` -> `"bool"`
/// * A first segment of exactly `"."` marks an unresolved relative import; every leading
///   `"."` segment contributes a single dot, and the remaining segments are dot-joined.
///   Ex) `[".", "foo", "bar"]` -> `".foo.bar"`
/// * Anything else (including an empty path) is simply dot-joined.
///   Ex) `["typing", "List"]` -> `"typing.List"`
pub fn format_call_path(call_path: &[&str]) -> String {
    match call_path {
        // Builtin: skip the empty marker segment.
        ["", members @ ..] => members.join("."),
        // Relative import: emit one dot per leading `"."` segment, then the dotted names.
        [".", ..] => {
            let level = call_path
                .iter()
                .take_while(|segment| **segment == ".")
                .count();
            let (_, names) = call_path.split_at(level);
            let mut rendered = ".".repeat(level);
            rendered.push_str(&names.join("."));
            rendered
        }
        _ => call_path.join("."),
    }
}
/// Create a [`CallPath`] from an unqualified name.
///
/// ```rust
/// # use smallvec::smallvec;
/// # use ruff_python_ast::call_path::from_unqualified_name;
///
/// assert_eq!(from_unqualified_name("typing.List").as_slice(), ["typing", "List"]);
/// assert_eq!(from_unqualified_name("list").as_slice(), ["list"]);
/// ```
pub fn from_unqualified_name(name: &str) -> CallPath {
name.split('.').collect()
}
/// Build a [`CallPath`] from a fully-qualified name.
///
/// A name containing at least one `.` is split into its segments. A name without any
/// `.` is treated as a builtin and is prefixed with an empty segment.
///
/// ```rust
/// # use smallvec::smallvec;
/// # use ruff_python_ast::call_path::from_qualified_name;
///
/// assert_eq!(from_qualified_name("typing.List").as_slice(), ["typing", "List"]);
/// assert_eq!(from_qualified_name("list").as_slice(), ["", "list"]);
/// ```
pub fn from_qualified_name(name: &str) -> CallPath {
    if !name.contains('.') {
        // Special-case: for builtins, return `["", "int"]` instead of `["int"]`.
        return smallvec!["", name];
    }
    name.split('.').collect()
}