Store call paths rather than stringified names (#6102)

## Summary

Historically, we've stored "qualified names" on our
`BindingKind::Import`, `BindingKind::SubmoduleImport`, and
`BindingKind::FromImport` structs. In Ruff, a "qualified name" is a
dot-separated path to a symbol. For example, given `import foo.bar`, the
"qualified name" would be `"foo.bar"`; and given `from foo.bar import
baz`, the "qualified name" would be `"foo.bar.baz"`.

This PR modifies the `BindingKind` structs to store _call paths_
rather than qualified names. So in the examples above, we'd store
`["foo", "bar"]` and `["foo", "bar", "baz"]`. It turns out that this is
more efficient given our data access patterns. Namely, we frequently
need to convert the qualified name to a call path (whenever we call
`resolve_call_path`), and it turns out that we do this operation enough
that those conversions show up on benchmarks.

There are a few other advantages to using call paths, rather than
qualified names:

1. The size of `BindingKind` is reduced from 32 to 24 bytes, since we no
longer need to store a `String` (only a boxed slice).
2. All three import types are more consistent, since they now all store
a boxed slice, rather than some storing a `&str` and some storing a
`String` (for `BindingKind::FromImport`, we needed to allocate a
`String` to create the qualified name, but the call path is a slice of
static elements that don't require that allocation).
3. A lot of code gets simpler, in part because we now do call path
resolution "earlier". Most notably, for relative imports (`from .foo
import bar`), we store the _resolved_ call path rather than the relative
call path, so the semantic model doesn't have to deal with that
resolution. (Note that `resolve_call_path` is now simpler, with fewer
branches.)

In my testing, this change improves the all-rules benchmark by another
4-5% on top of the improvements mentioned in #6047.
This commit is contained in:
Charlie Marsh 2023-08-05 11:21:50 -04:00 committed by GitHub
parent 501f537cb8
commit 76148ddb76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 449 additions and 365 deletions

View file

@ -1,11 +1,13 @@
use std::borrow::Cow;
use std::ops::{Deref, DerefMut};
use bitflags::bitflags;
use ruff_python_ast::Ranged;
use ruff_text_size::TextRange;
use ruff_index::{newtype_index, IndexSlice, IndexVec};
use ruff_python_ast::call_path::format_call_path;
use ruff_python_ast::Ranged;
use ruff_source_file::Locator;
use ruff_text_size::TextRange;
use crate::context::ExecutionContext;
use crate::model::SemanticModel;
@ -117,38 +119,38 @@ impl<'a> Binding<'a> {
// import foo.baz
// ```
BindingKind::Import(Import {
qualified_name: redefinition,
call_path: redefinition,
}) => {
if let BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) = &existing.kind
{
return redefinition == definition;
}
}
BindingKind::FromImport(FromImport {
qualified_name: redefinition,
call_path: redefinition,
}) => {
if let BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) = &existing.kind
{
return redefinition == definition;
}
}
BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: redefinition,
call_path: redefinition,
}) => match &existing.kind {
BindingKind::Import(Import {
qualified_name: definition,
call_path: definition,
})
| BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) => {
return redefinition == definition;
}
BindingKind::FromImport(FromImport {
qualified_name: definition,
call_path: definition,
}) => {
return redefinition == definition;
}
@ -175,35 +177,6 @@ impl<'a> Binding<'a> {
)
}
/// Returns the fully-qualified name of the symbol when this binding is an import (`import`,
/// submodule import, or `from` import); returns `None` for every other kind of binding.
pub fn qualified_name(&self) -> Option<&str> {
    let name = match &self.kind {
        // Both plain and submodule imports store a borrowed `&str`.
        BindingKind::Import(Import { qualified_name })
        | BindingKind::SubmoduleImport(SubmoduleImport { qualified_name }) => *qualified_name,
        // `from` imports own their name as a `String`.
        BindingKind::FromImport(FromImport { qualified_name }) => qualified_name.as_str(),
        _ => return None,
    };
    Some(name)
}
/// Returns the name of the module this symbol was imported from, or `None` if the binding is
/// not an import.
///
/// For `import` and submodule imports this is the text before the first `.` of the qualified
/// name; for `from` imports it is everything before the last `.`. When the qualified name
/// contains no `.`, the whole name is returned.
pub fn module_name(&self) -> Option<&str> {
    match &self.kind {
        BindingKind::Import(Import { qualified_name })
        | BindingKind::SubmoduleImport(SubmoduleImport { qualified_name }) => {
            // `str::split` always yields at least one item, so the fallback arm is only a
            // safety net.
            let root = match qualified_name.split('.').next() {
                Some(first_segment) => first_segment,
                None => *qualified_name,
            };
            Some(root)
        }
        BindingKind::FromImport(FromImport { qualified_name }) => {
            // Drop the trailing member segment, keeping the parent module path.
            let parent = match qualified_name.rsplit_once('.') {
                Some((module, _member)) => module,
                None => qualified_name.as_str(),
            };
            Some(parent)
        }
        _ => None,
    }
}
/// Returns the name of the binding (e.g., `x` in `x = 1`).
pub fn name<'b>(&self, locator: &'b Locator) -> &'b str {
locator.slice(self.range)
@ -221,6 +194,15 @@ impl<'a> Binding<'a> {
}
})
}
/// Returns a borrowed [`AnyImport`] view of this binding when it is one of the three import
/// kinds (`Import`, `SubmoduleImport`, `FromImport`); returns `None` otherwise.
pub fn as_any_import(&'a self) -> Option<AnyImport<'a>> {
    let any_import = match &self.kind {
        BindingKind::Import(inner) => AnyImport::Import(inner),
        BindingKind::SubmoduleImport(inner) => AnyImport::SubmoduleImport(inner),
        BindingKind::FromImport(inner) => AnyImport::FromImport(inner),
        _ => return None,
    };
    Some(any_import)
}
}
bitflags! {
@ -356,18 +338,18 @@ pub struct Import<'a> {
/// The call path of the module being imported.
/// Ex) Given `import foo`, `call_path` would be `["foo"]`.
/// Ex) Given `import foo as bar`, `call_path` would be `["foo"]`.
pub qualified_name: &'a str,
pub call_path: Box<[&'a str]>,
}
/// A binding for a member imported from a module, keyed on the name to which the member is bound.
/// Ex) `from foo import bar` would be keyed on "bar".
/// Ex) `from foo import bar as baz` would be keyed on "baz".
#[derive(Debug, Clone)]
pub struct FromImport {
pub struct FromImport<'a> {
/// The call path of the member being imported.
/// Ex) Given `from foo import bar`, `call_path` would be `["foo", "bar"]`.
/// Ex) Given `from foo import bar as baz`, `call_path` would be `["foo", "bar"]`.
pub qualified_name: String,
pub call_path: Box<[&'a str]>,
}
/// A binding for a submodule imported from a module, keyed on the name of the parent module.
@ -376,7 +358,7 @@ pub struct FromImport {
pub struct SubmoduleImport<'a> {
/// The call path of the submodule being imported.
/// Ex) Given `import foo.bar`, `call_path` would be `["foo", "bar"]`.
pub qualified_name: &'a str,
pub call_path: Box<[&'a str]>,
}
#[derive(Debug, Clone, is_macro::Is)]
@ -485,7 +467,7 @@ pub enum BindingKind<'a> {
/// ```python
/// from foo import bar
/// ```
FromImport(FromImport),
FromImport(FromImport<'a>),
/// A binding for a submodule imported from a module, like `bar` in:
/// ```python
@ -532,3 +514,106 @@ bitflags! {
const IMPORT_ERROR = 0b0000_0100;
}
}
/// Shared interface over bindings that were introduced by an import statement.
pub trait Imported<'a> {
    /// The segments of the path leading to the imported symbol.
    fn call_path(&self) -> &[&str];

    /// The segments naming the module the symbol was imported from.
    fn module_name(&self) -> &[&str];

    /// The member name of the imported symbol.
    ///
    /// For a straight `import` this is the same as the qualified name; for a `from` import it
    /// is the name of the symbol that was imported.
    fn member_name(&self) -> Cow<'a, str>;

    /// The fully-qualified name of the imported symbol, produced by formatting
    /// [`Imported::call_path`] with `format_call_path`.
    fn qualified_name(&self) -> String {
        let segments = self.call_path();
        format_call_path(segments)
    }
}
impl<'a> Imported<'a> for Import<'a> {
    /// The full call path of the import; e.g., `import foo` yields `["foo"]`.
    fn call_path(&self) -> &[&str] {
        &self.call_path[..]
    }

    /// The first segment of the call path; e.g., `import foo` yields `["foo"]`.
    ///
    /// Panics if the stored call path is empty.
    fn module_name(&self) -> &[&str] {
        let path = self.call_path();
        &path[..1]
    }

    /// The qualified name of the import as an owned string; e.g., `import foo` yields `"foo"`.
    fn member_name(&self) -> Cow<'a, str> {
        Cow::from(self.qualified_name())
    }
}
impl<'a> Imported<'a> for SubmoduleImport<'a> {
    /// The full call path of the submodule; e.g., `import foo.bar` yields `["foo", "bar"]`.
    fn call_path(&self) -> &[&str] {
        &self.call_path[..]
    }

    /// The top-level package segment; e.g., `import foo.bar` yields `["foo"]`.
    ///
    /// Panics if the stored call path is empty.
    fn module_name(&self) -> &[&str] {
        let path = self.call_path();
        &path[..1]
    }

    /// The qualified name of the submodule as an owned string; e.g., `import foo.bar` yields
    /// `"foo.bar"`.
    fn member_name(&self) -> Cow<'a, str> {
        Cow::from(self.qualified_name())
    }
}
impl<'a> Imported<'a> for FromImport<'a> {
    /// The full call path of the member; e.g., `from foo import bar` yields `["foo", "bar"]`.
    fn call_path(&self) -> &[&str] {
        &self.call_path[..]
    }

    /// Every segment except the last; e.g., `from foo import bar` yields `["foo"]`.
    ///
    /// Panics if the stored call path is empty.
    fn module_name(&self) -> &[&str] {
        let member_index = self.call_path.len() - 1;
        &self.call_path[..member_index]
    }

    /// The last segment, borrowed from the call path; e.g., `from foo import bar` yields
    /// `"bar"`.
    ///
    /// Panics if the stored call path is empty.
    fn member_name(&self) -> Cow<'a, str> {
        let member_index = self.call_path.len() - 1;
        Cow::Borrowed(self.call_path[member_index])
    }
}
/// A wrapper around an import [`BindingKind`] that can be any of the three types of imports.
///
/// Each variant borrows the corresponding import struct rather than owning it.
#[derive(Debug, Clone)]
pub enum AnyImport<'a> {
    /// A plain module import, e.g. `import foo`.
    Import(&'a Import<'a>),
    /// A submodule import, e.g. `import foo.bar`.
    SubmoduleImport(&'a SubmoduleImport<'a>),
    /// A member imported from a module, e.g. `from foo import bar`.
    FromImport(&'a FromImport<'a>),
}
impl<'a> Imported<'a> for AnyImport<'a> {
    /// Forwards to the wrapped import's `call_path`.
    fn call_path(&self) -> &[&str] {
        match *self {
            Self::Import(inner) => inner.call_path(),
            Self::SubmoduleImport(inner) => inner.call_path(),
            Self::FromImport(inner) => inner.call_path(),
        }
    }

    /// Forwards to the wrapped import's `module_name`.
    fn module_name(&self) -> &[&str] {
        match *self {
            Self::Import(inner) => inner.module_name(),
            Self::SubmoduleImport(inner) => inner.module_name(),
            Self::FromImport(inner) => inner.module_name(),
        }
    }

    /// Forwards to the wrapped import's `member_name`.
    fn member_name(&self) -> Cow<'a, str> {
        match *self {
            Self::Import(inner) => inner.member_name(),
            Self::SubmoduleImport(inner) => inner.member_name(),
            Self::FromImport(inner) => inner.member_name(),
        }
    }
}