Store call paths rather than stringified names (#6102)

## Summary

Historically, we've stored "qualified names" on our
`BindingKind::Import`, `BindingKind::SubmoduleImport`, and
`BindingKind::ImportFrom` structs. In Ruff, a "qualified name" is a
dot-separated path to a symbol. For example, given `import foo.bar`, the
"qualified name" would be `"foo.bar"`; and given `from foo.bar import
baz`, the "qualified name" would be `foo.bar.baz`.

This PR modifies the `BindingKind` structs to instead store _call paths_
rather than qualified names. So in the examples above, we'd store
`["foo", "bar"]` and `["foo", "bar", "baz"]`. It turns out that this
more efficient given our data access patterns. Namely, we frequently
need to convert the qualified name to a call path (whenever we call
`resolve_call_path`), and it turns out that we do this operation enough
that those conversations show up on benchmarks.

There are a few other advantages to using call paths, rather than
qualified names:

1. The size of `BindingKind` is reduced from 32 to 24 bytes, since we no
longer need to store a `String` (only a boxed slice).
2. All three import types are more consistent, since they now all store
a boxed slice, rather than some storing an `&str` and some storing a
`String` (for `BindingKind::ImportFrom`, we needed to allocate a
`String` to create the qualified name, but the call path is a slice of
static elements that don't require that allocation).
3. A lot of code gets simpler, in part because we now do call path
resolution "earlier". Most notably, for relative imports (`from .foo
import bar`), we store the _resolved_ call path rather than the relative
call path, so the semantic model doesn't have to deal with that
resolution. (See that `resolve_call_path` is simpler, fewer branches,
etc.)

In my testing, this change improves the all-rules benchmark by another
4-5% on top of the improvements mentioned in #6047.
This commit is contained in:
Charlie Marsh 2023-08-05 11:21:50 -04:00 committed by GitHub
parent 501f537cb8
commit 76148ddb76
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 449 additions and 365 deletions

View file

@ -1,11 +1,13 @@
use std::borrow::Cow;
use std::ops::{Deref, DerefMut};
use bitflags::bitflags;
use ruff_python_ast::Ranged;
use ruff_text_size::TextRange;
use ruff_index::{newtype_index, IndexSlice, IndexVec};
use ruff_python_ast::call_path::format_call_path;
use ruff_python_ast::Ranged;
use ruff_source_file::Locator;
use ruff_text_size::TextRange;
use crate::context::ExecutionContext;
use crate::model::SemanticModel;
@ -117,38 +119,38 @@ impl<'a> Binding<'a> {
// import foo.baz
// ```
BindingKind::Import(Import {
qualified_name: redefinition,
call_path: redefinition,
}) => {
if let BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) = &existing.kind
{
return redefinition == definition;
}
}
BindingKind::FromImport(FromImport {
qualified_name: redefinition,
call_path: redefinition,
}) => {
if let BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) = &existing.kind
{
return redefinition == definition;
}
}
BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: redefinition,
call_path: redefinition,
}) => match &existing.kind {
BindingKind::Import(Import {
qualified_name: definition,
call_path: definition,
})
| BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: definition,
call_path: definition,
}) => {
return redefinition == definition;
}
BindingKind::FromImport(FromImport {
qualified_name: definition,
call_path: definition,
}) => {
return redefinition == definition;
}
@ -175,35 +177,6 @@ impl<'a> Binding<'a> {
)
}
/// Returns the fully-qualified symbol name, if this symbol was imported from another module.
pub fn qualified_name(&self) -> Option<&str> {
match &self.kind {
BindingKind::Import(Import { qualified_name }) => Some(qualified_name),
BindingKind::FromImport(FromImport { qualified_name }) => Some(qualified_name),
BindingKind::SubmoduleImport(SubmoduleImport { qualified_name }) => {
Some(qualified_name)
}
_ => None,
}
}
/// Returns the fully-qualified name of the module from which this symbol was imported, if this
/// symbol was imported from another module.
pub fn module_name(&self) -> Option<&str> {
match &self.kind {
BindingKind::Import(Import { qualified_name })
| BindingKind::SubmoduleImport(SubmoduleImport { qualified_name }) => {
Some(qualified_name.split('.').next().unwrap_or(qualified_name))
}
BindingKind::FromImport(FromImport { qualified_name }) => Some(
qualified_name
.rsplit_once('.')
.map_or(qualified_name, |(module, _)| module),
),
_ => None,
}
}
/// Returns the name of the binding (e.g., `x` in `x = 1`).
pub fn name<'b>(&self, locator: &'b Locator) -> &'b str {
locator.slice(self.range)
@ -221,6 +194,15 @@ impl<'a> Binding<'a> {
}
})
}
pub fn as_any_import(&'a self) -> Option<AnyImport<'a>> {
match &self.kind {
BindingKind::Import(import) => Some(AnyImport::Import(import)),
BindingKind::SubmoduleImport(import) => Some(AnyImport::SubmoduleImport(import)),
BindingKind::FromImport(import) => Some(AnyImport::FromImport(import)),
_ => None,
}
}
}
bitflags! {
@ -356,18 +338,18 @@ pub struct Import<'a> {
/// The full name of the module being imported.
/// Ex) Given `import foo`, `qualified_name` would be "foo".
/// Ex) Given `import foo as bar`, `qualified_name` would be "foo".
pub qualified_name: &'a str,
pub call_path: Box<[&'a str]>,
}
/// A binding for a member imported from a module, keyed on the name to which the member is bound.
/// Ex) `from foo import bar` would be keyed on "bar".
/// Ex) `from foo import bar as baz` would be keyed on "baz".
#[derive(Debug, Clone)]
pub struct FromImport {
pub struct FromImport<'a> {
/// The full name of the member being imported.
/// Ex) Given `from foo import bar`, `qualified_name` would be "foo.bar".
/// Ex) Given `from foo import bar as baz`, `qualified_name` would be "foo.bar".
pub qualified_name: String,
pub call_path: Box<[&'a str]>,
}
/// A binding for a submodule imported from a module, keyed on the name of the parent module.
@ -376,7 +358,7 @@ pub struct FromImport {
pub struct SubmoduleImport<'a> {
/// The full name of the submodule being imported.
/// Ex) Given `import foo.bar`, `qualified_name` would be "foo.bar".
pub qualified_name: &'a str,
pub call_path: Box<[&'a str]>,
}
#[derive(Debug, Clone, is_macro::Is)]
@ -485,7 +467,7 @@ pub enum BindingKind<'a> {
/// ```python
/// from foo import bar
/// ```
FromImport(FromImport),
FromImport(FromImport<'a>),
/// A binding for a submodule imported from a module, like `bar` in:
/// ```python
@ -532,3 +514,106 @@ bitflags! {
const IMPORT_ERROR = 0b0000_0100;
}
}
/// A trait for imported symbols.
pub trait Imported<'a> {
/// Returns the call path to the imported symbol.
fn call_path(&self) -> &[&str];
/// Returns the module name of the imported symbol.
fn module_name(&self) -> &[&str];
/// Returns the member name of the imported symbol. For a straight import, this is equivalent
/// to the qualified name; for a `from` import, this is the name of the imported symbol.
fn member_name(&self) -> Cow<'a, str>;
/// Returns the fully-qualified name of the imported symbol.
fn qualified_name(&self) -> String {
format_call_path(self.call_path())
}
}
impl<'a> Imported<'a> for Import<'a> {
/// For example, given `import foo`, returns `["foo"]`.
fn call_path(&self) -> &[&str] {
self.call_path.as_ref()
}
/// For example, given `import foo`, returns `["foo"]`.
fn module_name(&self) -> &[&str] {
&self.call_path[..1]
}
/// For example, given `import foo`, returns `"foo"`.
fn member_name(&self) -> Cow<'a, str> {
Cow::Owned(self.qualified_name())
}
}
impl<'a> Imported<'a> for SubmoduleImport<'a> {
/// For example, given `import foo.bar`, returns `["foo", "bar"]`.
fn call_path(&self) -> &[&str] {
self.call_path.as_ref()
}
/// For example, given `import foo.bar`, returns `["foo"]`.
fn module_name(&self) -> &[&str] {
&self.call_path[..1]
}
/// For example, given `import foo.bar`, returns `"foo.bar"`.
fn member_name(&self) -> Cow<'a, str> {
Cow::Owned(self.qualified_name())
}
}
impl<'a> Imported<'a> for FromImport<'a> {
/// For example, given `from foo import bar`, returns `["foo", "bar"]`.
fn call_path(&self) -> &[&str] {
self.call_path.as_ref()
}
/// For example, given `from foo import bar`, returns `["foo"]`.
fn module_name(&self) -> &[&str] {
&self.call_path[..self.call_path.len() - 1]
}
/// For example, given `from foo import bar`, returns `"bar"`.
fn member_name(&self) -> Cow<'a, str> {
Cow::Borrowed(self.call_path[self.call_path.len() - 1])
}
}
/// A wrapper around an import [`BindingKind`] that can be any of the three types of imports.
#[derive(Debug, Clone)]
pub enum AnyImport<'a> {
Import(&'a Import<'a>),
SubmoduleImport(&'a SubmoduleImport<'a>),
FromImport(&'a FromImport<'a>),
}
impl<'a> Imported<'a> for AnyImport<'a> {
fn call_path(&self) -> &[&str] {
match self {
Self::Import(import) => import.call_path(),
Self::SubmoduleImport(import) => import.call_path(),
Self::FromImport(import) => import.call_path(),
}
}
fn module_name(&self) -> &[&str] {
match self {
Self::Import(import) => import.module_name(),
Self::SubmoduleImport(import) => import.module_name(),
Self::FromImport(import) => import.module_name(),
}
}
fn member_name(&self) -> Cow<'a, str> {
match self {
Self::Import(import) => import.member_name(),
Self::SubmoduleImport(import) => import.member_name(),
Self::FromImport(import) => import.member_name(),
}
}
}

View file

@ -1,15 +1,15 @@
use std::path::Path;
use bitflags::bitflags;
use ruff_python_ast::{self as ast, Expr, Ranged, Stmt};
use ruff_text_size::{TextRange, TextSize};
use rustc_hash::FxHashMap;
use smallvec::smallvec;
use ruff_python_ast::call_path::{collect_call_path, from_unqualified_name, CallPath};
use ruff_python_ast::helpers::from_relative_import;
use ruff_python_ast::{self as ast, Expr, Ranged, Stmt};
use ruff_python_stdlib::path::is_python_stub_file;
use ruff_python_stdlib::typing::is_typing_extension;
use ruff_text_size::{TextRange, TextSize};
use crate::binding::{
Binding, BindingFlags, BindingId, BindingKind, Bindings, Exceptions, FromImport, Import,
@ -23,7 +23,7 @@ use crate::reference::{
ResolvedReference, ResolvedReferenceId, ResolvedReferences, UnresolvedReferences,
};
use crate::scope::{Scope, ScopeId, ScopeKind, Scopes};
use crate::{UnresolvedReference, UnresolvedReferenceFlags};
use crate::{Imported, UnresolvedReference, UnresolvedReferenceFlags};
/// A semantic model for a Python module, to enable querying the module's semantic information.
pub struct SemanticModel<'a> {
@ -583,17 +583,14 @@ impl<'a> SemanticModel<'a> {
// import pyarrow.csv
// print(pa.csv.read_csv("test.csv"))
// ```
let qualified_name = self.bindings[binding_id].qualified_name()?;
let has_alias = qualified_name
.split('.')
.last()
.map(|segment| segment != symbol)
.unwrap_or_default();
if !has_alias {
let import = self.bindings[binding_id].as_any_import()?;
let call_path = import.call_path();
let segment = call_path.last()?;
if *segment == symbol {
return None;
}
let binding_id = self.scopes[scope_id].get(qualified_name)?;
let binding_id = self.scopes[scope_id].get(segment)?;
if !self.bindings[binding_id].kind.is_submodule_import() {
return None;
}
@ -625,53 +622,41 @@ impl<'a> SemanticModel<'a> {
// If the name was already resolved, look it up; otherwise, search for the symbol.
let head = match_head(value)?;
let binding = self
.resolved_names
.get(&head.into())
.map(|id| self.binding(*id))
.or_else(|| self.find_binding(&head.id))?;
let binding = if let Some(id) = self.resolved_names.get(&head.into()) {
self.binding(*id)
} else {
self.find_binding(&head.id)?
};
match &binding.kind {
BindingKind::Import(Import {
qualified_name: name,
}) => {
let call_path = collect_call_path(value)?;
let (_, tail) = call_path.split_first()?;
let mut source_path: CallPath = from_unqualified_name(name);
source_path.extend_from_slice(tail);
Some(source_path)
BindingKind::Import(Import { call_path }) => {
let value_path = collect_call_path(value)?;
let (_, tail) = value_path.split_first()?;
let resolved: CallPath = call_path.iter().chain(tail.iter()).copied().collect();
Some(resolved)
}
BindingKind::SubmoduleImport(SubmoduleImport {
qualified_name: name,
}) => {
let call_path = collect_call_path(value)?;
let (_, tail) = call_path.split_first()?;
let name = name.split('.').next().unwrap_or(name);
let mut source_path: CallPath = from_unqualified_name(name);
source_path.extend_from_slice(tail);
Some(source_path)
BindingKind::SubmoduleImport(SubmoduleImport { call_path }) => {
let value_path = collect_call_path(value)?;
let (_, tail) = value_path.split_first()?;
let resolved: CallPath = call_path
.iter()
.take(1)
.chain(tail.iter())
.copied()
.collect();
Some(resolved)
}
BindingKind::FromImport(FromImport {
qualified_name: name,
}) => {
let call_path = collect_call_path(value)?;
let (_, tail) = call_path.split_first()?;
BindingKind::FromImport(FromImport { call_path }) => {
let value_path = collect_call_path(value)?;
let (_, tail) = value_path.split_first()?;
if name.starts_with('.') {
let mut source_path = from_relative_import(self.module_path?, name);
if source_path.is_empty() {
None
let resolved: CallPath =
if call_path.first().map_or(false, |segment| *segment == ".") {
from_relative_import(self.module_path?, call_path, tail)?
} else {
source_path.extend_from_slice(tail);
Some(source_path)
}
} else {
let mut source_path: CallPath = from_unqualified_name(name);
source_path.extend_from_slice(tail);
Some(source_path)
}
call_path.iter().chain(tail.iter()).copied().collect()
};
Some(resolved)
}
BindingKind::Builtin => Some(smallvec!["", head.id.as_str()]),
_ => None,
@ -695,6 +680,8 @@ impl<'a> SemanticModel<'a> {
module: &str,
member: &str,
) -> Option<ImportedName> {
// TODO(charlie): Pass in a slice.
let module_path: Vec<&str> = module.split('.').collect();
self.scopes().enumerate().find_map(|(scope_index, scope)| {
scope.bindings().find_map(|(name, binding_id)| {
let binding = &self.bindings[binding_id];
@ -702,8 +689,8 @@ impl<'a> SemanticModel<'a> {
// Ex) Given `module="sys"` and `object="exit"`:
// `import sys` -> `sys.exit`
// `import sys as sys2` -> `sys2.exit`
BindingKind::Import(Import { qualified_name }) => {
if qualified_name == &module {
BindingKind::Import(Import { call_path }) => {
if call_path.as_ref() == module_path.as_slice() {
if let Some(source) = binding.source {
// Verify that `sys` isn't bound in an inner scope.
if self
@ -723,10 +710,9 @@ impl<'a> SemanticModel<'a> {
// Ex) Given `module="os.path"` and `object="join"`:
// `from os.path import join` -> `join`
// `from os.path import join as join2` -> `join2`
BindingKind::FromImport(FromImport { qualified_name }) => {
if let Some((target_module, target_member)) = qualified_name.split_once('.')
{
if target_module == module && target_member == member {
BindingKind::FromImport(FromImport { call_path }) => {
if let Some((target_member, target_module)) = call_path.split_last() {
if target_module == module_path.as_slice() && target_member == &member {
if let Some(source) = binding.source {
// Verify that `join` isn't bound in an inner scope.
if self