Scope and Binding IDs (#3572)

This commit is contained in:
Micha Reiser 2023-03-17 17:12:27 +01:00 committed by GitHub
parent 33d2457909
commit 92179e6369
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
22 changed files with 404 additions and 250 deletions

View file

@ -1,6 +1,7 @@
use std::ops::{Deref, Index, IndexMut};
use std::path::Path;
use nohash_hasher::IntMap;
use nohash_hasher::{BuildNoHashHasher, IntMap};
use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Expr, Stmt};
use smallvec::smallvec;
@ -9,7 +10,10 @@ use ruff_python_stdlib::path::is_python_stub_file;
use ruff_python_stdlib::typing::TYPING_EXTENSIONS;
use crate::helpers::{collect_call_path, from_relative_import, Exceptions};
use crate::types::{Binding, BindingKind, CallPath, ExecutionContext, RefEquality, Scope};
use crate::types::{
Binding, BindingId, BindingKind, CallPath, ExecutionContext, RefEquality, Scope, ScopeId,
ScopeKind,
};
use crate::visibility::{module_visibility, Modifier, VisibleScope};
#[allow(clippy::struct_excessive_bools)]
@ -22,13 +26,14 @@ pub struct Context<'a> {
pub depths: FxHashMap<RefEquality<'a, Stmt>, usize>,
pub child_to_parent: FxHashMap<RefEquality<'a, Stmt>, RefEquality<'a, Stmt>>,
// A stack of all bindings created in any scope, at any point in execution.
pub bindings: Vec<Binding<'a>>,
pub bindings: Bindings<'a>,
// Map from binding index to indexes of bindings that redefine it in other scopes.
pub redefinitions: IntMap<usize, Vec<usize>>,
pub redefinitions:
std::collections::HashMap<BindingId, Vec<BindingId>, BuildNoHashHasher<BindingId>>,
pub exprs: Vec<RefEquality<'a, Expr>>,
pub scopes: Vec<Scope<'a>>,
pub scope_stack: Vec<usize>,
pub dead_scopes: Vec<(usize, Vec<usize>)>,
pub scopes: Scopes<'a>,
pub scope_stack: ScopeStack,
pub dead_scopes: Vec<(ScopeId, ScopeStack)>,
// Body iteration; used to peek at siblings.
pub body: &'a [Stmt],
pub body_index: usize,
@ -60,11 +65,11 @@ impl<'a> Context<'a> {
parents: Vec::default(),
depths: FxHashMap::default(),
child_to_parent: FxHashMap::default(),
bindings: Vec::default(),
bindings: Bindings::default(),
redefinitions: IntMap::default(),
exprs: Vec::default(),
scopes: Vec::default(),
scope_stack: Vec::default(),
scopes: Scopes::default(),
scope_stack: ScopeStack::default(),
dead_scopes: Vec::default(),
body: &[],
body_index: 0,
@ -119,7 +124,7 @@ impl<'a> Context<'a> {
/// Return the current `Binding` for the given `member` name.
///
/// The active scopes are visited through [`Context::scopes`], which starts at the top-most
/// scope of the stack. The first scope that has an entry for `member` wins and its binding id
/// is resolved through `self.bindings`. Returns `None` when no active scope binds `member`.
pub fn find_binding(&self, member: &str) -> Option<&Binding> {
    // `current_scopes` was renamed to `scopes`; only the renamed accessor may be called here.
    self.scopes()
        .find_map(|scope| scope.bindings.get(member))
        .map(|index| &self.bindings[*index])
}
@ -217,9 +222,10 @@ impl<'a> Context<'a> {
.expect("Attempted to pop without expression");
}
pub fn push_scope(&mut self, scope: Scope<'a>) {
self.scope_stack.push(self.scopes.len());
self.scopes.push(scope);
/// Creates a new scope of the given `kind`, makes it the top-most scope of the stack and
/// returns the id that was assigned to it.
pub fn push_scope(&mut self, kind: ScopeKind<'a>) -> ScopeId {
    let scope_id = self.scopes.push_scope(kind);
    self.scope_stack.push(scope_id);
    scope_id
}
pub fn pop_scope(&mut self) {
@ -261,23 +267,41 @@ impl<'a> Context<'a> {
self.body.get(self.body_index + 1)
}
pub fn current_scope(&self) -> &Scope {
&self.scopes[*(self.scope_stack.last().expect("No current scope found"))]
/// Returns a reference to the global scope
pub fn global_scope(&self) -> &Scope<'a> {
self.scopes.global()
}
pub fn current_scope_parent(&self) -> Option<&Scope> {
/// Returns a mutable reference to the global scope
pub fn global_scope_mut(&mut self) -> &mut Scope<'a> {
self.scopes.global_mut()
}
/// Borrows the scope that is currently on top of the scope stack.
///
/// # Panics
///
/// Panics with "No current scope found" if the scope stack is empty.
pub fn scope(&self) -> &Scope<'a> {
    let top_id = self.scope_stack.top().expect("No current scope found");
    &self.scopes[top_id]
}
/// Returns the id of the scope that is currently on top of the scope stack.
///
/// # Panics
///
/// Panics with "No current scope found" if the scope stack is empty.
pub fn scope_id(&self) -> ScopeId {
    let top = self.scope_stack.top();
    top.expect("No current scope found")
}
/// Mutably borrows the scope that is currently on top of the scope stack.
///
/// # Panics
///
/// Panics with "No current scope found" if the scope stack is empty.
pub fn scope_mut(&mut self) -> &mut Scope<'a> {
    &mut self.scopes[self.scope_stack.top().expect("No current scope found")]
}
/// Returns the scope directly below the top-most scope on the scope stack, or `None` when the
/// stack holds fewer than two scopes.
pub fn parent_scope(&self) -> Option<&Scope> {
    // `ScopeStack::iter` already yields ids starting at the top-most scope, so the parent is
    // the second item. Reversing the iterator a second time would count from the bottom of
    // the stack instead.
    self.scope_stack
        .iter()
        .nth(1)
        .map(|index| &self.scopes[*index])
}
pub fn current_scopes(&self) -> impl Iterator<Item = &Scope> {
self.scope_stack
.iter()
.rev()
.map(|index| &self.scopes[*index])
/// Iterates over the scopes on the scope stack, in the order produced by `ScopeStack::iter`
/// (top-most scope first).
pub fn scopes(&self) -> impl Iterator<Item = &Scope> {
    let ids = self.scope_stack.iter().copied();
    ids.map(|id| &self.scopes[id])
}
pub const fn in_exception_handler(&self) -> bool {
@ -295,3 +319,132 @@ impl<'a> Context<'a> {
}
}
}
/// Storage for the scopes of a program; individual scopes are addressed by [`ScopeId`].
#[derive(Debug)]
pub struct Scopes<'a>(Vec<Scope<'a>>);

impl<'a> Scopes<'a> {
    /// Borrows the scope stored under [`ScopeId::global`].
    pub fn global(&self) -> &Scope<'a> {
        let global_id = ScopeId::global();
        &self[global_id]
    }

    /// Mutably borrows the scope stored under [`ScopeId::global`].
    pub fn global_mut(&mut self) -> &mut Scope<'a> {
        let global_id = ScopeId::global();
        &mut self[global_id]
    }

    /// Appends a new local scope of the given `kind` and returns the id assigned to it.
    ///
    /// The id is the position of the new scope in the underlying vector.
    ///
    /// # Panics
    ///
    /// Panics if the current number of scopes cannot be converted into a [`ScopeId`].
    fn push_scope(&mut self, kind: ScopeKind<'a>) -> ScopeId {
        let Self(scopes) = self;
        let id = ScopeId::try_from(scopes.len()).unwrap();
        scopes.push(Scope::local(id, kind));
        id
    }
}
impl Default for Scopes<'_> {
    /// Creates the scope storage pre-populated with a single global scope of kind
    /// `ScopeKind::Module`.
    fn default() -> Self {
        let global = Scope::global(ScopeKind::Module);
        Self(vec![global])
    }
}
impl<'a> Index<ScopeId> for Scopes<'a> {
    type Output = Scope<'a>;

    /// Borrows the scope stored at the position encoded by `index`.
    ///
    /// Panics if no scope is stored at that position.
    fn index(&self, index: ScopeId) -> &Self::Output {
        let slot: usize = index.into();
        &self.0[slot]
    }
}
impl<'a> IndexMut<ScopeId> for Scopes<'a> {
    /// Mutably borrows the scope stored at the position encoded by `index`.
    ///
    /// Panics if no scope is stored at that position.
    fn index_mut(&mut self, index: ScopeId) -> &mut Self::Output {
        let slot: usize = index.into();
        &mut self.0[slot]
    }
}
impl<'a> Deref for Scopes<'a> {
    type Target = [Scope<'a>];

    /// Exposes the stored scopes as a read-only slice.
    fn deref(&self) -> &Self::Target {
        self.0.as_slice()
    }
}
/// A stack of [`ScopeId`]s; the most recently pushed id is the top-most scope.
#[derive(Debug, Clone)]
pub struct ScopeStack(Vec<ScopeId>);

impl ScopeStack {
    /// Pushes `id` onto the stack, making it the top-most scope.
    pub fn push(&mut self, id: ScopeId) {
        let Self(stack) = self;
        stack.push(id);
    }

    /// Removes and returns the top-most scope id, or `None` if the stack is empty.
    pub fn pop(&mut self) -> Option<ScopeId> {
        let Self(stack) = self;
        stack.pop()
    }

    /// Returns the id of the top-most scope without removing it, or `None` if the stack is
    /// empty.
    pub fn top(&self) -> Option<ScopeId> {
        let Self(stack) = self;
        stack.last().copied()
    }

    /// Returns an iterator that starts at the top-most scope and ends at the scope that was
    /// pushed first (i.e. the reverse of push order).
    pub fn iter(&self) -> std::iter::Rev<std::slice::Iter<ScopeId>> {
        let Self(stack) = self;
        stack.iter().rev()
    }
}
impl Default for ScopeStack {
    /// Creates a stack whose only entry is [`ScopeId::global`].
    fn default() -> Self {
        let global_id = ScopeId::global();
        Self(vec![global_id])
    }
}
/// Storage for the bindings of a program.
///
/// Individual bindings are addressed by [`BindingId`].
#[derive(Debug, Clone, Default)]
pub struct Bindings<'a>(Vec<Binding<'a>>);

impl<'a> Bindings<'a> {
    /// Appends `binding` and returns the id under which it was stored.
    ///
    /// # Panics
    ///
    /// Panics if the current number of bindings cannot be converted into a [`BindingId`].
    pub fn push(&mut self, binding: Binding<'a>) -> BindingId {
        let assigned = self.next_id();
        self.0.push(binding);
        assigned
    }

    /// Returns the id that the next call to [`Bindings::push`] will assign, which is the
    /// current number of stored bindings.
    ///
    /// # Panics
    ///
    /// Panics if the current number of bindings cannot be converted into a [`BindingId`].
    pub fn next_id(&self) -> BindingId {
        let count = self.0.len();
        BindingId::try_from(count).unwrap()
    }
}
impl<'a> Index<BindingId> for Bindings<'a> {
    type Output = Binding<'a>;

    /// Borrows the binding stored at the position encoded by `index`.
    ///
    /// Panics if no binding is stored at that position.
    fn index(&self, index: BindingId) -> &Self::Output {
        let slot: usize = index.into();
        &self.0[slot]
    }
}
impl<'a> IndexMut<BindingId> for Bindings<'a> {
    /// Mutably borrows the binding stored at the position encoded by `index`.
    ///
    /// Panics if no binding is stored at that position.
    fn index_mut(&mut self, index: BindingId) -> &mut Self::Output {
        let slot: usize = index.into();
        &mut self.0[slot]
    }
}
impl<'a> Deref for Bindings<'a> {
    type Target = [Binding<'a>];

    /// Exposes the stored bindings as a read-only slice.
    fn deref(&self) -> &Self::Target {
        self.0.as_slice()
    }
}
impl<'a> FromIterator<Binding<'a>> for Bindings<'a> {
    /// Collects the yielded bindings in iteration order, so the n-th yielded binding ends up at
    /// position n.
    fn from_iter<T: IntoIterator<Item = Binding<'a>>>(iter: T) -> Self {
        let collected: Vec<Binding<'a>> = iter.into_iter().collect();
        Self(collected)
    }
}

View file

@ -1,14 +1,9 @@
use std::num::TryFromIntError;
use std::ops::Deref;
use std::sync::atomic::{AtomicUsize, Ordering};
use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Arguments, Expr, Keyword, Located, Location, Stmt};
fn id() -> usize {
static COUNTER: AtomicUsize = AtomicUsize::new(1);
COUNTER.fetch_add(1, Ordering::Relaxed)
}
#[derive(Clone)]
pub enum Node<'a> {
Stmt(&'a Stmt),
@ -84,24 +79,63 @@ pub enum ScopeKind<'a> {
Lambda(Lambda<'a>),
}
/// Id uniquely identifying a scope in a program.
///
/// A `u32` is wide enough: Ruff only supports parsing documents of at most `u32::max` in size,
/// and a file cannot contain more scopes than characters (defining a function or class takes
/// more than one character).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Hash, Ord, PartialOrd)]
pub struct ScopeId(u32);

impl ScopeId {
    /// Returns the id of the global scope, which is always `0`.
    #[inline]
    pub const fn global() -> Self {
        Self(0)
    }

    /// Returns `true` if this id is the one returned by [`ScopeId::global`].
    pub const fn is_global(&self) -> bool {
        matches!(*self, Self(0))
    }
}

impl TryFrom<usize> for ScopeId {
    type Error = TryFromIntError;

    /// Converts a `usize` into a `ScopeId`, failing when the value does not fit into a `u32`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        u32::try_from(value).map(Self)
    }
}

impl From<ScopeId> for usize {
    /// Returns the raw numeric value of the id as a `usize`.
    fn from(value: ScopeId) -> Self {
        let ScopeId(raw) = value;
        raw as usize
    }
}
#[derive(Debug)]
pub struct Scope<'a> {
pub id: usize,
pub id: ScopeId,
pub kind: ScopeKind<'a>,
pub import_starred: bool,
pub uses_locals: bool,
/// A map from bound name to binding index, for live bindings.
pub bindings: FxHashMap<&'a str, usize>,
pub bindings: FxHashMap<&'a str, BindingId>,
/// A map from bound name to binding index, for bindings that were created
/// in the scope but rebound (and thus overridden) later on in the same
/// scope.
pub rebounds: FxHashMap<&'a str, Vec<usize>>,
pub rebounds: FxHashMap<&'a str, Vec<BindingId>>,
}
impl<'a> Scope<'a> {
pub fn new(kind: ScopeKind<'a>) -> Self {
/// Creates a scope of the given `kind` whose id is [`ScopeId::global`].
pub fn global(kind: ScopeKind<'a>) -> Self {
    let id = ScopeId::global();
    Self::local(id, kind)
}
pub fn local(id: ScopeId, kind: ScopeKind<'a>) -> Self {
Scope {
id: id(),
id,
kind,
import_starred: false,
uses_locals: false,
@ -148,6 +182,30 @@ pub enum BindingKind<'a> {
SubmoduleImportation(&'a str, &'a str),
}
/// ID uniquely identifying a [Binding] in a program.
///
/// A `u32` is sufficient to identify [Binding]s because Ruff only supports documents with a
/// size smaller than or equal to `u32::max`. A document of that size must contain fewer than
/// `u32::max` bindings because bindings must be separated by whitespace (and have an
/// assignment).
#[derive(Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub struct BindingId(u32);

impl From<BindingId> for usize {
    /// Returns the raw numeric value of the id as a `usize`.
    fn from(value: BindingId) -> Self {
        let BindingId(raw) = value;
        raw as usize
    }
}

impl TryFrom<usize> for BindingId {
    type Error = TryFromIntError;

    /// Converts a `usize` into a `BindingId`, failing when the value does not fit into a `u32`.
    fn try_from(value: usize) -> Result<Self, Self::Error> {
        u32::try_from(value).map(Self)
    }
}
// Marker impl that opts `BindingId` into `nohash_hasher`'s pass-through hashing, as needed for
// the `BuildNoHashHasher<BindingId>` used by `Context::redefinitions`.
// NOTE(review): relies on the derived `Hash` writing exactly one integer (the inner `u32`) —
// confirm against the `nohash_hasher::IsEnabled` contract if the struct ever gains fields.
impl nohash_hasher::IsEnabled for BindingId {}
#[derive(Debug, Clone)]
pub struct Binding<'a> {
pub kind: BindingKind<'a>,
@ -158,15 +216,15 @@ pub struct Binding<'a> {
pub source: Option<RefEquality<'a, Stmt>>,
/// Tuple of (scope index, range) indicating the scope and range at which
/// the binding was last used in a runtime context.
pub runtime_usage: Option<(usize, Range)>,
pub runtime_usage: Option<(ScopeId, Range)>,
/// Tuple of (scope index, range) indicating the scope and range at which
/// the binding was last used in a typing-time context.
pub typing_usage: Option<(usize, Range)>,
pub typing_usage: Option<(ScopeId, Range)>,
/// Tuple of (scope index, range) indicating the scope and range at which
/// the binding was last used in a synthetic context. This is used for
/// (e.g.) `__future__` imports, explicit re-exports, and other bindings
/// that should be considered used even if they're never referenced.
pub synthetic_usage: Option<(usize, Range)>,
pub synthetic_usage: Option<(ScopeId, Range)>,
}
#[derive(Copy, Debug, Clone)]
@ -176,7 +234,7 @@ pub enum ExecutionContext {
}
impl<'a> Binding<'a> {
pub fn mark_used(&mut self, scope: usize, range: Range, context: ExecutionContext) {
pub fn mark_used(&mut self, scope: ScopeId, range: Range, context: ExecutionContext) {
match context {
ExecutionContext::Runtime => self.runtime_usage = Some((scope, range)),
ExecutionContext::Typing => self.typing_usage = Some((scope, range)),