Kick off Red-knot (#10849)
Co-authored-by: Carl Meyer <carl@oddbird.net>
Co-authored-by: Carl Meyer <carl@astral.sh>

parent 845ba7cf5f
commit 7cd065e4a2
24 changed files with 6282 additions and 103 deletions
135  Cargo.lock  (generated)

@@ -36,6 +36,12 @@ dependencies = [
 "memchr",
]

[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"

[[package]]
name = "android-tzdata"
version = "0.1.1"

@@ -535,6 +541,16 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"

[[package]]
name = "ctrlc"
version = "3.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345"
dependencies = [
 "nix",
 "windows-sys 0.52.0",
]

[[package]]
name = "darling"
version = "0.20.8"

@@ -570,6 +586,19 @@ dependencies = [
 "syn 2.0.60",
]

[[package]]
name = "dashmap"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
 "cfg-if",
 "hashbrown 0.14.3",
 "lock_api",
 "once_cell",
 "parking_lot_core",
]

[[package]]
name = "diff"
version = "0.1.13"

@@ -812,6 +841,10 @@ name = "hashbrown"
version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
 "ahash",
 "allocator-api2",
]

[[package]]
name = "heck"

@@ -1214,6 +1247,16 @@ version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"

[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
 "autocfg",
 "scopeguard",
]

[[package]]
name = "log"
version = "0.4.21"

@@ -1441,6 +1484,29 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"

[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
 "lock_api",
 "parking_lot_core",
]

[[package]]
name = "parking_lot_core"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [
 "cfg-if",
 "libc",
 "redox_syscall",
 "smallvec",
 "windows-targets 0.48.5",
]

[[package]]
name = "paste"
version = "1.0.14"

@@ -1732,6 +1798,37 @@ dependencies = [
 "crossbeam-utils",
]

[[package]]
name = "red_knot"
version = "0.1.0"
dependencies = [
 "anyhow",
 "bitflags 2.5.0",
 "crossbeam-channel",
 "ctrlc",
 "dashmap",
 "hashbrown 0.14.3",
 "indexmap",
 "log",
 "notify",
 "parking_lot",
 "rayon",
 "ruff_index",
 "ruff_notebook",
 "ruff_python_ast",
 "ruff_python_parser",
 "ruff_python_trivia",
 "ruff_text_size",
 "rustc-hash",
 "smallvec",
 "smol_str",
 "tempfile",
 "textwrap",
 "tracing",
 "tracing-subscriber",
 "tracing-tree",
]

[[package]]
name = "redox_syscall"
version = "0.4.1"

@@ -2475,6 +2572,12 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"

[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"

[[package]]
name = "seahash"
version = "4.1.0"

@@ -2628,6 +2731,21 @@ version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"

[[package]]
name = "smawk"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"

[[package]]
name = "smol_str"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49"
dependencies = [
 "serde",
]

[[package]]
name = "spin"
version = "0.9.8"

@@ -2779,6 +2897,17 @@ dependencies = [
 "test-case-core",
]

[[package]]
name = "textwrap"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
dependencies = [
 "smawk",
 "unicode-linebreak",
 "unicode-width",
]

[[package]]
name = "thiserror"
version = "1.0.59"

@@ -3034,6 +3163,12 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"

[[package]]
name = "unicode-linebreak"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"

[[package]]
name = "unicode-normalization"
version = "0.1.23"
Cargo.toml

@@ -31,6 +31,7 @@ console_log = { version = "1.0.0" }
countme = { version = "3.0.1" }
criterion = { version = "0.5.1", default-features = false }
crossbeam-channel = { version = "0.5.12" }
dashmap = { version = "5.5.3" }
dirs = { version = "5.0.0" }
drop_bomb = { version = "0.1.5" }
env_logger = { version = "0.11.0" }

@@ -39,10 +40,12 @@ filetime = { version = "0.2.23" }
fs-err = { version = "2.11.0" }
glob = { version = "0.3.1" }
globset = { version = "0.4.14" }
hashbrown = "0.14.3"
hexf-parse = { version = "0.2.1" }
ignore = { version = "0.4.22" }
imara-diff = { version = "0.1.5" }
imperative = { version = "1.0.4" }
indexmap = { version = "2.2.6" }
indicatif = { version = "0.17.8" }
indoc = { version = "2.0.4" }
insta = { version = "1.35.1", features = ["filters", "glob"] }

@@ -68,6 +71,7 @@ once_cell = { version = "1.19.0" }
path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" }
parking_lot = "0.12.1"
pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0"
proc-macro2 = { version = "1.0.79" }
45  crates/red_knot/Cargo.toml  (new file)

@@ -0,0 +1,45 @@
[package]
name = "red_knot"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
documentation.workspace = true
repository.workspace = true
authors.workspace = true
license.workspace = true

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
ruff_python_parser = { path = "../ruff_python_parser" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_text_size = { path = "../ruff_text_size" }
ruff_index = { path = "../ruff_index" }
ruff_notebook = { path = "../ruff_notebook" }

anyhow = { workspace = true }
bitflags = { workspace = true }
ctrlc = "3.4.4"
crossbeam-channel = { workspace = true }
dashmap = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
log = { workspace = true }
notify = { workspace = true }
parking_lot = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
smallvec = { workspace = true }
smol_str = "0.2.1"
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
tracing-tree = { workspace = true }

[dev-dependencies]
textwrap = "0.16.1"
tempfile = { workspace = true }

[lints]
workspace = true
415  crates/red_knot/src/ast_ids.rs  (new file)

@@ -0,0 +1,415 @@
use std::any::type_name;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;

use rustc_hash::FxHashMap;

use ruff_index::{Idx, IndexVec};
use ruff_python_ast::visitor::preorder;
use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal};
use ruff_python_ast::{
    AnyNodeRef, AstNode, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule,
    NodeKind, Parameter, Stmt, StmtAnnAssign, StmtAssign, StmtAugAssign, StmtClassDef,
    StmtFunctionDef, StmtGlobal, StmtImport, StmtImportFrom, StmtNonlocal, StmtTypeAlias,
    TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use ruff_text_size::{Ranged, TextRange};

/// A type-agnostic ID that uniquely identifies an AST node in a file.
#[ruff_index::newtype_index]
pub struct AstId;

/// A typed ID that uniquely identifies an AST node in a file.
///
/// This is different from [`AstId`] in that it is a combination of an ID and the type of node the ID identifies.
/// Typing the ID prevents mixing IDs of different node types and allows restricting the API to only accept
/// nodes for which an ID has been created (not all AST nodes get an ID).
pub struct TypedAstId<N: HasAstId> {
    erased: AstId,
    _marker: PhantomData<fn() -> N>,
}

impl<N: HasAstId> TypedAstId<N> {
    /// Upcasts this ID from a more specific node type to a more general node type.
    pub fn upcast<M: HasAstId>(self) -> TypedAstId<M>
    where
        N: Into<M>,
    {
        TypedAstId {
            erased: self.erased,
            _marker: PhantomData,
        }
    }
}

impl<N: HasAstId> Copy for TypedAstId<N> {}
impl<N: HasAstId> Clone for TypedAstId<N> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<N: HasAstId> PartialEq for TypedAstId<N> {
    fn eq(&self, other: &Self) -> bool {
        self.erased == other.erased
    }
}

impl<N: HasAstId> Eq for TypedAstId<N> {}
impl<N: HasAstId> Hash for TypedAstId<N> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.erased.hash(state);
    }
}

impl<N: HasAstId> Debug for TypedAstId<N> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_tuple("TypedAstId")
            .field(&self.erased)
            .field(&type_name::<N>())
            .finish()
    }
}

pub struct AstIds {
    ids: IndexVec<AstId, NodeKey>,
    reverse: FxHashMap<NodeKey, AstId>,
}

impl AstIds {
    // TODO: rust-analyzer doesn't allocate an ID for every node. It only allocates IDs for
    // nodes with a corresponding HIR element, that is, nodes that are definitions.
    pub fn from_module(module: &ModModule) -> Self {
        let mut visitor = AstIdsVisitor::default();

        // TODO: visit_module?
        // Make sure we visit the root.
        visitor.create_id(module);
        visitor.visit_body(&module.body);

        while let Some(deferred) = visitor.deferred.pop() {
            match deferred {
                DeferredNode::FunctionDefinition(def) => {
                    def.visit_preorder(&mut visitor);
                }
                DeferredNode::ClassDefinition(def) => def.visit_preorder(&mut visitor),
            }
        }

        AstIds {
            ids: visitor.ids,
            reverse: visitor.reverse,
        }
    }

    /// Returns the ID of the root node.
    pub fn root(&self) -> NodeKey {
        self.ids[AstId::new(0)]
    }

    /// Returns the [`TypedAstId`] for a node.
    pub fn ast_id<N: HasAstId>(&self, node: &N) -> TypedAstId<N> {
        let key = node.syntax_node_key();
        TypedAstId {
            erased: self.reverse.get(&key).copied().unwrap(),
            _marker: PhantomData,
        }
    }

    /// Returns the [`TypedAstId`] for the node identified by the given [`TypedNodeKey`].
    pub fn ast_id_for_key<N: HasAstId>(&self, node: &TypedNodeKey<N>) -> TypedAstId<N> {
        let ast_id = self.ast_id_for_node_key(node.inner);

        TypedAstId {
            erased: ast_id,
            _marker: PhantomData,
        }
    }

    /// Returns the untyped [`AstId`] for the node identified by the given `node` key.
    pub fn ast_id_for_node_key(&self, node: NodeKey) -> AstId {
        self.reverse
            .get(&node)
            .copied()
            .expect("Can't find node in AstIds map.")
    }

    /// Returns the [`TypedNodeKey`] for the node identified by the given [`TypedAstId`].
    pub fn key<N: HasAstId>(&self, id: TypedAstId<N>) -> TypedNodeKey<N> {
        let syntax_key = self.ids[id.erased];

        TypedNodeKey::new(syntax_key).unwrap()
    }

    pub fn node_key<H: HasAstId>(&self, id: TypedAstId<H>) -> NodeKey {
        self.ids[id.erased]
    }
}

impl std::fmt::Debug for AstIds {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut map = f.debug_map();
        for (key, value) in self.ids.iter_enumerated() {
            map.entry(&key, &value);
        }

        map.finish()
    }
}

impl PartialEq for AstIds {
    fn eq(&self, other: &Self) -> bool {
        self.ids == other.ids
    }
}

impl Eq for AstIds {}

#[derive(Default)]
struct AstIdsVisitor<'a> {
    ids: IndexVec<AstId, NodeKey>,
    reverse: FxHashMap<NodeKey, AstId>,
    deferred: Vec<DeferredNode<'a>>,
}

impl<'a> AstIdsVisitor<'a> {
    fn create_id<A: HasAstId>(&mut self, node: &A) {
        let node_key = node.syntax_node_key();

        let id = self.ids.push(node_key);
        self.reverse.insert(node_key, id);
    }
}

impl<'a> PreorderVisitor<'a> for AstIdsVisitor<'a> {
    fn visit_stmt(&mut self, stmt: &'a Stmt) {
        match stmt {
            Stmt::FunctionDef(def) => {
                self.create_id(def);
                self.deferred.push(DeferredNode::FunctionDefinition(def));
                return;
            }
            // TODO defer visiting the assignment body, type alias parameters etc?
            Stmt::ClassDef(def) => {
                self.create_id(def);
                self.deferred.push(DeferredNode::ClassDefinition(def));
                return;
            }
            Stmt::Expr(_) => {
                // Skip
                return;
            }
            Stmt::Return(_) => {}
            Stmt::Delete(_) => {}
            Stmt::Assign(assignment) => self.create_id(assignment),
            Stmt::AugAssign(assignment) => {
                self.create_id(assignment);
            }
            Stmt::AnnAssign(assignment) => self.create_id(assignment),
            Stmt::TypeAlias(assignment) => self.create_id(assignment),
            Stmt::For(_) => {}
            Stmt::While(_) => {}
            Stmt::If(_) => {}
            Stmt::With(_) => {}
            Stmt::Match(_) => {}
            Stmt::Raise(_) => {}
            Stmt::Try(_) => {}
            Stmt::Assert(_) => {}
            Stmt::Import(import) => self.create_id(import),
            Stmt::ImportFrom(import_from) => self.create_id(import_from),
            Stmt::Global(global) => self.create_id(global),
            Stmt::Nonlocal(non_local) => self.create_id(non_local),
            Stmt::Pass(_) => {}
            Stmt::Break(_) => {}
            Stmt::Continue(_) => {}
            Stmt::IpyEscapeCommand(_) => {}
        }

        preorder::walk_stmt(self, stmt);
    }

    fn visit_expr(&mut self, _expr: &'a Expr) {}

    fn visit_parameter(&mut self, parameter: &'a Parameter) {
        self.create_id(parameter);
        preorder::walk_parameter(self, parameter);
    }

    fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
        match except_handler {
            ExceptHandler::ExceptHandler(except_handler) => {
                self.create_id(except_handler);
            }
        }

        preorder::walk_except_handler(self, except_handler);
    }

    fn visit_with_item(&mut self, with_item: &'a WithItem) {
        self.create_id(with_item);
        preorder::walk_with_item(self, with_item);
    }

    fn visit_match_case(&mut self, match_case: &'a MatchCase) {
        self.create_id(match_case);
        preorder::walk_match_case(self, match_case);
    }

    fn visit_type_param(&mut self, type_param: &'a TypeParam) {
        self.create_id(type_param);
    }
}

enum DeferredNode<'a> {
    FunctionDefinition(&'a StmtFunctionDef),
    ClassDefinition(&'a StmtClassDef),
}

#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct TypedNodeKey<N: AstNode> {
    /// The type-erased node key.
    inner: NodeKey,
    _marker: PhantomData<fn() -> N>,
}

impl<N: AstNode> TypedNodeKey<N> {
    pub fn from_node(node: &N) -> Self {
        let inner = NodeKey {
            kind: node.as_any_node_ref().kind(),
            range: node.range(),
        };
        Self {
            inner,
            _marker: PhantomData,
        }
    }

    pub fn new(node_key: NodeKey) -> Option<Self> {
        N::can_cast(node_key.kind).then_some(TypedNodeKey {
            inner: node_key,
            _marker: PhantomData,
        })
    }

    pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<N::Ref<'a>> {
        let node_ref = self.inner.resolve(root)?;

        Some(N::cast_ref(node_ref).unwrap())
    }

    pub fn resolve_unwrap<'a>(&self, root: AnyNodeRef<'a>) -> N::Ref<'a> {
        self.resolve(root).expect("node should resolve")
    }

    pub fn erased(&self) -> &NodeKey {
        &self.inner
    }
}

struct FindNodeKeyVisitor<'a> {
    key: NodeKey,
    result: Option<AnyNodeRef<'a>>,
}

impl<'a> PreorderVisitor<'a> for FindNodeKeyVisitor<'a> {
    fn enter_node(&mut self, node: AnyNodeRef<'a>) -> TraversalSignal {
        if self.result.is_some() {
            return TraversalSignal::Skip;
        }

        if node.range() == self.key.range && node.kind() == self.key.kind {
            self.result = Some(node);
            TraversalSignal::Skip
        } else if node.range().contains_range(self.key.range) {
            TraversalSignal::Traverse
        } else {
            TraversalSignal::Skip
        }
    }

    fn visit_body(&mut self, body: &'a [Stmt]) {
        // TODO it would be more efficient to use a binary search instead of a linear scan
        for stmt in body {
            if stmt.range().start() > self.key.range.end() {
                break;
            }

            self.visit_stmt(stmt);
        }
    }
}

// TODO: an alternative to this is to have a `NodeId` on each node (in increasing order depending on the position).
// That would allow reducing the size of this struct to a `u32`.
// It would be nice if we could use an `Arc::weak_ref` here, but that only works if we use
// `Arc` internally.
// TODO: Implement the logic to resolve a node, given a db (and the correct file).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct NodeKey {
    kind: NodeKind,
    range: TextRange,
}

impl NodeKey {
    pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<AnyNodeRef<'a>> {
        // We need to do a binary search here. Only traverse into a node if the key's range is within the node's range.
        let mut visitor = FindNodeKeyVisitor {
            key: *self,
            result: None,
        };

        if visitor.enter_node(root) == TraversalSignal::Traverse {
            root.visit_preorder(&mut visitor);
        }

        visitor.result
    }
}

/// Marker trait implemented by AST nodes for which we extract the `AstId`.
pub trait HasAstId: AstNode {
    fn node_key(&self) -> TypedNodeKey<Self>
    where
        Self: Sized,
    {
        TypedNodeKey {
            inner: self.syntax_node_key(),
            _marker: PhantomData,
        }
    }

    fn syntax_node_key(&self) -> NodeKey {
        NodeKey {
            kind: self.as_any_node_ref().kind(),
            range: self.range(),
        }
    }
}

impl HasAstId for StmtFunctionDef {}
impl HasAstId for StmtClassDef {}
impl HasAstId for StmtAnnAssign {}
impl HasAstId for StmtAugAssign {}
impl HasAstId for StmtAssign {}
impl HasAstId for StmtTypeAlias {}

impl HasAstId for ModModule {}

impl HasAstId for StmtImport {}

impl HasAstId for StmtImportFrom {}

impl HasAstId for Parameter {}

impl HasAstId for TypeParam {}
impl HasAstId for Stmt {}
impl HasAstId for TypeParamTypeVar {}
impl HasAstId for TypeParamTypeVarTuple {}
impl HasAstId for TypeParamParamSpec {}
impl HasAstId for StmtGlobal {}
impl HasAstId for StmtNonlocal {}

impl HasAstId for ExceptHandlerExceptHandler {}
impl HasAstId for WithItem {}
impl HasAstId for MatchCase {}
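[Editor's note: a minimal sketch, not part of the commit, of how these pieces compose: build the side table once per module, turn a definition node into a stable TypedAstId, and resolve it back through its TypedNodeKey. It assumes `module` is a `ModModule` from ruff_python_parser and that `AnyNodeRef` converts from `&ModModule`.]

fn ast_id_round_trip(module: &ModModule) {
    // Build the NodeKey <-> AstId side table once per file.
    let ast_ids = AstIds::from_module(module);

    if let Some(Stmt::FunctionDef(function)) = module.body.first() {
        // Definition nodes map to stable, typed ids...
        let id: TypedAstId<StmtFunctionDef> = ast_ids.ast_id(function);

        // ...and an id maps back to a key that re-resolves the node by
        // (kind, range), even against a re-parsed AST.
        let key: TypedNodeKey<StmtFunctionDef> = ast_ids.key(id);
        let resolved = key.resolve_unwrap(AnyNodeRef::from(module));
        assert_eq!(resolved.name, function.name);
    }
}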
152  crates/red_knot/src/cache.rs  (new file)

@@ -0,0 +1,152 @@
use std::fmt::Formatter;
use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};

use dashmap::mapref::entry::Entry;

use crate::FxDashMap;

/// Simple key-value cache that locks at a per-key level.
pub struct KeyValueCache<K, V> {
    map: FxDashMap<K, V>,
    statistics: CacheStatistics,
}

impl<K, V> KeyValueCache<K, V>
where
    K: Eq + Hash + Clone,
    V: Clone,
{
    pub fn try_get(&self, key: &K) -> Option<V> {
        if let Some(existing) = self.map.get(key) {
            self.statistics.hit();
            Some(existing.clone())
        } else {
            self.statistics.miss();
            None
        }
    }

    pub fn get<F>(&self, key: &K, compute: F) -> V
    where
        F: FnOnce(&K) -> V,
    {
        match self.map.entry(key.clone()) {
            Entry::Occupied(cached) => {
                self.statistics.hit();

                cached.get().clone()
            }
            Entry::Vacant(vacant) => {
                self.statistics.miss();

                let value = compute(key);
                vacant.insert(value.clone());
                value
            }
        }
    }

    pub fn set(&mut self, key: K, value: V) {
        self.map.insert(key, value);
    }

    pub fn remove(&mut self, key: &K) -> Option<V> {
        self.map.remove(key).map(|(_, value)| value)
    }

    pub fn clear(&mut self) {
        self.map.clear();
        self.map.shrink_to_fit();
    }

    pub fn statistics(&self) -> Option<Statistics> {
        self.statistics.to_statistics()
    }
}

impl<K, V> Default for KeyValueCache<K, V>
where
    K: Eq + Hash,
    V: Clone,
{
    fn default() -> Self {
        Self {
            map: FxDashMap::default(),
            statistics: CacheStatistics::default(),
        }
    }
}

impl<K, V> std::fmt::Debug for KeyValueCache<K, V>
where
    K: std::fmt::Debug + Eq + Hash,
    V: std::fmt::Debug,
{
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let mut debug = f.debug_map();

        for entry in &self.map {
            debug.entry(&entry.key(), &entry.value());
        }

        debug.finish()
    }
}

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
    pub hits: usize,
    pub misses: usize,
}

impl Statistics {
    #[allow(clippy::cast_precision_loss)]
    pub fn hit_rate(&self) -> Option<f64> {
        if self.hits + self.misses == 0 {
            return None;
        }

        Some((self.hits as f64) / (self.hits + self.misses) as f64)
    }
}

#[cfg(debug_assertions)]
pub type CacheStatistics = DebugStatistics;

#[cfg(not(debug_assertions))]
pub type CacheStatistics = ReleaseStatistics;

#[derive(Debug, Default)]
pub struct DebugStatistics {
    hits: AtomicUsize,
    misses: AtomicUsize,
}

impl DebugStatistics {
    // TODO figure out the appropriate Ordering
    pub fn hit(&self) {
        self.hits.fetch_add(1, Ordering::SeqCst);
    }

    pub fn miss(&self) {
        self.misses.fetch_add(1, Ordering::SeqCst);
    }

    pub fn to_statistics(&self) -> Option<Statistics> {
        let hits = self.hits.load(Ordering::SeqCst);
        let misses = self.misses.load(Ordering::SeqCst);

        Some(Statistics { hits, misses })
    }
}

#[derive(Debug, Default)]
pub struct ReleaseStatistics;

impl ReleaseStatistics {
    // No-op counters so the `hit()`/`miss()` call sites compile unchanged in release builds.
    #[inline]
    pub fn hit(&self) {}

    #[inline]
    pub fn miss(&self) {}

    #[inline]
    pub fn to_statistics(&self) -> Option<Statistics> {
        None
    }
}
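[Editor's note: a usage sketch for the compute-on-miss path. `read_source` is a hypothetical loader, not part of the commit; concurrent callers for the same key serialize on that key's dashmap shard rather than on the whole cache.]

fn cached_source(cache: &KeyValueCache<FileId, Source>, file_id: FileId) -> Source {
    cache.get(&file_id, |file_id| {
        // Only runs on a miss; the value is inserted before `get` returns,
        // so later callers take the `Occupied` fast path.
        read_source(*file_id) // hypothetical loader
    })
}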
65  crates/red_knot/src/cancellation.rs  (new file)

@@ -0,0 +1,65 @@
use std::sync::{Arc, Condvar, Mutex};

#[derive(Debug, Default)]
pub struct CancellationSource {
    signal: Arc<(Mutex<bool>, Condvar)>,
}

impl CancellationSource {
    pub fn new() -> Self {
        Self {
            signal: Arc::new((Mutex::new(false), Condvar::default())),
        }
    }

    pub fn cancel(&self) {
        let (cancelled, condvar) = &*self.signal;

        let mut cancelled = cancelled.lock().unwrap();

        if *cancelled {
            return;
        }

        *cancelled = true;
        condvar.notify_all();
    }

    pub fn is_cancelled(&self) -> bool {
        let (cancelled, _) = &*self.signal;

        *cancelled.lock().unwrap()
    }

    pub fn token(&self) -> CancellationToken {
        CancellationToken {
            signal: self.signal.clone(),
        }
    }
}

#[derive(Clone, Debug)]
pub struct CancellationToken {
    signal: Arc<(Mutex<bool>, Condvar)>,
}

impl CancellationToken {
    /// Returns `true` if cancellation has been requested.
    pub fn is_cancelled(&self) -> bool {
        let (cancelled, _) = &*self.signal;

        *cancelled.lock().unwrap()
    }

    /// Blocks until cancellation has been requested.
    pub fn wait(&self) {
        let (cancelled, condvar) = &*self.signal;

        let lock = condvar
            .wait_while(cancelled.lock().unwrap(), |cancelled| !*cancelled)
            .unwrap();

        debug_assert!(*lock);

        drop(lock);
    }
}
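[Editor's note: a usage sketch. One `CancellationSource` hands out cheap `CancellationToken` clones; `wait` parks on the condvar until `cancel` flips the shared flag, and returns immediately if cancellation already happened.]

let source = CancellationSource::new();
let token = source.token();

let worker = std::thread::spawn(move || {
    token.wait(); // parks until `cancel()` is called
    assert!(token.is_cancelled());
});

source.cancel();
worker.join().unwrap();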
171  crates/red_knot/src/db.rs  (new file)

@@ -0,0 +1,171 @@
use std::path::Path;
use std::sync::Arc;

use crate::files::FileId;
use crate::lint::{Diagnostics, LintSyntaxStorage};
use crate::module::{Module, ModuleData, ModuleName, ModuleResolver, ModuleSearchPath};
use crate::parse::{Parsed, ParsedStorage};
use crate::source::{Source, SourceStorage};
use crate::symbols::{SymbolId, SymbolTable, SymbolTablesStorage};
use crate::types::{Type, TypeStore};

pub trait SourceDb {
    // queries
    fn file_id(&self, path: &std::path::Path) -> FileId;

    fn file_path(&self, file_id: FileId) -> Arc<std::path::Path>;

    fn source(&self, file_id: FileId) -> Source;

    fn parse(&self, file_id: FileId) -> Parsed;

    fn lint_syntax(&self, file_id: FileId) -> Diagnostics;
}

pub trait SemanticDb: SourceDb {
    // queries
    fn resolve_module(&self, name: ModuleName) -> Option<Module>;

    fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable>;

    // mutations
    fn path_to_module(&mut self, path: &Path) -> Option<Module>;

    fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)>;

    fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>);

    fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type;
}

pub trait Db: SemanticDb {}

#[derive(Debug, Default)]
pub struct SourceJar {
    pub sources: SourceStorage,
    pub parsed: ParsedStorage,
    pub lint_syntax: LintSyntaxStorage,
}

#[derive(Debug, Default)]
pub struct SemanticJar {
    pub module_resolver: ModuleResolver,
    pub symbol_tables: SymbolTablesStorage,
    pub type_store: TypeStore,
}

/// Gives access to a specific jar in the database.
///
/// Nope, the terminology isn't borrowed from Java but from Salsa <https://salsa-rs.github.io/salsa/>,
/// which is an analogy to storing the salsa in different jars.
///
/// The basic idea is that each crate can define its own jar and the jars can be combined into a single
/// database in the top-level crate. Each crate also defines its own `Database` trait. The combination of
/// the `Database` trait and the jar makes it possible to write queries in isolation without knowing how they get composed at the upper levels.
///
/// Salsa further defines a `HasIngredient` trait which slices the jar to a specific storage (e.g. a specific cache).
/// We don't need this just yet because we write our queries by hand. We may want a similar trait if we decide
/// to use a macro to generate the queries.
pub trait HasJar<T> {
    /// Gives a read-only reference to the jar.
    fn jar(&self) -> &T;

    /// Gives a mutable reference to the jar.
    fn jar_mut(&mut self) -> &mut T;
}

#[cfg(test)]
pub(crate) mod tests {
    use crate::db::{HasJar, SourceDb, SourceJar};
    use crate::files::{FileId, Files};
    use crate::lint::{lint_syntax, Diagnostics};
    use crate::module::{
        add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
        ModuleName, ModuleSearchPath,
    };
    use crate::parse::{parse, Parsed};
    use crate::source::{source_text, Source};
    use crate::symbols::{symbol_table, SymbolId, SymbolTable};
    use crate::types::{infer_symbol_type, Type};
    use std::path::Path;
    use std::sync::Arc;

    use super::{SemanticDb, SemanticJar};

    // This can be a partial database used in a single crate for testing.
    // It would hold less data than the full database.
    #[derive(Debug, Default)]
    pub(crate) struct TestDb {
        files: Files,
        source: SourceJar,
        semantic: SemanticJar,
    }

    impl HasJar<SourceJar> for TestDb {
        fn jar(&self) -> &SourceJar {
            &self.source
        }

        fn jar_mut(&mut self) -> &mut SourceJar {
            &mut self.source
        }
    }

    impl HasJar<SemanticJar> for TestDb {
        fn jar(&self) -> &SemanticJar {
            &self.semantic
        }

        fn jar_mut(&mut self) -> &mut SemanticJar {
            &mut self.semantic
        }
    }

    impl SourceDb for TestDb {
        fn file_id(&self, path: &Path) -> FileId {
            self.files.intern(path)
        }

        fn file_path(&self, file_id: FileId) -> Arc<Path> {
            self.files.path(file_id)
        }

        fn source(&self, file_id: FileId) -> Source {
            source_text(self, file_id)
        }

        fn parse(&self, file_id: FileId) -> Parsed {
            parse(self, file_id)
        }

        fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
            lint_syntax(self, file_id)
        }
    }

    impl SemanticDb for TestDb {
        fn resolve_module(&self, name: ModuleName) -> Option<Module> {
            resolve_module(self, name)
        }

        fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable> {
            symbol_table(self, file_id)
        }

        fn path_to_module(&mut self, path: &Path) -> Option<Module> {
            path_to_module(self, path)
        }

        fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
            add_module(self, path)
        }

        fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>) {
            set_module_search_paths(self, paths);
        }

        fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type {
            infer_symbol_type(self, file_id, symbol_id)
        }
    }
}
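[Editor's note: a sketch of a hypothetical query written against these traits (not part of the commit). It names only the capability it needs, `SourceDb`, so the same function runs against `TestDb` above or the full program database.]

fn is_python_file<Db: SourceDb>(db: &Db, file_id: FileId) -> bool {
    // The query never sees the concrete database type, only the trait.
    db.file_path(file_id)
        .extension()
        .is_some_and(|extension| extension == "py")
}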
148  crates/red_knot/src/files.rs  (new file)

@@ -0,0 +1,148 @@
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::sync::Arc;

use hashbrown::hash_map::RawEntryMut;
use parking_lot::RwLock;
use rustc_hash::FxHasher;

use ruff_index::{newtype_index, IndexVec};

type Map<K, V> = hashbrown::HashMap<K, V, ()>;

#[newtype_index]
pub struct FileId;

// TODO we'll need a higher-level virtual file system abstraction that allows testing if a file exists
// or retrieving its content (ideally lazily and in a way that the memory can be retained later).
// I suspect that we'll end up with a FileSystem trait and our own Path abstraction.
#[derive(Clone, Default)]
pub struct Files {
    inner: Arc<RwLock<FilesInner>>,
}

impl Files {
    #[tracing::instrument(level = "trace", skip(path))]
    pub fn intern(&self, path: &Path) -> FileId {
        self.inner.write().intern(path)
    }

    pub fn try_get(&self, path: &Path) -> Option<FileId> {
        self.inner.read().try_get(path)
    }

    // TODO Can we avoid using an `Arc` here? salsa can return references for some reason.
    pub fn path(&self, id: FileId) -> Arc<Path> {
        self.inner.read().path(id)
    }
}

impl Debug for Files {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let files = self.inner.read();
        let mut debug = f.debug_map();
        for item in files.iter() {
            debug.entry(&item.0, &item.1);
        }

        debug.finish()
    }
}

impl PartialEq for Files {
    fn eq(&self, other: &Self) -> bool {
        self.inner.read().eq(&other.inner.read())
    }
}

impl Eq for Files {}

#[derive(Default)]
struct FilesInner {
    // The map stores only ids; hashing and equality go through `by_id`,
    // so each path is stored exactly once.
    by_path: Map<FileId, ()>,
    // TODO should we use a map here to reclaim the space for removed files?
    // TODO I think we should use our own path abstraction here to avoid having to normalize paths
    // and dealing with non-UTF-8 paths everywhere.
    by_id: IndexVec<FileId, Arc<Path>>,
}

impl FilesInner {
    /// Inserts the path and returns a new id for it, or returns the existing id if the path was interned before.
    // TODO should this accept Path or PathBuf?
    pub(crate) fn intern(&mut self, path: &Path) -> FileId {
        let mut hasher = FxHasher::default();
        path.hash(&mut hasher);
        let hash = hasher.finish();

        let entry = self
            .by_path
            .raw_entry_mut()
            .from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path);

        match entry {
            RawEntryMut::Occupied(entry) => *entry.key(),
            RawEntryMut::Vacant(entry) => {
                let id = self.by_id.push(Arc::from(path));
                entry.insert_with_hasher(hash, id, (), |_| hash);
                id
            }
        }
    }

    pub(crate) fn try_get(&self, path: &Path) -> Option<FileId> {
        let mut hasher = FxHasher::default();
        path.hash(&mut hasher);
        let hash = hasher.finish();

        Some(
            *self
                .by_path
                .raw_entry()
                .from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path)?
                .0,
        )
    }

    /// Returns the path for the file with the given id.
    pub(crate) fn path(&self, id: FileId) -> Arc<Path> {
        self.by_id[id].clone()
    }

    pub(crate) fn iter(&self) -> impl Iterator<Item = (FileId, Arc<Path>)> + '_ {
        self.by_path.keys().map(|id| (*id, self.by_id[*id].clone()))
    }
}

impl PartialEq for FilesInner {
    fn eq(&self, other: &Self) -> bool {
        self.by_id == other.by_id
    }
}

impl Eq for FilesInner {}

#[cfg(test)]
mod tests {
    use super::*;
    use std::path::PathBuf;

    #[test]
    fn insert_path_twice_same_id() {
        let files = Files::default();
        let path = PathBuf::from("foo/bar");
        let id1 = files.intern(&path);
        let id2 = files.intern(&path);
        assert_eq!(id1, id2);
    }

    #[test]
    fn insert_different_paths_different_ids() {
        let files = Files::default();
        let path1 = PathBuf::from("foo/bar");
        let path2 = PathBuf::from("foo/bar/baz");
        let id1 = files.intern(&path1);
        let id2 = files.intern(&path2);
        assert_ne!(id1, id2);
    }
}
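[Editor's note: a round-trip sketch. `Files` is a cheap-to-clone handle around a shared interner, so ids agree across clones and, via the `RwLock`, across threads.]

let files = Files::default();
let shared = files.clone();

let id = files.intern(Path::new("src/main.py"));
// A clone sees the same id for the same path...
assert_eq!(shared.try_get(Path::new("src/main.py")), Some(id));
// ...and the id resolves back to the interned path.
assert_eq!(&*shared.path(id), Path::new("src/main.py"));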
67  crates/red_knot/src/hir.rs  (new file)

@@ -0,0 +1,67 @@
//! Key observations
//!
//! The HIR avoids allocations to a large extent by:
//! * using an arena per node type
//! * using IDs and ID ranges to reference items.
//!
//! Using a separate arena per node type has the advantage that the IDs are relatively stable, because
//! they only change when a node of the same kind has been added or removed. (What's unclear is whether that
//! matters, or if it still triggers a re-compute because the AST id in the node has changed.)
//!
//! The HIR does not store all details. It mainly stores the *public* interface. There's a reference
//! back to the AST node to get more details.

use crate::ast_ids::{HasAstId, TypedAstId};
use crate::files::FileId;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};

pub struct HirAstId<N: HasAstId> {
    file_id: FileId,
    node_id: TypedAstId<N>,
}

impl<N: HasAstId> Copy for HirAstId<N> {}
impl<N: HasAstId> Clone for HirAstId<N> {
    fn clone(&self) -> Self {
        *self
    }
}

impl<N: HasAstId> PartialEq for HirAstId<N> {
    fn eq(&self, other: &Self) -> bool {
        self.file_id == other.file_id && self.node_id == other.node_id
    }
}

impl<N: HasAstId> Eq for HirAstId<N> {}

impl<N: HasAstId> std::fmt::Debug for HirAstId<N> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("HirAstId")
            .field("file_id", &self.file_id)
            .field("node_id", &self.node_id)
            .finish()
    }
}

impl<N: HasAstId> Hash for HirAstId<N> {
    fn hash<H: Hasher>(&self, state: &mut H) {
        self.file_id.hash(state);
        self.node_id.hash(state);
    }
}

impl<N: HasAstId> HirAstId<N> {
    pub fn upcast<M: HasAstId>(self) -> HirAstId<M>
    where
        N: Into<M>,
    {
        HirAstId {
            file_id: self.file_id,
            node_id: self.node_id.upcast(),
        }
    }
}
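[Editor's note: a toy illustration, with plain Vecs standing in for the arenas, of the stability claim in the module comment: with one arena per node kind, inserting a class does not shift any function id, whereas a single mixed arena would shift every id after the insertion point.]

let functions = vec!["f", "g"];  // FunctionId 0, 1
let mut classes = vec!["A"];     // ClassId 0
classes.insert(0, "B");          // ClassIds shift (A is now ClassId 1)...
assert_eq!(classes[1], "A");
assert_eq!(functions[1], "g");   // ...but FunctionIds 0 and 1 still mean f and g.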
556
crates/red_knot/src/hir/definition.rs
Normal file
556
crates/red_knot/src/hir/definition.rs
Normal file
|
@ -0,0 +1,556 @@
|
|||
use std::ops::{Index, Range};
|
||||
|
||||
use ruff_index::{newtype_index, IndexVec};
|
||||
use ruff_python_ast::visitor::preorder;
|
||||
use ruff_python_ast::visitor::preorder::PreorderVisitor;
|
||||
use ruff_python_ast::{
|
||||
Decorator, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule, Stmt,
|
||||
StmtAnnAssign, StmtAssign, StmtClassDef, StmtFunctionDef, StmtGlobal, StmtImport,
|
||||
StmtImportFrom, StmtNonlocal, StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
|
||||
TypeParamTypeVarTuple, WithItem,
|
||||
};
|
||||
|
||||
use crate::ast_ids::{AstIds, HasAstId};
|
||||
use crate::files::FileId;
|
||||
use crate::hir::HirAstId;
|
||||
use crate::Name;
|
||||
|
||||
#[newtype_index]
|
||||
pub struct FunctionId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Function {
|
||||
ast_id: HirAstId<StmtFunctionDef>,
|
||||
name: Name,
|
||||
parameters: Range<ParameterId>,
|
||||
type_parameters: Range<TypeParameterId>, // TODO: type_parameters, return expression, decorators
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct ParameterId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Parameter {
|
||||
kind: ParameterKind,
|
||||
name: Name,
|
||||
default: Option<()>, // TODO use expression HIR
|
||||
ast_id: HirAstId<ruff_python_ast::Parameter>,
|
||||
}
|
||||
|
||||
// TODO or should `Parameter` be an enum?
|
||||
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
|
||||
pub enum ParameterKind {
|
||||
PositionalOnly,
|
||||
Arguments,
|
||||
Vararg,
|
||||
KeywordOnly,
|
||||
Kwarg,
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct ClassId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Class {
|
||||
name: Name,
|
||||
ast_id: HirAstId<StmtClassDef>,
|
||||
// TODO type parameters, inheritance, decorators, members
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct AssignmentId;
|
||||
|
||||
// This can have more than one name...
|
||||
// but that means we can't implement `name()` on `ModuleItem`.
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Assignment {
|
||||
// TODO: Handle multiple names / targets
|
||||
name: Name,
|
||||
ast_id: HirAstId<StmtAssign>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct AnnotatedAssignment {
|
||||
name: Name,
|
||||
ast_id: HirAstId<StmtAnnAssign>,
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct AnnotatedAssignmentId;
|
||||
|
||||
#[newtype_index]
|
||||
pub struct TypeAliasId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct TypeAlias {
|
||||
name: Name,
|
||||
ast_id: HirAstId<StmtTypeAlias>,
|
||||
parameters: Range<TypeParameterId>,
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct TypeParameterId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub enum TypeParameter {
|
||||
TypeVar(TypeParameterTypeVar),
|
||||
ParamSpec(TypeParameterParamSpec),
|
||||
TypeVarTuple(TypeParameterTypeVarTuple),
|
||||
}
|
||||
|
||||
impl TypeParameter {
|
||||
pub fn ast_id(&self) -> HirAstId<TypeParam> {
|
||||
match self {
|
||||
TypeParameter::TypeVar(type_var) => type_var.ast_id.upcast(),
|
||||
TypeParameter::ParamSpec(param_spec) => param_spec.ast_id.upcast(),
|
||||
TypeParameter::TypeVarTuple(type_var_tuple) => type_var_tuple.ast_id.upcast(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct TypeParameterTypeVar {
|
||||
name: Name,
|
||||
ast_id: HirAstId<TypeParamTypeVar>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct TypeParameterParamSpec {
|
||||
name: Name,
|
||||
ast_id: HirAstId<TypeParamParamSpec>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct TypeParameterTypeVarTuple {
|
||||
name: Name,
|
||||
ast_id: HirAstId<TypeParamTypeVarTuple>,
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct GlobalId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct Global {
|
||||
// TODO track names
|
||||
ast_id: HirAstId<StmtGlobal>,
|
||||
}
|
||||
|
||||
#[newtype_index]
|
||||
pub struct NonLocalId;
|
||||
|
||||
#[derive(Debug, Clone, Eq, PartialEq)]
|
||||
pub struct NonLocal {
|
||||
// TODO track names
|
||||
ast_id: HirAstId<StmtNonlocal>,
|
||||
}
|
||||
|
||||
pub enum DefinitionId {
|
||||
Function(FunctionId),
|
||||
Parameter(ParameterId),
|
||||
Class(ClassId),
|
||||
Assignment(AssignmentId),
|
||||
AnnotatedAssignment(AnnotatedAssignmentId),
|
||||
Global(GlobalId),
|
||||
NonLocal(NonLocalId),
|
||||
TypeParameter(TypeParameterId),
|
||||
TypeAlias(TypeAlias),
|
||||
}
|
||||
|
||||
pub enum DefinitionItem {
|
||||
Function(Function),
|
||||
Parameter(Parameter),
|
||||
Class(Class),
|
||||
Assignment(Assignment),
|
||||
AnnotatedAssignment(AnnotatedAssignment),
|
||||
Global(Global),
|
||||
NonLocal(NonLocal),
|
||||
TypeParameter(TypeParameter),
|
||||
TypeAlias(TypeAlias),
|
||||
}
|
||||
|
||||
// The closest is rust-analyzers item-tree. It only represents "Items" which make the public interface of a module
|
||||
// (it excludes any other statement or expressions). rust-analyzer uses it as the main input to the name resolution
|
||||
// algorithm
|
||||
// > It is the input to the name resolution algorithm, as well as to the queries defined in `adt.rs`,
|
||||
// > `data.rs`, and most things in `attr.rs`.
|
||||
//
|
||||
// > One important purpose of this layer is to provide an "invalidation barrier" for incremental
|
||||
// > computations: when typing inside an item body, the `ItemTree` of the modified file is typically
|
||||
// > unaffected, so we don't have to recompute name resolution results or item data (see `data.rs`).
|
||||
//
|
||||
// I haven't fully figured this out but I think that this composes the "public" interface of a module?
|
||||
// But maybe that's too optimistic.
|
||||
//
|
||||
//
|
||||
#[derive(Debug, Clone, Default, Eq, PartialEq)]
|
||||
pub struct Definitions {
|
||||
functions: IndexVec<FunctionId, Function>,
|
||||
parameters: IndexVec<ParameterId, Parameter>,
|
||||
classes: IndexVec<ClassId, Class>,
|
||||
assignments: IndexVec<AssignmentId, Assignment>,
|
||||
annotated_assignments: IndexVec<AnnotatedAssignmentId, AnnotatedAssignment>,
|
||||
type_aliases: IndexVec<TypeAliasId, TypeAlias>,
|
||||
type_parameters: IndexVec<TypeParameterId, TypeParameter>,
|
||||
globals: IndexVec<GlobalId, Global>,
|
||||
non_locals: IndexVec<NonLocalId, NonLocal>,
|
||||
}
|
||||
|
||||
impl Definitions {
|
||||
pub fn from_module(module: &ModModule, ast_ids: &AstIds, file_id: FileId) -> Self {
|
||||
let mut visitor = DefinitionsVisitor {
|
||||
definitions: Definitions::default(),
|
||||
ast_ids,
|
||||
file_id,
|
||||
};
|
||||
|
||||
visitor.visit_body(&module.body);
|
||||
|
||||
visitor.definitions
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<FunctionId> for Definitions {
|
||||
type Output = Function;
|
||||
|
||||
fn index(&self, index: FunctionId) -> &Self::Output {
|
||||
&self.functions[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<ParameterId> for Definitions {
|
||||
type Output = Parameter;
|
||||
|
||||
fn index(&self, index: ParameterId) -> &Self::Output {
|
||||
&self.parameters[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<ClassId> for Definitions {
|
||||
type Output = Class;
|
||||
|
||||
fn index(&self, index: ClassId) -> &Self::Output {
|
||||
&self.classes[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<AssignmentId> for Definitions {
|
||||
type Output = Assignment;
|
||||
|
||||
fn index(&self, index: AssignmentId) -> &Self::Output {
|
||||
&self.assignments[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<AnnotatedAssignmentId> for Definitions {
|
||||
type Output = AnnotatedAssignment;
|
||||
|
||||
fn index(&self, index: AnnotatedAssignmentId) -> &Self::Output {
|
||||
&self.annotated_assignments[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<TypeAliasId> for Definitions {
|
||||
type Output = TypeAlias;
|
||||
|
||||
fn index(&self, index: TypeAliasId) -> &Self::Output {
|
||||
&self.type_aliases[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<GlobalId> for Definitions {
|
||||
type Output = Global;
|
||||
|
||||
fn index(&self, index: GlobalId) -> &Self::Output {
|
||||
&self.globals[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<NonLocalId> for Definitions {
|
||||
type Output = NonLocal;
|
||||
|
||||
fn index(&self, index: NonLocalId) -> &Self::Output {
|
||||
&self.non_locals[index]
|
||||
}
|
||||
}
|
||||
|
||||
impl Index<TypeParameterId> for Definitions {
|
||||
type Output = TypeParameter;
|
||||
|
||||
fn index(&self, index: TypeParameterId) -> &Self::Output {
|
||||
&self.type_parameters[index]
|
||||
}
|
||||
}
|
||||
|
||||
struct DefinitionsVisitor<'a> {
|
||||
definitions: Definitions,
|
||||
ast_ids: &'a AstIds,
|
||||
file_id: FileId,
|
||||
}
|
||||
|
||||
impl DefinitionsVisitor<'_> {
|
||||
fn ast_id<N: HasAstId>(&self, node: &N) -> HirAstId<N> {
|
||||
HirAstId {
|
||||
file_id: self.file_id,
|
||||
node_id: self.ast_ids.ast_id(node),
|
||||
}
|
||||
}
|
||||
|
||||
fn lower_function_def(&mut self, function: &StmtFunctionDef) -> FunctionId {
|
||||
let name = Name::new(&function.name);
|
||||
|
||||
let first_type_parameter_id = self.definitions.type_parameters.next_index();
|
||||
let mut last_type_parameter_id = first_type_parameter_id;
|
||||
|
||||
if let Some(type_params) = &function.type_params {
|
||||
for parameter in &type_params.type_params {
|
||||
let id = self.lower_type_parameter(parameter);
|
||||
last_type_parameter_id = id;
|
||||
}
|
||||
}
|
||||
|
||||
let parameters = self.lower_parameters(&function.parameters);
|
||||
|
||||
self.definitions.functions.push(Function {
|
||||
name,
|
||||
ast_id: self.ast_id(function),
|
||||
parameters,
|
||||
type_parameters: first_type_parameter_id..last_type_parameter_id,
|
||||
})
|
||||
}
|
||||
|
||||
fn lower_parameters(&mut self, parameters: &ruff_python_ast::Parameters) -> Range<ParameterId> {
|
||||
let first_parameter_id = self.definitions.parameters.next_index();
|
||||
let mut last_parameter_id = first_parameter_id;
|
||||
|
||||
for parameter in ¶meters.posonlyargs {
|
||||
last_parameter_id = self.definitions.parameters.push(Parameter {
|
||||
kind: ParameterKind::PositionalOnly,
|
||||
name: Name::new(¶meter.parameter.name),
|
||||
default: None,
|
||||
ast_id: self.ast_id(¶meter.parameter),
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(vararg) = ¶meters.vararg {
|
||||
last_parameter_id = self.definitions.parameters.push(Parameter {
|
||||
kind: ParameterKind::Vararg,
|
||||
name: Name::new(&vararg.name),
|
||||
default: None,
|
||||
ast_id: self.ast_id(vararg),
|
||||
});
|
||||
}
|
||||
|
||||
for parameter in ¶meters.kwonlyargs {
|
||||
last_parameter_id = self.definitions.parameters.push(Parameter {
|
||||
kind: ParameterKind::KeywordOnly,
|
||||
name: Name::new(¶meter.parameter.name),
|
||||
default: None,
|
||||
ast_id: self.ast_id(¶meter.parameter),
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(kwarg) = ¶meters.kwarg {
|
||||
last_parameter_id = self.definitions.parameters.push(Parameter {
|
||||
kind: ParameterKind::KeywordOnly,
|
||||
name: Name::new(&kwarg.name),
|
||||
default: None,
|
||||
ast_id: self.ast_id(kwarg),
|
||||
});
|
||||
}
|
||||
|
||||
first_parameter_id..last_parameter_id
|
||||
}
|
||||
|
||||
fn lower_class_def(&mut self, class: &StmtClassDef) -> ClassId {
|
||||
let name = Name::new(&class.name);
|
||||
|
||||
self.definitions.classes.push(Class {
|
||||
name,
            ast_id: self.ast_id(class),
        })
    }

    fn lower_assignment(&mut self, assignment: &StmtAssign) {
        // FIXME handle multiple names
        if let Some(Expr::Name(name)) = assignment.targets.first() {
            self.definitions.assignments.push(Assignment {
                name: Name::new(&name.id),
                ast_id: self.ast_id(assignment),
            });
        }
    }

    fn lower_annotated_assignment(&mut self, annotated_assignment: &StmtAnnAssign) {
        if let Expr::Name(name) = &*annotated_assignment.target {
            self.definitions
                .annotated_assignments
                .push(AnnotatedAssignment {
                    name: Name::new(&name.id),
                    ast_id: self.ast_id(annotated_assignment),
                });
        }
    }

    fn lower_type_alias(&mut self, type_alias: &StmtTypeAlias) {
        if let Expr::Name(name) = &*type_alias.name {
            let name = Name::new(&name.id);

            let lower_parameters_id = self.definitions.type_parameters.next_index();
            let mut last_parameter_id = lower_parameters_id;

            if let Some(type_params) = &type_alias.type_params {
                for type_parameter in &type_params.type_params {
                    let id = self.lower_type_parameter(type_parameter);
                    last_parameter_id = id;
                }
            }

            self.definitions.type_aliases.push(TypeAlias {
                name,
                ast_id: self.ast_id(type_alias),
                parameters: lower_parameters_id..last_parameter_id,
            });
        }
    }

    fn lower_type_parameter(&mut self, type_parameter: &TypeParam) -> TypeParameterId {
        match type_parameter {
            TypeParam::TypeVar(type_var) => {
                self.definitions
                    .type_parameters
                    .push(TypeParameter::TypeVar(TypeParameterTypeVar {
                        name: Name::new(&type_var.name),
                        ast_id: self.ast_id(type_var),
                    }))
            }
            TypeParam::ParamSpec(param_spec) => {
                self.definitions
                    .type_parameters
                    .push(TypeParameter::ParamSpec(TypeParameterParamSpec {
                        name: Name::new(&param_spec.name),
                        ast_id: self.ast_id(param_spec),
                    }))
            }
            TypeParam::TypeVarTuple(type_var_tuple) => {
                self.definitions
                    .type_parameters
                    .push(TypeParameter::TypeVarTuple(TypeParameterTypeVarTuple {
                        name: Name::new(&type_var_tuple.name),
                        ast_id: self.ast_id(type_var_tuple),
                    }))
            }
        }
    }

    fn lower_import(&mut self, _import: &StmtImport) {
        // TODO
    }

    fn lower_import_from(&mut self, _import_from: &StmtImportFrom) {
        // TODO
    }

    fn lower_global(&mut self, global: &StmtGlobal) -> GlobalId {
        self.definitions.globals.push(Global {
            ast_id: self.ast_id(global),
        })
    }

    fn lower_non_local(&mut self, non_local: &StmtNonlocal) -> NonLocalId {
        self.definitions.non_locals.push(NonLocal {
            ast_id: self.ast_id(non_local),
        })
    }

    fn lower_except_handler(&mut self, _except_handler: &ExceptHandlerExceptHandler) {
        // TODO
    }

    fn lower_with_item(&mut self, _with_item: &WithItem) {
        // TODO
    }

    fn lower_match_case(&mut self, _match_case: &MatchCase) {
        // TODO
    }
}

impl PreorderVisitor<'_> for DefinitionsVisitor<'_> {
    fn visit_stmt(&mut self, stmt: &Stmt) {
        match stmt {
            // Definition statements
            Stmt::FunctionDef(definition) => {
                self.lower_function_def(definition);
                self.visit_body(&definition.body);
            }
            Stmt::ClassDef(definition) => {
                self.lower_class_def(definition);
                self.visit_body(&definition.body);
            }
            Stmt::Assign(assignment) => {
                self.lower_assignment(assignment);
            }
            Stmt::AnnAssign(annotated_assignment) => {
                self.lower_annotated_assignment(annotated_assignment);
            }
            Stmt::TypeAlias(type_alias) => {
                self.lower_type_alias(type_alias);
            }

            Stmt::Import(import) => self.lower_import(import),
            Stmt::ImportFrom(import_from) => self.lower_import_from(import_from),
            Stmt::Global(global) => {
                self.lower_global(global);
            }
            Stmt::Nonlocal(non_local) => {
                self.lower_non_local(non_local);
            }

            // Visit the compound statement bodies because they can contain other definitions.
            Stmt::For(_)
            | Stmt::While(_)
            | Stmt::If(_)
            | Stmt::With(_)
            | Stmt::Match(_)
            | Stmt::Try(_) => {
                preorder::walk_stmt(self, stmt);
            }

            // Skip over simple statements because they can't contain any other definitions.
            Stmt::Return(_)
            | Stmt::Delete(_)
            | Stmt::AugAssign(_)
            | Stmt::Raise(_)
            | Stmt::Assert(_)
            | Stmt::Expr(_)
            | Stmt::Pass(_)
            | Stmt::Break(_)
            | Stmt::Continue(_)
            | Stmt::IpyEscapeCommand(_) => {
                // No-op.
            }
        }
    }

    fn visit_expr(&mut self, _: &'_ Expr) {}

    fn visit_decorator(&mut self, _decorator: &'_ Decorator) {}

    fn visit_except_handler(&mut self, except_handler: &'_ ExceptHandler) {
        match except_handler {
            ExceptHandler::ExceptHandler(except_handler) => {
                self.lower_except_handler(except_handler);
            }
        }
    }

    fn visit_with_item(&mut self, with_item: &'_ WithItem) {
        self.lower_with_item(with_item);
    }

    fn visit_match_case(&mut self, match_case: &'_ MatchCase) {
        self.lower_match_case(match_case);
        self.visit_body(&match_case.body);
    }
}
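
The visitor above prunes expression subtrees on purpose: in Python, new definitions are only ever introduced by statements. A minimal sketch of the same pruning idea, using a hypothetical `DefCounter` visitor (illustrative only, not part of this commit):

use ruff_python_ast::visitor::preorder::{self, PreorderVisitor};
use ruff_python_ast::{Expr, Stmt};

// Hypothetical visitor that counts definition statements while skipping
// expression subtrees, mirroring how `DefinitionsVisitor` prunes its walk.
#[derive(Default)]
struct DefCounter {
    count: usize,
}

impl PreorderVisitor<'_> for DefCounter {
    fn visit_stmt(&mut self, stmt: &Stmt) {
        if matches!(stmt, Stmt::FunctionDef(_) | Stmt::ClassDef(_)) {
            self.count += 1;
        }
        // Keep walking so that definitions nested in compound statements are counted too.
        preorder::walk_stmt(self, stmt);
    }

    // Definitions never hide inside plain expressions, so don't descend into them.
    fn visit_expr(&mut self, _: &Expr) {}
}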
83
crates/red_knot/src/lib.rs
Normal file

@@ -0,0 +1,83 @@
use std::hash::BuildHasherDefault;
use std::path::{Path, PathBuf};

use rustc_hash::{FxHashSet, FxHasher};

use crate::files::FileId;

pub mod ast_ids;
pub mod cache;
pub mod cancellation;
pub mod db;
pub mod files;
pub mod hir;
pub mod lint;
pub mod module;
mod parse;
pub mod program;
pub mod source;
mod symbols;
mod types;
pub mod watch;

pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
#[allow(unused)]
pub(crate) type FxDashSet<V> = dashmap::DashSet<V, BuildHasherDefault<FxHasher>>;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;

#[derive(Debug)]
pub struct Workspace {
    /// TODO this should be a resolved path. We should probably use a newtype wrapper that
    /// guarantees that the path is UTF-8 and normalized.
    root: PathBuf,
    /// The files that are open in the workspace.
    ///
    /// * Editor: The files that are actively being edited in the editor (the user has a tab open with the file).
    /// * CLI: The resolved files passed as arguments to the CLI.
    open_files: FxHashSet<FileId>,
}

impl Workspace {
    pub fn new(root: PathBuf) -> Self {
        Self {
            root,
            open_files: FxHashSet::default(),
        }
    }

    pub fn root(&self) -> &Path {
        self.root.as_path()
    }

    // TODO having the content in the workspace feels wrong.
    pub fn open_file(&mut self, file_id: FileId) {
        self.open_files.insert(file_id);
    }

    pub fn close_file(&mut self, file_id: FileId) {
        self.open_files.remove(&file_id);
    }

    // TODO introduce an `OpenFile` type instead of using an anonymous tuple.
    pub fn open_files(&self) -> impl Iterator<Item = FileId> + '_ {
        self.open_files.iter().copied()
    }

    pub fn is_file_open(&self, file_id: FileId) -> bool {
        self.open_files.contains(&file_id)
    }
}

#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);

impl Name {
    #[inline]
    pub fn new(name: &str) -> Self {
        Self(smol_str::SmolStr::new(name))
    }

    pub fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
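
A short sketch of how `Workspace` and `Name` are meant to be used; the file ID comes from the `Files` interner that `main.rs` uses (illustrative, not code from this commit):

use std::path::{Path, PathBuf};

use red_knot::files::Files;
use red_knot::{Name, Workspace};

fn workspace_example() {
    let files = Files::default();
    let file_id = files.intern(Path::new("/workspace/src/foo.py"));

    let mut workspace = Workspace::new(PathBuf::from("/workspace"));
    workspace.open_file(file_id);
    assert!(workspace.is_file_open(file_id));

    workspace.close_file(file_id);
    assert_eq!(workspace.open_files().count(), 0);

    // `Name` is a cheap-to-clone identifier backed by `SmolStr`.
    let name = Name::new("foo");
    assert_eq!(name.as_str(), "foo");
}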
124
crates/red_knot/src/lint.rs
Normal file

@@ -0,0 +1,124 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;

use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::StringLiteral;

use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use crate::files::FileId;

pub(crate) fn lint_syntax<Db>(db: &Db, file_id: FileId) -> Diagnostics
where
    Db: SourceDb + HasJar<SourceJar>,
{
    let storage = &db.jar().lint_syntax;

    storage.get(&file_id, |file_id| {
        let mut diagnostics = Vec::new();

        let source = db.source(*file_id);
        lint_lines(source.text(), &mut diagnostics);

        let parsed = db.parse(*file_id);

        if parsed.errors().is_empty() {
            let ast = parsed.ast();

            let mut visitor = SyntaxLintVisitor {
                diagnostics,
                source: source.text(),
            };
            visitor.visit_body(&ast.body);
            diagnostics = visitor.diagnostics;
        } else {
            diagnostics.extend(parsed.errors().iter().map(std::string::ToString::to_string));
        }

        Diagnostics::from(diagnostics)
    })
}

pub(crate) fn lint_lines(source: &str, diagnostics: &mut Vec<String>) {
    for (line_number, line) in source.lines().enumerate() {
        // Cheap pre-check on the byte length before the more expensive character count.
        if line.len() < 88 {
            continue;
        }

        let char_count = line.chars().count();
        if char_count > 88 {
            diagnostics.push(format!(
                "Line {} is too long ({} characters)",
                line_number + 1,
                char_count
            ));
        }
    }
}

#[derive(Debug)]
struct SyntaxLintVisitor<'a> {
    diagnostics: Vec<String>,
    source: &'a str,
}

impl Visitor<'_> for SyntaxLintVisitor<'_> {
    fn visit_string_literal(&mut self, string_literal: &'_ StringLiteral) {
        // A very naive implementation of a "use double quotes" rule.
        let text = &self.source[string_literal.range];

        if text.starts_with('\'') {
            self.diagnostics
                .push("Use double quotes for strings".to_string());
        }
    }
}

#[derive(Debug, Clone)]
pub enum Diagnostics {
    Empty,
    List(Arc<Vec<String>>),
}

impl Diagnostics {
    pub fn as_slice(&self) -> &[String] {
        match self {
            Diagnostics::Empty => &[],
            Diagnostics::List(list) => list.as_slice(),
        }
    }
}

impl Deref for Diagnostics {
    type Target = [String];

    fn deref(&self) -> &Self::Target {
        self.as_slice()
    }
}

impl From<Vec<String>> for Diagnostics {
    fn from(value: Vec<String>) -> Self {
        if value.is_empty() {
            Diagnostics::Empty
        } else {
            Diagnostics::List(Arc::new(value))
        }
    }
}

#[derive(Default, Debug)]
pub struct LintSyntaxStorage(KeyValueCache<FileId, Diagnostics>);

impl Deref for LintSyntaxStorage {
    type Target = KeyValueCache<FileId, Diagnostics>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for LintSyntaxStorage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
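
For illustration, a sketch of the hard-coded line-length rule and the `Diagnostics` conversion above (crate-internal, since `lint_lines` is `pub(crate)`; not a test from this commit):

fn lint_lines_example() {
    let mut diagnostics = Vec::new();

    // 100 characters on line 2 exceed the hard-coded limit of 88.
    let source = format!("short line\n{}\n", "x".repeat(100));
    lint_lines(&source, &mut diagnostics);
    assert_eq!(
        diagnostics,
        vec!["Line 2 is too long (100 characters)".to_string()]
    );

    // An empty diagnostics list converts to the allocation-free variant.
    assert!(matches!(Diagnostics::from(Vec::new()), Diagnostics::Empty));
}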
399
crates/red_knot/src/main.rs
Normal file

@@ -0,0 +1,399 @@
use std::collections::hash_map::Entry;
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};

use rustc_hash::FxHashMap;
use tracing::subscriber::Interest;
use tracing::{Level, Metadata};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::layer::{Context, Filter, SubscriberExt};
use tracing_subscriber::{Layer, Registry};
use tracing_tree::time::Uptime;

use red_knot::cancellation::CancellationSource;
use red_knot::db::{HasJar, SourceDb, SourceJar};
use red_knot::files::FileId;
use red_knot::module::{ModuleSearchPath, ModuleSearchPathKind};
use red_knot::program::{FileChange, FileChangeKind, Program};
use red_knot::watch::FileWatcher;
use red_knot::{files, Workspace};

#[allow(
    clippy::dbg_macro,
    clippy::print_stdout,
    clippy::unnecessary_wraps,
    clippy::print_stderr
)]
fn main() -> anyhow::Result<()> {
    setup_tracing();

    let arguments: Vec<_> = std::env::args().collect();

    if arguments.len() < 2 {
        eprintln!("Usage: red_knot <path>");
        return Err(anyhow::anyhow!("Invalid arguments"));
    }

    let entry_point = Path::new(&arguments[1]);

    if !entry_point.exists() {
        eprintln!("The entry point does not exist.");
        return Err(anyhow::anyhow!("Invalid arguments"));
    }

    if !entry_point.is_file() {
        eprintln!("The entry point is not a file.");
        return Err(anyhow::anyhow!("Invalid arguments"));
    }

    let files = files::Files::default();
    let workspace_folder = entry_point.parent().unwrap();
    let mut workspace = Workspace::new(workspace_folder.to_path_buf());

    let workspace_search_path = ModuleSearchPath::new(
        workspace.root().to_path_buf(),
        ModuleSearchPathKind::FirstParty,
    );

    let entry_id = files.intern(entry_point);

    let mut program = Program::new(vec![workspace_search_path], files.clone());

    workspace.open_file(entry_id);

    let (sender, receiver) = crossbeam_channel::bounded(
        std::thread::available_parallelism()
            .map(NonZeroUsize::get)
            .unwrap_or(50)
            .max(4), // TODO: Both these numbers are very arbitrary. Pick sensible defaults.
    );

    // Listen to Ctrl+C and abort the watch mode.
    let abort_sender = Mutex::new(Some(sender.clone()));
    ctrlc::set_handler(move || {
        let mut lock = abort_sender.lock().unwrap();

        if let Some(sender) = lock.take() {
            sender.send(Message::Exit).unwrap();
        }
    })?;

    // Watch for file changes and re-trigger the analysis.
    let file_changes_sender = sender.clone();

    let mut file_watcher = FileWatcher::new(
        move |changes| {
            file_changes_sender
                .send(Message::FileChanges(changes))
                .unwrap();
        },
        files.clone(),
    )?;

    file_watcher.watch_folder(workspace_folder)?;

    let files_to_check = vec![entry_id];

    // Main loop that runs until the user exits the program.
    // Runs the analysis for each changed file. Cancels the analysis if a new change is detected.
    loop {
        let changes = {
            tracing::trace!("Main Loop: Tick");

            // Token to cancel the analysis if a new change is detected.
            let run_cancellation_token_source = CancellationSource::new();
            let run_cancellation_token = run_cancellation_token_source.token();

            // Tracks the number of pending analysis runs.
            let pending_analysis = Arc::new(AtomicUsize::new(0));

            // Take read-only references that are `Copy` and `Send`.
            let program = &program;
            let workspace = &workspace;

            let receiver = receiver.clone();
            let started_analysis = pending_analysis.clone();

            // Orchestration task. Ideally, we would run this on the main thread, but we start it
            // as soon as possible so that we avoid scheduling tasks when we already know that
            // we're about to exit or cancel the analysis because of a file change.
            // This uses `std::thread::spawn` because we don't want it to run inside the thread
            // pool, or this code deadlocks when using a thread pool of size 1.
            let orchestration_handle = std::thread::spawn(move || {
                fn consume_pending_messages(
                    receiver: &crossbeam_channel::Receiver<Message>,
                    mut aggregated_changes: AggregatedChanges,
                ) -> NextTickCommand {
                    loop {
                        // Consume possibly incoming file change messages before running a new
                        // analysis, but don't wait for more than 100ms.
                        crossbeam_channel::select! {
                            recv(receiver) -> message => {
                                match message {
                                    Ok(Message::Exit) => {
                                        return NextTickCommand::Exit;
                                    }
                                    Ok(Message::FileChanges(file_changes)) => {
                                        aggregated_changes.extend(file_changes);
                                    }

                                    Ok(Message::AnalysisCancelled | Message::AnalysisCompleted(_)) => {
                                        unreachable!(
                                            "All analysis runs should have completed at this point"
                                        );
                                    },

                                    Err(_) => {
                                        // There are no more senders, no point in waiting for more messages.
                                        break;
                                    }
                                }
                            },
                            default(std::time::Duration::from_millis(100)) => {
                                break;
                            }
                        }
                    }

                    NextTickCommand::FileChanges(aggregated_changes)
                }

                let mut diagnostics = Vec::new();
                let mut aggregated_changes = AggregatedChanges::default();

                for message in &receiver {
                    match message {
                        Message::AnalysisCompleted(file_diagnostics) => {
                            diagnostics.extend_from_slice(&file_diagnostics);

                            if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
                                // Analysis completed, print the diagnostics.
                                dbg!(&diagnostics);
                            }
                        }

                        Message::AnalysisCancelled => {
                            if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
                                return consume_pending_messages(&receiver, aggregated_changes);
                            }
                        }

                        Message::Exit => {
                            run_cancellation_token_source.cancel();

                            // Don't consume any outstanding messages because we're exiting anyway.
                            return NextTickCommand::Exit;
                        }

                        Message::FileChanges(changes) => {
                            // Request cancellation, but wait until all analysis tasks have
                            // completed to avoid stale messages in the next main loop.
                            run_cancellation_token_source.cancel();

                            aggregated_changes.extend(changes);

                            if pending_analysis.load(Ordering::SeqCst) == 0 {
                                return consume_pending_messages(&receiver, aggregated_changes);
                            }
                        }
                    }
                }

                // This is reached if there's no Ctrl+C handler and no file watcher.
                // In that case, assume that we don't run in watch mode and exit.
                NextTickCommand::Exit
            });

            // Start the analysis tasks on the thread pool and wait until they complete.
            rayon::scope(|scope| {
                for file in &files_to_check {
                    let cancellation_token = run_cancellation_token.clone();
                    if cancellation_token.is_cancelled() {
                        break;
                    }

                    let sender = sender.clone();

                    started_analysis.fetch_add(1, Ordering::SeqCst);

                    // TODO: How do we allow the host to control the number of threads used?
                    // Or should we just assume that each host implements its own main loop?
                    // I don't think that's entirely unreasonable, but we should avoid having
                    // different main loops per host AND command (e.g. format vs. check vs. lint).
                    scope.spawn(move |_| {
                        if cancellation_token.is_cancelled() {
                            tracing::trace!("Exit analysis because cancellation was requested.");
                            sender.send(Message::AnalysisCancelled).unwrap();
                            return;
                        }

                        // TODO schedule the dependencies.
                        let mut diagnostics = Vec::new();

                        if workspace.is_file_open(*file) {
                            diagnostics.extend_from_slice(&program.lint_syntax(*file));
                        }

                        sender
                            .send(Message::AnalysisCompleted(diagnostics))
                            .unwrap();
                    });
                }
            });

            // Wait for the orchestration task to complete. This either returns the file changes
            // or instructs the main loop to exit.
            match orchestration_handle.join().unwrap() {
                NextTickCommand::FileChanges(changes) => changes,
                NextTickCommand::Exit => {
                    break;
                }
            }
        };

        // We have a mutable reference here and can perform all necessary invalidations.
        program.apply_changes(changes.iter());
    }

    let source_jar: &SourceJar = program.jar();

    dbg!(source_jar.parsed.statistics());
    dbg!(source_jar.sources.statistics());

    Ok(())
}

enum Message {
    AnalysisCompleted(Vec<String>),
    AnalysisCancelled,
    Exit,
    FileChanges(Vec<FileChange>),
}

#[derive(Default, Debug)]
struct AggregatedChanges {
    changes: FxHashMap<FileId, FileChangeKind>,
}

impl AggregatedChanges {
    fn add(&mut self, change: FileChange) {
        match self.changes.entry(change.file_id()) {
            Entry::Occupied(mut entry) => {
                let merged = entry.get_mut();

                match (merged, change.kind()) {
                    (FileChangeKind::Created, FileChangeKind::Deleted) => {
                        // Deletion after creation means that ruff never saw the file.
                        entry.remove();
                    }
                    (FileChangeKind::Created, FileChangeKind::Modified) => {
                        // No-op: for ruff, modifying a file that it doesn't yet know exists
                        // is still considered a creation.
                    }

                    (FileChangeKind::Modified, FileChangeKind::Created) => {
                        // That should probably not happen. Continue considering it a modification.
                    }

                    (FileChangeKind::Modified, FileChangeKind::Deleted) => {
                        *entry.get_mut() = FileChangeKind::Deleted;
                    }

                    (FileChangeKind::Deleted, FileChangeKind::Created) => {
                        *entry.get_mut() = FileChangeKind::Modified;
                    }

                    (FileChangeKind::Deleted, FileChangeKind::Modified) => {
                        // That's weird, but let's consider it a modification.
                        *entry.get_mut() = FileChangeKind::Modified;
                    }

                    (FileChangeKind::Created, FileChangeKind::Created)
                    | (FileChangeKind::Modified, FileChangeKind::Modified)
                    | (FileChangeKind::Deleted, FileChangeKind::Deleted) => {
                        // No-op transitions. Some of them should be impossible but we handle them anyway.
                    }
                }
            }
            Entry::Vacant(entry) => {
                entry.insert(change.kind());
            }
        }
    }

    fn extend<I>(&mut self, changes: I)
    where
        I: IntoIterator<Item = FileChange>,
        I::IntoIter: ExactSizeIterator,
    {
        let iter = changes.into_iter();
        self.changes.reserve(iter.len());

        for change in iter {
            self.add(change);
        }
    }

    fn iter(&self) -> impl Iterator<Item = FileChange> + '_ {
        self.changes
            .iter()
            .map(|(id, kind)| FileChange::new(*id, *kind))
    }
}

enum NextTickCommand {
    /// Exit the main loop in the next tick.
    Exit,
    /// Apply the given changes in the next main loop tick.
    FileChanges(AggregatedChanges),
}

fn setup_tracing() {
    let subscriber = Registry::default().with(
        tracing_tree::HierarchicalLayer::default()
            .with_indent_lines(true)
            .with_indent_amount(2)
            .with_bracketed_fields(true)
            .with_targets(true)
            .with_writer(|| Box::new(std::io::stderr()))
            .with_timer(Uptime::default())
            .with_filter(LoggingFilter {
                trace_level: Level::TRACE,
            }),
    );

    tracing::subscriber::set_global_default(subscriber).unwrap();
}

struct LoggingFilter {
    trace_level: Level,
}

impl LoggingFilter {
    fn is_enabled(&self, meta: &Metadata<'_>) -> bool {
        let filter = if meta.target().starts_with("red_knot") || meta.target().starts_with("ruff") {
            self.trace_level
        } else {
            Level::INFO
        };

        meta.level() <= &filter
    }
}

impl<S> Filter<S> for LoggingFilter {
    fn enabled(&self, meta: &Metadata<'_>, _cx: &Context<'_, S>) -> bool {
        self.is_enabled(meta)
    }

    fn callsite_enabled(&self, meta: &'static Metadata<'static>) -> Interest {
        if self.is_enabled(meta) {
            Interest::always()
        } else {
            Interest::never()
        }
    }

    fn max_level_hint(&self) -> Option<LevelFilter> {
        Some(LevelFilter::from_level(self.trace_level))
    }
}
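
A small sketch of the merge semantics implemented by `AggregatedChanges::add`; the `FileId` comes from the `Files` interner (illustrative, not part of this commit):

fn aggregated_changes_example() {
    let files = red_knot::files::Files::default();
    let id = files.intern(std::path::Path::new("/workspace/example.py"));

    let mut changes = AggregatedChanges::default();
    changes.add(FileChange::new(id, FileChangeKind::Created));
    changes.add(FileChange::new(id, FileChangeKind::Deleted));

    // `Created` followed by `Deleted` cancels out: ruff never saw the file.
    assert_eq!(changes.iter().count(), 0);

    changes.add(FileChange::new(id, FileChangeKind::Deleted));
    changes.add(FileChange::new(id, FileChangeKind::Created));

    // `Deleted` followed by `Created` collapses into a single `Modified`.
    assert!(changes
        .iter()
        .all(|change| change.kind() == FileChangeKind::Modified));
}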
911
crates/red_knot/src/module.rs
Normal file

@@ -0,0 +1,911 @@
use std::fmt::Formatter;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicU32;
use std::sync::Arc;

use dashmap::mapref::entry::Entry;

use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::FxDashMap;

/// ID uniquely identifying a module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Module(u32);

impl Module {
    pub fn name<Db>(&self, db: &Db) -> ModuleName
    where
        Db: HasJar<SemanticJar>,
    {
        let modules = &db.jar().module_resolver;

        modules.modules.get(self).unwrap().name.clone()
    }

    pub fn path<Db>(&self, db: &Db) -> ModulePath
    where
        Db: HasJar<SemanticJar>,
    {
        let modules = &db.jar().module_resolver;

        modules.modules.get(self).unwrap().path.clone()
    }
}

/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);

impl ModuleName {
    pub fn new(name: &str) -> Self {
        debug_assert!(!name.is_empty());

        Self(smol_str::SmolStr::new(name))
    }

    pub fn relative(_dots: u32, name: &str, _to: &Path) -> Self {
        // FIXME: Take `to` and `dots` into account.
        Self(smol_str::SmolStr::new(name))
    }

    pub fn from_relative_path(path: &Path) -> Option<Self> {
        let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
            path.parent()?
        } else {
            path
        };

        let name = if let Some(parent) = path.parent() {
            let mut name = String::with_capacity(path.as_os_str().len());

            for component in parent.components() {
                name.push_str(component.as_os_str().to_str()?);
                name.push('.');
            }

            // SAFETY: Unwrap is safe here or `parent` would have returned `None`.
            name.push_str(path.file_stem().unwrap().to_str()?);

            smol_str::SmolStr::from(name)
        } else {
            smol_str::SmolStr::new(path.file_stem()?.to_str()?)
        };

        Some(Self(name))
    }

    pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
        self.0.split('.')
    }

    pub fn parent(&self) -> Option<ModuleName> {
        let (parent, _) = self.0.rsplit_once('.')?;

        Some(Self(smol_str::SmolStr::new(parent)))
    }

    pub fn starts_with(&self, other: &ModuleName) -> bool {
        self.0.starts_with(other.0.as_str())
    }

    pub fn as_str(&self) -> &str {
        &self.0
    }
}

impl std::fmt::Display for ModuleName {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.write_str(&self.0)
    }
}

/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
    inner: Arc<ModuleSearchPathInner>,
}

impl ModuleSearchPath {
    pub fn new(path: PathBuf, kind: ModuleSearchPathKind) -> Self {
        Self {
            inner: Arc::new(ModuleSearchPathInner { path, kind }),
        }
    }

    pub fn kind(&self) -> ModuleSearchPathKind {
        self.inner.kind
    }

    pub fn path(&self) -> &Path {
        &self.inner.path
    }
}

impl std::fmt::Debug for ModuleSearchPath {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        self.inner.fmt(f)
    }
}

#[derive(Debug, Eq, PartialEq)]
struct ModuleSearchPathInner {
    path: PathBuf,
    kind: ModuleSearchPathKind,
}

#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleSearchPathKind {
    // Project dependency
    FirstParty,

    // e.g. site packages
    ThirdParty,

    // e.g. built-in modules, typeshed
    StandardLibrary,
}

#[derive(Debug, Eq, PartialEq)]
pub struct ModuleData {
    name: ModuleName,
    path: ModulePath,
}

//////////////////////////////////////////////////////
// Queries
//////////////////////////////////////////////////////

/// Resolves a module name to a module id.
///
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient and, therefore, cannot be used as part of a query.
/// For this to work with Salsa, it would be necessary to intern all `ModuleName`s.
#[tracing::instrument(level = "trace", skip(db))]
pub fn resolve_module<Db>(db: &Db, name: ModuleName) -> Option<Module>
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let jar = db.jar();
    let modules = &jar.module_resolver;

    let entry = modules.by_name.entry(name.clone());

    match entry {
        Entry::Occupied(entry) => Some(*entry.get()),
        Entry::Vacant(entry) => {
            let (root_path, absolute_path) = resolve_name(&name, &modules.search_paths)?;
            let normalized = absolute_path.canonicalize().ok()?;

            let file_id = db.file_id(&normalized);
            let path = ModulePath::new(root_path.clone(), file_id);

            let id = Module(
                modules
                    .next_module_id
                    .fetch_add(1, std::sync::atomic::Ordering::Relaxed),
            );

            modules
                .modules
                .insert(id, Arc::from(ModuleData { name, path }));

            // A path can map to multiple modules because of symlinks:
            // ```
            // foo.py
            // bar.py -> foo.py
            // ```
            // Here, both `foo` and `bar` resolve to the same module but through different paths.
            // That's why we need to insert the absolute path and not the normalized path here.
            modules.by_path.insert(absolute_path, id);

            entry.insert_entry(id);

            Some(id)
        }
    }
}

//////////////////////////////////////////////////////
// Mutations
//////////////////////////////////////////////////////

/// Changes the module search paths to `search_paths`.
pub fn set_module_search_paths<Db>(db: &mut Db, search_paths: Vec<ModuleSearchPath>)
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let jar = db.jar_mut();

    jar.module_resolver = ModuleResolver::new(search_paths);
}

/// Resolves the module id for the file with the given id.
///
/// Returns `None` if the file is not a module in `sys.path`.
pub fn file_to_module<Db>(db: &mut Db, file: FileId) -> Option<Module>
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let path = db.file_path(file);
    path_to_module(db, &path)
}

/// Resolves the module id for the given path.
///
/// Returns `None` if the path is not a module in `sys.path`.
// WARNING: It's important that this function takes `&mut Db`. Without exclusive access,
// the implementation is prone to race conditions.
// Note: This won't work with Salsa because `Path` is not an ingredient.
pub fn path_to_module<Db>(db: &mut Db, path: &Path) -> Option<Module>
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let jar = db.jar_mut();
    let modules = &mut jar.module_resolver;
    debug_assert!(path.is_absolute());

    if let Some(existing) = modules.by_path.get(path) {
        return Some(*existing);
    }

    let root_path = modules
        .search_paths
        .iter()
        .find(|root| path.starts_with(root.path()))?
        .clone();

    // SAFETY: `strip_prefix` is guaranteed to succeed because we search the root that is a prefix of the path.
    let relative_path = path.strip_prefix(root_path.path()).unwrap();
    let module_name = ModuleName::from_relative_path(relative_path)?;

    // Resolve the module name to see if Python would resolve the name to the same path.
    // If it doesn't, then multiple modules have the same name in different root paths,
    // but the module corresponding to the passed path is in a lower-priority path,
    // in which case we ignore it.
    let module_id = resolve_module(db, module_name)?;
    // Note: Guaranteed to be race-free because we're holding a mutable reference to `db` here.
    let module_path = module_id.path(db);

    if module_path.root() == &root_path {
        let normalized = path.canonicalize().ok()?;
        let interned_normalized = db.file_id(&normalized);

        if interned_normalized != module_path.file() {
            // This path is for a module with the same name but with a different precedence. For example:
            // ```
            // src/foo.py
            // src/foo/__init__.py
            // ```
            // The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
            // That means we need to ignore `src/foo.py` even though it resolves to the same module name.
            return None;
        }

        // Path has been inserted by `resolve_module`.
        Some(module_id)
    } else {
        // This path is for a module with the same name but in a module search path with a lower priority.
        // Ignore it.
        None
    }
}

/// Adds a module to the resolver.
///
/// Returns `None` if the path doesn't resolve to a module.
///
/// Returns `Some` with the id of the module and the modules that need re-resolving
/// because they were part of a namespace package and might now resolve differently.
///
/// Note: This won't work with Salsa because `Path` is not an ingredient.
pub fn add_module<Db>(db: &mut Db, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)>
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    // No locking is required because we're holding a mutable reference to `db`.

    // TODO This needs tests.

    // Note: Intentionally bypass caching here. The module should not be in the cache yet.
    let module = path_to_module(db, path)?;

    // The code below handles the addition of `__init__.py` files.
    // When an `__init__.py` file is added, we need to remove all modules that are part of the same package.
    // For example, if an `__init__.py` is added to `foo`, we need to remove `foo.bar`, `foo.baz`, etc.
    // because they were namespace packages before and could have been from different search paths.
    let Some(filename) = path.file_name() else {
        return Some((module, Vec::new()));
    };

    if !matches!(filename.to_str(), Some("__init__.py" | "__init__.pyi")) {
        return Some((module, Vec::new()));
    }

    let Some(parent_name) = module.name(db).parent() else {
        return Some((module, Vec::new()));
    };

    let mut to_remove = Vec::new();

    let jar = db.jar_mut();
    let modules = &mut jar.module_resolver;

    modules.by_path.retain(|_, id| {
        if modules
            .modules
            .get(id)
            .unwrap()
            .name
            .starts_with(&parent_name)
        {
            to_remove.push(*id);
            false
        } else {
            true
        }
    });

    // TODO remove the need for this vec.
    let mut removed = Vec::with_capacity(to_remove.len());
    for id in &to_remove {
        removed.push(modules.remove_module_by_id(*id));
    }

    Some((module, removed))
}

#[derive(Default)]
pub struct ModuleResolver {
    /// The search paths where modules are located (and searched). Corresponds to `sys.path` at runtime.
    search_paths: Vec<ModuleSearchPath>,

    // Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary
    // lookup method. Acquiring locks in any other order can result in deadlocks.
    /// Resolves a module name to its module id.
    by_name: FxDashMap<ModuleName, Module>,

    /// All known modules, indexed by the module id.
    modules: FxDashMap<Module, Arc<ModuleData>>,

    /// Lookup from absolute path to module.
    /// The same module might be reachable from different paths when symlinks are involved.
    by_path: FxDashMap<PathBuf, Module>,
    next_module_id: AtomicU32,
}

impl ModuleResolver {
    pub fn new(search_paths: Vec<ModuleSearchPath>) -> Self {
        Self {
            search_paths,
            modules: FxDashMap::default(),
            by_name: FxDashMap::default(),
            by_path: FxDashMap::default(),
            next_module_id: AtomicU32::new(0),
        }
    }

    pub fn remove_module(&mut self, path: &Path) {
        // No locking is required because we're holding a mutable reference to `self`.
        let Some((_, id)) = self.by_path.remove(path) else {
            return;
        };

        self.remove_module_by_id(id);
    }

    fn remove_module_by_id(&mut self, id: Module) -> Arc<ModuleData> {
        let (_, module) = self.modules.remove(&id).unwrap();

        self.by_name.remove(&module.name).unwrap();

        // It's possible that multiple paths map to the same id. Search all other paths referencing the same module id.
        self.by_path.retain(|_, current_id| *current_id != id);

        module
    }
}

#[allow(clippy::missing_fields_in_debug)]
impl std::fmt::Debug for ModuleResolver {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        f.debug_struct("ModuleResolver")
            .field("search_paths", &self.search_paths)
            .field("modules", &self.by_name)
            .finish()
    }
}

/// The resolved path of a module.
///
/// The file is highly likely to still exist when accessed, but this isn't 100% guaranteed
/// because the file could have been deleted between resolving the module name and accessing it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ModulePath {
    root: ModuleSearchPath,
    file_id: FileId,
}

impl ModulePath {
    pub fn new(root: ModuleSearchPath, file_id: FileId) -> Self {
        Self { root, file_id }
    }

    pub fn root(&self) -> &ModuleSearchPath {
        &self.root
    }

    pub fn file(&self) -> FileId {
        self.file_id
    }
}

fn resolve_name(
    name: &ModuleName,
    search_paths: &[ModuleSearchPath],
) -> Option<(ModuleSearchPath, PathBuf)> {
    for search_path in search_paths {
        let mut components = name.components();
        let module_name = components.next_back()?;

        match resolve_package(search_path, components) {
            Ok(resolved_package) => {
                let mut package_path = resolved_package.path;

                package_path.push(module_name);

                // Must be an `__init__.pyi` or `__init__.py` or it isn't a package.
                if package_path.is_dir() {
                    package_path.push("__init__");
                }

                // TODO Implement the full resolution order from
                // https://peps.python.org/pep-0561/#type-checker-module-resolution-order
                let stub = package_path.with_extension("pyi");

                if stub.is_file() {
                    return Some((search_path.clone(), stub));
                }

                let module = package_path.with_extension("py");

                if module.is_file() {
                    return Some((search_path.clone(), module));
                }

                // For regular packages, don't search the next search path. All files of that
                // package must be in the same location.
                if resolved_package.kind.is_regular_package() {
                    return None;
                }
            }
            Err(parent_kind) => {
                if parent_kind.is_regular_package() {
                    // For regular packages, don't search the next search path.
                    return None;
                }
            }
        }
    }

    None
}

fn resolve_package<'a, I>(
    module_search_path: &ModuleSearchPath,
    components: I,
) -> Result<ResolvedPackage, PackageKind>
where
    I: Iterator<Item = &'a str>,
{
    let mut package_path = module_search_path.path().to_path_buf();

    // `true` if inside a folder that is a namespace package (has no `__init__.py`).
    // Namespace packages are special because they can be spread across multiple search paths.
    // https://peps.python.org/pep-0420/
    let mut in_namespace_package = false;

    // `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
    let mut in_sub_package = false;

    // For `foo.bar.baz`, test that `foo` and `bar` both contain a `__init__.py`.
    for folder in components {
        package_path.push(folder);

        let has_init_py = package_path.join("__init__.py").is_file()
            || package_path.join("__init__.pyi").is_file();

        if has_init_py {
            in_namespace_package = false;
        } else if package_path.is_dir() {
            // A directory without an `__init__.py` is a namespace package, continue with the next folder.
            in_namespace_package = true;
        } else if in_namespace_package {
            // Package not found, but it is part of a namespace package.
            return Err(PackageKind::Namespace);
        } else if in_sub_package {
            // A regular sub-package wasn't found.
            return Err(PackageKind::Regular);
        } else {
            // We couldn't find `foo` for `foo.bar.baz`, search the next search path.
            return Err(PackageKind::Root);
        }

        in_sub_package = true;
    }

    let kind = if in_namespace_package {
        PackageKind::Namespace
    } else if in_sub_package {
        PackageKind::Regular
    } else {
        PackageKind::Root
    };

    Ok(ResolvedPackage {
        kind,
        path: package_path,
    })
}

#[derive(Debug)]
struct ResolvedPackage {
    path: PathBuf,
    kind: PackageKind,
}

#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
    /// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
    Root,

    /// A regular sub-package where the parent contains an `__init__.py`. For example `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
    Regular,

    /// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
    ///
    /// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
    Namespace,
}

impl PackageKind {
    const fn is_regular_package(self) -> bool {
        matches!(self, PackageKind::Regular)
    }
}

#[cfg(test)]
mod tests {
    use crate::db::tests::TestDb;
    use crate::db::{SemanticDb, SourceDb};
    use crate::module::{ModuleName, ModuleSearchPath, ModuleSearchPathKind};

    struct TestCase {
        temp_dir: tempfile::TempDir,
        db: TestDb,

        src: ModuleSearchPath,
        site_packages: ModuleSearchPath,
    }

    fn create_resolver() -> std::io::Result<TestCase> {
        let temp_dir = tempfile::tempdir()?;

        let src = temp_dir.path().join("src");
        let site_packages = temp_dir.path().join("site_packages");

        std::fs::create_dir(&src)?;
        std::fs::create_dir(&site_packages)?;

        let src = ModuleSearchPath::new(src.canonicalize()?, ModuleSearchPathKind::FirstParty);
        let site_packages = ModuleSearchPath::new(
            site_packages.canonicalize()?,
            ModuleSearchPathKind::ThirdParty,
        );

        let roots = vec![src.clone(), site_packages.clone()];

        let mut db = TestDb::default();
        db.set_module_search_paths(roots);

        Ok(TestCase {
            temp_dir,
            db,
            src,
            site_packages,
        })
    }

    #[test]
    fn first_party_module() -> std::io::Result<()> {
        let TestCase {
            mut db,
            src,
            temp_dir: _temp_dir,
            ..
        } = create_resolver()?;

        let foo_path = src.path().join("foo.py");
        std::fs::write(&foo_path, "print('Hello, world!')")?;

        let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();

        assert_eq!(Some(foo_module), db.resolve_module(ModuleName::new("foo")));

        assert_eq!(ModuleName::new("foo"), foo_module.name(&db));
        assert_eq!(&src, foo_module.path(&db).root());
        assert_eq!(&foo_path, &*db.file_path(foo_module.path(&db).file()));

        assert_eq!(Some(foo_module), db.path_to_module(&foo_path));

        Ok(())
    }

    #[test]
    fn resolve_package() -> std::io::Result<()> {
        let TestCase {
            src,
            mut db,
            temp_dir: _temp_dir,
            ..
        } = create_resolver()?;

        let foo_dir = src.path().join("foo");
        let foo_path = foo_dir.join("__init__.py");
        std::fs::create_dir(&foo_dir)?;
        std::fs::write(&foo_path, "print('Hello, world!')")?;

        let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();

        assert_eq!(ModuleName::new("foo"), foo_module.name(&db));
        assert_eq!(&src, foo_module.path(&db).root());
        assert_eq!(&foo_path, &*db.file_path(foo_module.path(&db).file()));

        assert_eq!(Some(foo_module), db.path_to_module(&foo_path));

        // Resolving by directory doesn't resolve to the init file.
        assert_eq!(None, db.path_to_module(&foo_dir));

        Ok(())
    }

    #[test]
    fn package_priority_over_module() -> std::io::Result<()> {
        let TestCase {
            mut db,
            temp_dir: _temp_dir,
            src,
            ..
        } = create_resolver()?;

        let foo_dir = src.path().join("foo");
        let foo_init = foo_dir.join("__init__.py");
        std::fs::create_dir(&foo_dir)?;
        std::fs::write(&foo_init, "print('Hello, world!')")?;

        let foo_py = src.path().join("foo.py");
        std::fs::write(&foo_py, "print('Hello, world!')")?;

        let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();

        assert_eq!(&src, foo_module.path(&db).root());
        assert_eq!(&foo_init, &*db.file_path(foo_module.path(&db).file()));

        assert_eq!(Some(foo_module), db.path_to_module(&foo_init));
        assert_eq!(None, db.path_to_module(&foo_py));

        Ok(())
    }

    #[test]
    fn typing_stub_over_module() -> std::io::Result<()> {
        let TestCase {
            mut db,
            src,
            temp_dir: _temp_dir,
            ..
        } = create_resolver()?;

        let foo_stub = src.path().join("foo.pyi");
        let foo_py = src.path().join("foo.py");
        std::fs::write(&foo_stub, "x: int")?;
        std::fs::write(&foo_py, "print('Hello, world!')")?;

        let foo = db.resolve_module(ModuleName::new("foo")).unwrap();

        assert_eq!(&src, foo.path(&db).root());
        assert_eq!(&foo_stub, &*db.file_path(foo.path(&db).file()));

        assert_eq!(Some(foo), db.path_to_module(&foo_stub));
        assert_eq!(None, db.path_to_module(&foo_py));

        Ok(())
    }

    #[test]
    fn sub_packages() -> std::io::Result<()> {
        let TestCase {
            mut db,
            src,
            temp_dir: _temp_dir,
            ..
        } = create_resolver()?;

        let foo = src.path().join("foo");
        let bar = foo.join("bar");
        let baz = bar.join("baz.py");

        std::fs::create_dir_all(&bar)?;
        std::fs::write(foo.join("__init__.py"), "")?;
        std::fs::write(bar.join("__init__.py"), "")?;
        std::fs::write(&baz, "print('Hello, world!')")?;

        let baz_module = db.resolve_module(ModuleName::new("foo.bar.baz")).unwrap();

        assert_eq!(&src, baz_module.path(&db).root());
        assert_eq!(&baz, &*db.file_path(baz_module.path(&db).file()));

        assert_eq!(Some(baz_module), db.path_to_module(&baz));

        Ok(())
    }

    #[test]
    fn namespace_package() -> std::io::Result<()> {
        let TestCase {
            mut db,
            temp_dir: _,
            src,
            site_packages,
        } = create_resolver()?;

        // From [PEP 420](https://peps.python.org/pep-0420/#nested-namespace-packages),
        // but uses `src` for `project1` and `site_packages` for `project2`.
        // ```
        // src
        //   parent
        //     child
        //       one.py
        // site_packages
        //   parent
        //     child
        //       two.py
        // ```

        let parent1 = src.path().join("parent");
        let child1 = parent1.join("child");
        let one = child1.join("one.py");

        std::fs::create_dir_all(child1)?;
        std::fs::write(&one, "print('Hello, world!')")?;

        let parent2 = site_packages.path().join("parent");
        let child2 = parent2.join("child");
        let two = child2.join("two.py");

        std::fs::create_dir_all(&child2)?;
        std::fs::write(&two, "print('Hello, world!')")?;

        let one_module = db
            .resolve_module(ModuleName::new("parent.child.one"))
            .unwrap();

        assert_eq!(Some(one_module), db.path_to_module(&one));

        let two_module = db
            .resolve_module(ModuleName::new("parent.child.two"))
            .unwrap();
        assert_eq!(Some(two_module), db.path_to_module(&two));

        Ok(())
    }

    #[test]
    fn regular_package_in_namespace_package() -> std::io::Result<()> {
        let TestCase {
            mut db,
            temp_dir: _,
            src,
            site_packages,
        } = create_resolver()?;

        // Adapted from the [PEP 420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
        // The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
        // ```
        // src
        //   parent
        //     child
        //       one.py
        // site_packages
        //   parent
        //     child
        //       two.py
        // ```

        let parent1 = src.path().join("parent");
        let child1 = parent1.join("child");
        let one = child1.join("one.py");

        std::fs::create_dir_all(&child1)?;
        std::fs::write(child1.join("__init__.py"), "print('Hello, world!')")?;
        std::fs::write(&one, "print('Hello, world!')")?;

        let parent2 = site_packages.path().join("parent");
        let child2 = parent2.join("child");
        let two = child2.join("two.py");

        std::fs::create_dir_all(&child2)?;
        std::fs::write(two, "print('Hello, world!')")?;

        let one_module = db
            .resolve_module(ModuleName::new("parent.child.one"))
            .unwrap();

        assert_eq!(Some(one_module), db.path_to_module(&one));

        assert_eq!(None, db.resolve_module(ModuleName::new("parent.child.two")));
        Ok(())
    }

    #[test]
    fn module_search_path_priority() -> std::io::Result<()> {
        let TestCase {
            mut db,
            src,
            site_packages,
            temp_dir: _temp_dir,
        } = create_resolver()?;

        let foo_src = src.path().join("foo.py");
        let foo_site_packages = site_packages.path().join("foo.py");

        std::fs::write(&foo_src, "")?;
        std::fs::write(&foo_site_packages, "")?;

        let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();

        assert_eq!(&src, foo_module.path(&db).root());
        assert_eq!(&foo_src, &*db.file_path(foo_module.path(&db).file()));

        assert_eq!(Some(foo_module), db.path_to_module(&foo_src));
        assert_eq!(None, db.path_to_module(&foo_site_packages));

        Ok(())
    }

    #[test]
    #[cfg(target_family = "unix")]
    fn symlink() -> std::io::Result<()> {
        let TestCase {
            mut db,
            src,
            temp_dir: _temp_dir,
            ..
        } = create_resolver()?;

        let foo = src.path().join("foo.py");
        let bar = src.path().join("bar.py");

        std::fs::write(&foo, "")?;
        std::os::unix::fs::symlink(&foo, &bar)?;

        let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
        let bar_module = db.resolve_module(ModuleName::new("bar")).unwrap();

        assert_ne!(foo_module, bar_module);

        assert_eq!(&src, foo_module.path(&db).root());
        assert_eq!(&foo, &*db.file_path(foo_module.path(&db).file()));

        // `bar` has a different name but it should point to the same file.
        assert_eq!(&src, bar_module.path(&db).root());
        assert_eq!(foo_module.path(&db).file(), bar_module.path(&db).file());
        assert_eq!(&foo, &*db.file_path(bar_module.path(&db).file()));

        assert_eq!(Some(foo_module), db.path_to_module(&foo));
        assert_eq!(Some(bar_module), db.path_to_module(&bar));

        Ok(())
    }
}
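
One more illustrative check of the `ModuleName` helpers; a sketch of a test that would slot into the module above (not part of this commit):

#[test]
fn module_name_helpers() {
    let name = ModuleName::new("foo.bar.baz");

    assert_eq!(
        name.components().collect::<Vec<_>>(),
        vec!["foo", "bar", "baz"]
    );
    assert_eq!(name.parent(), Some(ModuleName::new("foo.bar")));
    assert_eq!(ModuleName::new("foo").parent(), None);
    assert!(name.starts_with(&ModuleName::new("foo.bar")));
}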
95
crates/red_knot/src/parse.rs
Normal file

@@ -0,0 +1,95 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;

use ruff_python_ast as ast;
use ruff_python_parser::{Mode, ParseError};
use ruff_text_size::{Ranged, TextRange};

use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use crate::files::FileId;

#[derive(Debug, Clone, PartialEq)]
pub struct Parsed {
    inner: Arc<ParsedInner>,
}

#[derive(Debug, PartialEq)]
struct ParsedInner {
    ast: ast::ModModule,
    errors: Vec<ParseError>,
}

impl Parsed {
    fn new(ast: ast::ModModule, errors: Vec<ParseError>) -> Self {
        Self {
            inner: Arc::new(ParsedInner { ast, errors }),
        }
    }

    pub(crate) fn from_text(text: &str) -> Self {
        let result = ruff_python_parser::parse(text, Mode::Module);

        let (module, errors) = match result {
            Ok(ast::Mod::Module(module)) => (module, vec![]),
            Ok(ast::Mod::Expression(expression)) => (
                ast::ModModule {
                    range: expression.range(),
                    body: vec![ast::Stmt::Expr(ast::StmtExpr {
                        range: expression.range(),
                        value: expression.body,
                    })],
                },
                vec![],
            ),
            Err(errors) => (
                ast::ModModule {
                    range: TextRange::default(),
                    body: Vec::new(),
                },
                vec![errors],
            ),
        };

        Parsed::new(module, errors)
    }

    pub fn ast(&self) -> &ast::ModModule {
        &self.inner.ast
    }

    pub fn errors(&self) -> &[ParseError] {
        &self.inner.errors
    }
}

#[tracing::instrument(level = "trace", skip(db))]
pub(crate) fn parse<Db>(db: &Db, file_id: FileId) -> Parsed
where
    Db: SourceDb + HasJar<SourceJar>,
{
    let parsed = db.jar();

    parsed.parsed.get(&file_id, |file_id| {
        let source = db.source(*file_id);

        Parsed::from_text(source.text())
    })
}

#[derive(Debug, Default)]
pub struct ParsedStorage(KeyValueCache<FileId, Parsed>);

impl Deref for ParsedStorage {
    type Target = KeyValueCache<FileId, Parsed>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for ParsedStorage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
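
A small sketch of `Parsed::from_text` (crate-internal; illustrative, not part of this commit):

fn parsed_example() {
    let parsed = Parsed::from_text("import os\n");
    assert!(parsed.errors().is_empty());
    assert_eq!(parsed.ast().body.len(), 1);

    // Invalid syntax still yields a module (possibly with an empty body),
    // plus the recorded parse errors.
    let broken = Parsed::from_text("def f(:\n");
    assert!(!broken.errors().is_empty());
}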
154
crates/red_knot/src/program/mod.rs
Normal file

@@ -0,0 +1,154 @@
use std::path::Path;
use std::sync::Arc;

use crate::db::{Db, HasJar, SemanticDb, SemanticJar, SourceDb, SourceJar};
use crate::files::{FileId, Files};
use crate::lint::{lint_syntax, Diagnostics, LintSyntaxStorage};
use crate::module::{
    add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
    ModuleName, ModuleResolver, ModuleSearchPath,
};
use crate::parse::{parse, Parsed, ParsedStorage};
use crate::source::{source_text, Source, SourceStorage};
use crate::symbols::{symbol_table, SymbolId, SymbolTable, SymbolTablesStorage};
use crate::types::{infer_symbol_type, Type, TypeStore};

#[derive(Debug)]
pub struct Program {
    files: Files,
    source: SourceJar,
    semantic: SemanticJar,
}

impl Program {
    pub fn new(module_search_paths: Vec<ModuleSearchPath>, files: Files) -> Self {
        Self {
            source: SourceJar {
                sources: SourceStorage::default(),
                parsed: ParsedStorage::default(),
                lint_syntax: LintSyntaxStorage::default(),
            },
            semantic: SemanticJar {
                module_resolver: ModuleResolver::new(module_search_paths),
                symbol_tables: SymbolTablesStorage::default(),
                type_store: TypeStore::default(),
            },
            files,
        }
    }

    pub fn apply_changes<I>(&mut self, changes: I)
    where
        I: IntoIterator<Item = FileChange>,
    {
        for change in changes {
            self.semantic
                .module_resolver
                .remove_module(&self.file_path(change.id));
            self.semantic.symbol_tables.remove(&change.id);
            self.source.sources.remove(&change.id);
            self.source.parsed.remove(&change.id);
            self.source.lint_syntax.remove(&change.id);
            // TODO: remove all dependent modules as well
            self.semantic.type_store.remove_module(change.id);
        }
    }
}

impl SourceDb for Program {
    fn file_id(&self, path: &Path) -> FileId {
        self.files.intern(path)
    }

    fn file_path(&self, file_id: FileId) -> Arc<Path> {
        self.files.path(file_id)
    }

    fn source(&self, file_id: FileId) -> Source {
        source_text(self, file_id)
    }

    fn parse(&self, file_id: FileId) -> Parsed {
        parse(self, file_id)
    }

    fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
        lint_syntax(self, file_id)
    }
}

impl SemanticDb for Program {
    fn resolve_module(&self, name: ModuleName) -> Option<Module> {
        resolve_module(self, name)
    }

    fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable> {
        symbol_table(self, file_id)
    }

    // Mutations
    fn path_to_module(&mut self, path: &Path) -> Option<Module> {
        path_to_module(self, path)
    }

    fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
        add_module(self, path)
    }

    fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>) {
        set_module_search_paths(self, paths);
    }

    fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type {
        infer_symbol_type(self, file_id, symbol_id)
    }
}

impl Db for Program {}

impl HasJar<SourceJar> for Program {
    fn jar(&self) -> &SourceJar {
        &self.source
    }

    fn jar_mut(&mut self) -> &mut SourceJar {
        &mut self.source
    }
}

impl HasJar<SemanticJar> for Program {
    fn jar(&self) -> &SemanticJar {
        &self.semantic
    }

    fn jar_mut(&mut self) -> &mut SemanticJar {
        &mut self.semantic
    }
}

#[derive(Copy, Clone, Debug)]
pub struct FileChange {
    id: FileId,
    kind: FileChangeKind,
}

impl FileChange {
    pub fn new(file_id: FileId, kind: FileChangeKind) -> Self {
        Self { id: file_id, kind }
    }

    pub fn file_id(&self) -> FileId {
        self.id
    }

    pub fn kind(&self) -> FileChangeKind {
        self.kind
    }
}

#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileChangeKind {
    Created,
    Modified,
    Deleted,
}
|
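Note: a minimal sketch of how a host (CLI or watcher loop) might drive `Program`, assuming a single first-party search path; `ModuleSearchPath`, `ModuleSearchPathKind`, and `Files` are used the same way as in the tests later in this commit, and the `red_knot::…` module paths are assumed from the crate layout:

use std::path::Path;

use red_knot::db::{SemanticDb, SourceDb};
use red_knot::files::Files;
use red_knot::module::{ModuleSearchPath, ModuleSearchPathKind};
use red_knot::program::{FileChange, FileChangeKind, Program};

fn check_once(root: &Path) {
    // One first-party search path, mirroring the setup in types/infer.rs tests.
    let search_path = ModuleSearchPath::new(root.to_path_buf(), ModuleSearchPathKind::FirstParty);
    let mut program = Program::new(vec![search_path], Files::default());

    // Queries go through the SourceDb/SemanticDb impls above.
    let file_id = program.file_id(&root.join("a.py"));
    let _symbols = program.symbol_table(file_id);

    // After a file-system event, drop the per-file caches so the next
    // query recomputes from disk.
    program.apply_changes([FileChange::new(file_id, FileChangeKind::Modified)]);
}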
98
crates/red_knot/src/source.rs
Normal file

@@ -0,0 +1,98 @@
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use ruff_notebook::Notebook;
use ruff_python_ast::PySourceType;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;

use crate::files::FileId;

#[tracing::instrument(level = "trace", skip(db))]
pub(crate) fn source_text<Db>(db: &Db, file_id: FileId) -> Source
where
    Db: SourceDb + HasJar<SourceJar>,
{
    let sources = &db.jar().sources;

    sources.get(&file_id, |file_id| {
        tracing::trace!("Reading source text for file_id={:?}.", file_id);

        let path = db.file_path(*file_id);

        let source_text = std::fs::read_to_string(&path).unwrap_or_else(|err| {
            tracing::error!("Failed to read file '{path:?}: {err}'. Falling back to empty text");
            String::new()
        });

        let python_ty = PySourceType::from(&path);

        let kind = match python_ty {
            PySourceType::Python => SourceKind::Python(Arc::from(source_text)),
            PySourceType::Stub => SourceKind::Stub(Arc::from(source_text)),
            PySourceType::Ipynb => {
                let notebook = Notebook::from_source_code(&source_text).unwrap_or_else(|err| {
                    // TODO should this be changed to never fail?
                    // or should we instead add a diagnostic somewhere? But what would we return in this case?
                    tracing::error!(
                        "Failed to parse notebook '{path:?}: {err}'. Falling back to an empty notebook"
                    );
                    Notebook::from_source_code("").unwrap()
                });

                SourceKind::IpyNotebook(Arc::new(notebook))
            }
        };

        Source { kind }
    })
}

#[derive(Debug, Clone, PartialEq)]
pub enum SourceKind {
    Python(Arc<str>),
    Stub(Arc<str>),
    IpyNotebook(Arc<Notebook>),
}

#[derive(Debug, Clone, PartialEq)]
pub struct Source {
    kind: SourceKind,
}

impl Source {
    pub fn python<T: Into<Arc<str>>>(source: T) -> Self {
        Self {
            kind: SourceKind::Python(source.into()),
        }
    }

    pub fn kind(&self) -> &SourceKind {
        &self.kind
    }

    pub fn text(&self) -> &str {
        match &self.kind {
            SourceKind::Python(text) => text,
            SourceKind::Stub(text) => text,
            SourceKind::IpyNotebook(notebook) => notebook.source_code(),
        }
    }
}

#[derive(Debug, Default)]
pub struct SourceStorage(pub(crate) KeyValueCache<FileId, Source>);

impl Deref for SourceStorage {
    type Target = KeyValueCache<FileId, Source>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for SourceStorage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}
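Note: `Source` flattens the three source kinds behind one `text()` accessor, so downstream stages never branch on notebooks vs. scripts. A small usage sketch:

use red_knot::source::{Source, SourceKind};

fn demo() {
    let source = Source::python("x = 1\n");
    assert!(matches!(source.kind(), SourceKind::Python(_)));
    // The parser and linter only ever see the flattened text.
    assert_eq!(source.text(), "x = 1\n");
}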
765
crates/red_knot/src/symbols.rs
Normal file

@@ -0,0 +1,765 @@
#![allow(dead_code)]

use std::hash::{Hash, Hasher};
use std::iter::{Copied, DoubleEndedIterator, FusedIterator};
use std::ops::{Deref, DerefMut};
use std::sync::Arc;

use hashbrown::hash_map::{Keys, RawEntryMut};
use rustc_hash::{FxHashMap, FxHasher};

use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast as ast;
use ruff_python_ast::visitor::preorder::PreorderVisitor;

use crate::ast_ids::TypedNodeKey;
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::Name;

#[allow(unreachable_pub)]
#[tracing::instrument(level = "trace", skip(db))]
pub fn symbol_table<Db>(db: &Db, file_id: FileId) -> Arc<SymbolTable>
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let jar = db.jar();

    jar.symbol_tables.get(&file_id, |_| {
        let parsed = db.parse(file_id);
        Arc::from(SymbolTable::from_ast(parsed.ast()))
    })
}

type Map<K, V> = hashbrown::HashMap<K, V, ()>;

#[newtype_index]
pub(crate) struct ScopeId;

impl ScopeId {
    pub(crate) fn scope(self, table: &SymbolTable) -> &Scope {
        &table.scopes_by_id[self]
    }
}

#[newtype_index]
pub struct SymbolId;

impl SymbolId {
    pub(crate) fn symbol(self, table: &SymbolTable) -> &Symbol {
        &table.symbols_by_id[self]
    }
}

#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum ScopeKind {
    Module,
    Annotation,
    Class,
    Function,
}

#[derive(Debug)]
pub(crate) struct Scope {
    name: Name,
    kind: ScopeKind,
    child_scopes: Vec<ScopeId>,
    // symbol IDs, hashed by symbol name
    symbols_by_name: Map<SymbolId, ()>,
}

impl Scope {
    pub(crate) fn name(&self) -> &str {
        self.name.as_str()
    }

    pub(crate) fn kind(&self) -> ScopeKind {
        self.kind
    }
}

#[derive(Debug)]
pub(crate) struct Symbol {
    name: Name,
}

impl Symbol {
    pub(crate) fn name(&self) -> &str {
        self.name.as_str()
    }
}

// TODO storing TypedNodeKey for definitions means we have to search to find them again in the AST;
// this is at best O(log n). If looking up definitions is a bottleneck we should look for
// alternatives here.
#[derive(Debug)]
pub(crate) enum Definition {
    // For the import cases, we don't need reference to any arbitrary AST subtrees (annotations,
    // RHS), and referencing just the import statement node is imprecise (a single import statement
    // can assign many symbols, we'd have to re-search for the one we care about), so we just copy
    // the small amount of information we need from the AST.
    Import(ImportDefinition),
    ImportFrom(ImportFromDefinition),
    ClassDef(TypedNodeKey<ast::StmtClassDef>),
    FunctionDef(TypedNodeKey<ast::StmtFunctionDef>),
    Assignment(TypedNodeKey<ast::StmtAssign>),
    AnnotatedAssignment(TypedNodeKey<ast::StmtAnnAssign>),
    // TODO with statements, except handlers, function args...
}

#[derive(Debug)]
pub(crate) struct ImportDefinition {
    pub(crate) module: String,
}

#[derive(Debug)]
pub(crate) struct ImportFromDefinition {
    pub(crate) module: Option<String>,
    pub(crate) name: String,
    pub(crate) level: u32,
}

/// Table of all symbols in all scopes for a module.
#[derive(Debug)]
pub struct SymbolTable {
    scopes_by_id: IndexVec<ScopeId, Scope>,
    symbols_by_id: IndexVec<SymbolId, Symbol>,
    defs: FxHashMap<SymbolId, Vec<Definition>>,
}

impl SymbolTable {
    pub(crate) fn from_ast(module: &ast::ModModule) -> Self {
        let root_scope_id = SymbolTable::root_scope_id();
        let mut builder = SymbolTableBuilder {
            table: SymbolTable::new(),
            scopes: vec![root_scope_id],
        };
        builder.visit_body(&module.body);
        builder.table
    }

    pub(crate) fn new() -> Self {
        let mut table = SymbolTable {
            scopes_by_id: IndexVec::new(),
            symbols_by_id: IndexVec::new(),
            defs: FxHashMap::default(),
        };
        table.scopes_by_id.push(Scope {
            name: Name::new("<module>"),
            kind: ScopeKind::Module,
            child_scopes: Vec::new(),
            symbols_by_name: Map::default(),
        });
        table
    }

    pub(crate) const fn root_scope_id() -> ScopeId {
        ScopeId::from_usize(0)
    }

    pub(crate) fn root_scope(&self) -> &Scope {
        &self.scopes_by_id[SymbolTable::root_scope_id()]
    }

    pub(crate) fn symbol_ids_for_scope(&self, scope_id: ScopeId) -> Copied<Keys<SymbolId, ()>> {
        self.scopes_by_id[scope_id].symbols_by_name.keys().copied()
    }

    pub(crate) fn symbols_for_scope(
        &self,
        scope_id: ScopeId,
    ) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
        SymbolIterator {
            table: self,
            ids: self.symbol_ids_for_scope(scope_id),
        }
    }

    pub(crate) fn root_symbol_ids(&self) -> Copied<Keys<SymbolId, ()>> {
        self.symbol_ids_for_scope(SymbolTable::root_scope_id())
    }

    pub(crate) fn root_symbols(&self) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
        self.symbols_for_scope(SymbolTable::root_scope_id())
    }

    pub(crate) fn child_scope_ids_of(&self, scope_id: ScopeId) -> &[ScopeId] {
        &self.scopes_by_id[scope_id].child_scopes
    }

    pub(crate) fn child_scopes_of(&self, scope_id: ScopeId) -> ScopeIterator<&[ScopeId]> {
        ScopeIterator {
            table: self,
            ids: self.child_scope_ids_of(scope_id),
        }
    }

    pub(crate) fn root_child_scope_ids(&self) -> &[ScopeId] {
        self.child_scope_ids_of(SymbolTable::root_scope_id())
    }

    pub(crate) fn root_child_scopes(&self) -> ScopeIterator<&[ScopeId]> {
        self.child_scopes_of(SymbolTable::root_scope_id())
    }

    pub(crate) fn symbol_id_by_name(&self, scope_id: ScopeId, name: &str) -> Option<SymbolId> {
        let scope = &self.scopes_by_id[scope_id];
        let hash = SymbolTable::hash_name(name);
        let name = Name::new(name);
        scope
            .symbols_by_name
            .raw_entry()
            .from_hash(hash, |symid| self.symbols_by_id[*symid].name == name)
            .map(|(symbol_id, ())| *symbol_id)
    }

    pub(crate) fn symbol_by_name(&self, scope_id: ScopeId, name: &str) -> Option<&Symbol> {
        Some(&self.symbols_by_id[self.symbol_id_by_name(scope_id, name)?])
    }

    pub(crate) fn root_symbol_id_by_name(&self, name: &str) -> Option<SymbolId> {
        self.symbol_id_by_name(SymbolTable::root_scope_id(), name)
    }

    pub(crate) fn root_symbol_by_name(&self, name: &str) -> Option<&Symbol> {
        self.symbol_by_name(SymbolTable::root_scope_id(), name)
    }

    pub(crate) fn defs(&self, symbol_id: SymbolId) -> &[Definition] {
        self.defs
            .get(&symbol_id)
            .map(std::vec::Vec::as_slice)
            .unwrap_or_default()
    }

    fn add_symbol_to_scope(&mut self, scope_id: ScopeId, name: &str) -> SymbolId {
        let hash = SymbolTable::hash_name(name);
        let scope = &mut self.scopes_by_id[scope_id];
        let name = Name::new(name);

        let entry = scope
            .symbols_by_name
            .raw_entry_mut()
            .from_hash(hash, |existing| self.symbols_by_id[*existing].name == name);

        match entry {
            RawEntryMut::Occupied(entry) => *entry.key(),
            RawEntryMut::Vacant(entry) => {
                let id = self.symbols_by_id.push(Symbol { name });
                entry.insert_with_hasher(hash, id, (), |_| hash);
                id
            }
        }
    }

    fn add_child_scope(
        &mut self,
        parent_scope_id: ScopeId,
        name: &str,
        kind: ScopeKind,
    ) -> ScopeId {
        let new_scope_id = self.scopes_by_id.push(Scope {
            name: Name::new(name),
            kind,
            child_scopes: Vec::new(),
            symbols_by_name: Map::default(),
        });
        let parent_scope = &mut self.scopes_by_id[parent_scope_id];
        parent_scope.child_scopes.push(new_scope_id);
        new_scope_id
    }

    fn hash_name(name: &str) -> u64 {
        let mut hasher = FxHasher::default();
        name.hash(&mut hasher);
        hasher.finish()
    }
}

pub(crate) struct SymbolIterator<'a, I> {
    table: &'a SymbolTable,
    ids: I,
}

impl<'a, I> Iterator for SymbolIterator<'a, I>
where
    I: Iterator<Item = SymbolId>,
{
    type Item = &'a Symbol;

    fn next(&mut self) -> Option<Self::Item> {
        let id = self.ids.next()?;
        Some(&self.table.symbols_by_id[id])
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.ids.size_hint()
    }
}

impl<'a, I> FusedIterator for SymbolIterator<'a, I> where
    I: Iterator<Item = SymbolId> + FusedIterator
{
}

impl<'a, I> DoubleEndedIterator for SymbolIterator<'a, I>
where
    I: Iterator<Item = SymbolId> + DoubleEndedIterator,
{
    fn next_back(&mut self) -> Option<Self::Item> {
        let id = self.ids.next_back()?;
        Some(&self.table.symbols_by_id[id])
    }
}

pub(crate) struct ScopeIterator<'a, I> {
    table: &'a SymbolTable,
    ids: I,
}

impl<'a, I> Iterator for ScopeIterator<'a, I>
where
    I: Iterator<Item = ScopeId>,
{
    type Item = &'a Scope;

    fn next(&mut self) -> Option<Self::Item> {
        let id = self.ids.next()?;
        Some(&self.table.scopes_by_id[id])
    }

    fn size_hint(&self) -> (usize, Option<usize>) {
        self.ids.size_hint()
    }
}

impl<'a, I> FusedIterator for ScopeIterator<'a, I> where I: Iterator<Item = ScopeId> + FusedIterator {}

impl<'a, I> DoubleEndedIterator for ScopeIterator<'a, I>
where
    I: Iterator<Item = ScopeId> + DoubleEndedIterator,
{
    fn next_back(&mut self) -> Option<Self::Item> {
        let id = self.ids.next_back()?;
        Some(&self.table.scopes_by_id[id])
    }
}

struct SymbolTableBuilder {
    table: SymbolTable,
    scopes: Vec<ScopeId>,
}

impl SymbolTableBuilder {
    fn add_symbol(&mut self, identifier: &str) -> SymbolId {
        self.table.add_symbol_to_scope(self.cur_scope(), identifier)
    }

    fn add_symbol_with_def(&mut self, identifier: &str, definition: Definition) -> SymbolId {
        let symbol_id = self.add_symbol(identifier);
        self.table
            .defs
            .entry(symbol_id)
            .or_default()
            .push(definition);
        symbol_id
    }

    fn push_scope(&mut self, child_of: ScopeId, name: &str, kind: ScopeKind) -> ScopeId {
        let scope_id = self.table.add_child_scope(child_of, name, kind);
        self.scopes.push(scope_id);
        scope_id
    }

    fn pop_scope(&mut self) -> ScopeId {
        self.scopes
            .pop()
            .expect("Scope stack should never be empty")
    }

    fn cur_scope(&self) -> ScopeId {
        *self
            .scopes
            .last()
            .expect("Scope stack should never be empty")
    }

    fn with_type_params(
        &mut self,
        name: &str,
        params: &Option<Box<ast::TypeParams>>,
        nested: impl FnOnce(&mut Self),
    ) {
        if let Some(type_params) = params {
            self.push_scope(self.cur_scope(), name, ScopeKind::Annotation);
            for type_param in &type_params.type_params {
                let name = match type_param {
                    ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
                    ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
                    ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
                };
                self.add_symbol(name);
            }
        }
        nested(self);
        if params.is_some() {
            self.pop_scope();
        }
    }
}

impl PreorderVisitor<'_> for SymbolTableBuilder {
    fn visit_expr(&mut self, expr: &ast::Expr) {
        if let ast::Expr::Name(ast::ExprName { id, .. }) = expr {
            self.add_symbol(id);
        }
        ast::visitor::preorder::walk_expr(self, expr);
    }

    fn visit_stmt(&mut self, stmt: &ast::Stmt) {
        // TODO need to capture more definition statements here
        match stmt {
            ast::Stmt::ClassDef(node) => {
                let def = Definition::ClassDef(TypedNodeKey::from_node(node));
                self.add_symbol_with_def(&node.name, def);
                self.with_type_params(&node.name, &node.type_params, |builder| {
                    builder.push_scope(builder.cur_scope(), &node.name, ScopeKind::Class);
                    ast::visitor::preorder::walk_stmt(builder, stmt);
                    builder.pop_scope();
                });
            }
            ast::Stmt::FunctionDef(node) => {
                let def = Definition::FunctionDef(TypedNodeKey::from_node(node));
                self.add_symbol_with_def(&node.name, def);
                self.with_type_params(&node.name, &node.type_params, |builder| {
                    builder.push_scope(builder.cur_scope(), &node.name, ScopeKind::Function);
                    ast::visitor::preorder::walk_stmt(builder, stmt);
                    builder.pop_scope();
                });
            }
            ast::Stmt::Import(ast::StmtImport { names, .. }) => {
                for alias in names {
                    let symbol_name = if let Some(asname) = &alias.asname {
                        asname.id.as_str()
                    } else {
                        alias.name.id.split('.').next().unwrap()
                    };
                    let def = Definition::Import(ImportDefinition {
                        module: alias.name.id.clone(),
                    });
                    self.add_symbol_with_def(symbol_name, def);
                }
            }
            ast::Stmt::ImportFrom(ast::StmtImportFrom {
                module,
                names,
                level,
                ..
            }) => {
                for alias in names {
                    let symbol_name = if let Some(asname) = &alias.asname {
                        asname.id.as_str()
                    } else {
                        alias.name.id.as_str()
                    };
                    let def = Definition::ImportFrom(ImportFromDefinition {
                        module: module.as_ref().map(|m| m.id.clone()),
                        name: alias.name.id.clone(),
                        level: *level,
                    });
                    self.add_symbol_with_def(symbol_name, def);
                }
            }
            _ => {
                ast::visitor::preorder::walk_stmt(self, stmt);
            }
        }
    }
}

#[derive(Debug, Default)]
pub struct SymbolTablesStorage(KeyValueCache<FileId, Arc<SymbolTable>>);

impl Deref for SymbolTablesStorage {
    type Target = KeyValueCache<FileId, Arc<SymbolTable>>;

    fn deref(&self) -> &Self::Target {
        &self.0
    }
}

impl DerefMut for SymbolTablesStorage {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.0
    }
}

#[cfg(test)]
mod tests {
    use textwrap::dedent;

    use crate::parse::Parsed;
    use crate::symbols::ScopeKind;

    use super::{SymbolId, SymbolIterator, SymbolTable};

    mod from_ast {
        use super::*;

        fn parse(code: &str) -> Parsed {
            Parsed::from_text(&dedent(code))
        }

        fn names<I>(it: SymbolIterator<I>) -> Vec<&str>
        where
            I: Iterator<Item = SymbolId>,
        {
            let mut symbols: Vec<_> = it.map(|sym| sym.name.as_str()).collect();
            symbols.sort_unstable();
            symbols
        }

        #[test]
        fn empty() {
            let parsed = parse("");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()).len(), 0);
        }

        #[test]
        fn simple() {
            let parsed = parse("x");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["x"]);
            assert_eq!(
                table.defs(table.root_symbol_id_by_name("x").unwrap()).len(),
                0
            );
        }

        #[test]
        fn annotation_only() {
            let parsed = parse("x: int");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["int", "x"]);
            // TODO record definition
        }

        #[test]
        fn import() {
            let parsed = parse("import foo");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["foo"]);
            assert_eq!(
                table
                    .defs(table.root_symbol_id_by_name("foo").unwrap())
                    .len(),
                1
            );
        }

        #[test]
        fn import_sub() {
            let parsed = parse("import foo.bar");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["foo"]);
        }

        #[test]
        fn import_as() {
            let parsed = parse("import foo.bar as baz");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["baz"]);
        }

        #[test]
        fn import_from() {
            let parsed = parse("from bar import foo");
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["foo"]);
            assert_eq!(
                table
                    .defs(table.root_symbol_id_by_name("foo").unwrap())
                    .len(),
                1
            );
        }

        #[test]
        fn class_scope() {
            let parsed = parse(
                "
                class C:
                    x = 1
                y = 2
                ",
            );
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["C", "y"]);
            let scopes = table.root_child_scope_ids();
            assert_eq!(scopes.len(), 1);
            let c_scope = scopes[0].scope(&table);
            assert_eq!(c_scope.kind(), ScopeKind::Class);
            assert_eq!(c_scope.name(), "C");
            assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
            assert_eq!(
                table.defs(table.root_symbol_id_by_name("C").unwrap()).len(),
                1
            );
        }

        #[test]
        fn func_scope() {
            let parsed = parse(
                "
                def func():
                    x = 1
                y = 2
                ",
            );
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["func", "y"]);
            let scopes = table.root_child_scope_ids();
            assert_eq!(scopes.len(), 1);
            let func_scope = scopes[0].scope(&table);
            assert_eq!(func_scope.kind(), ScopeKind::Function);
            assert_eq!(func_scope.name(), "func");
            assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
            assert_eq!(
                table
                    .defs(table.root_symbol_id_by_name("func").unwrap())
                    .len(),
                1
            );
        }

        #[test]
        fn dupes() {
            let parsed = parse(
                "
                def func():
                    x = 1
                def func():
                    y = 2
                ",
            );
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["func"]);
            let scopes = table.root_child_scope_ids();
            assert_eq!(scopes.len(), 2);
            let func_scope_1 = scopes[0].scope(&table);
            let func_scope_2 = scopes[1].scope(&table);
            assert_eq!(func_scope_1.kind(), ScopeKind::Function);
            assert_eq!(func_scope_1.name(), "func");
            assert_eq!(func_scope_2.kind(), ScopeKind::Function);
            assert_eq!(func_scope_2.name(), "func");
            assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
            assert_eq!(names(table.symbols_for_scope(scopes[1])), vec!["y"]);
            assert_eq!(
                table
                    .defs(table.root_symbol_id_by_name("func").unwrap())
                    .len(),
                2
            );
        }

        #[test]
        fn generic_func() {
            let parsed = parse(
                "
                def func[T]():
                    x = 1
                ",
            );
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["func"]);
            let scopes = table.root_child_scope_ids();
            assert_eq!(scopes.len(), 1);
            let ann_scope_id = scopes[0];
            let ann_scope = ann_scope_id.scope(&table);
            assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
            assert_eq!(ann_scope.name(), "func");
            assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
            let scopes = table.child_scope_ids_of(ann_scope_id);
            assert_eq!(scopes.len(), 1);
            let func_scope_id = scopes[0];
            let func_scope = func_scope_id.scope(&table);
            assert_eq!(func_scope.kind(), ScopeKind::Function);
            assert_eq!(func_scope.name(), "func");
            assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
        }

        #[test]
        fn generic_class() {
            let parsed = parse(
                "
                class C[T]:
                    x = 1
                ",
            );
            let table = SymbolTable::from_ast(parsed.ast());
            assert_eq!(names(table.root_symbols()), vec!["C"]);
            let scopes = table.root_child_scope_ids();
            assert_eq!(scopes.len(), 1);
            let ann_scope_id = scopes[0];
            let ann_scope = ann_scope_id.scope(&table);
            assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
            assert_eq!(ann_scope.name(), "C");
            assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
            let scopes = table.child_scope_ids_of(ann_scope_id);
            assert_eq!(scopes.len(), 1);
            let func_scope_id = scopes[0];
            let func_scope = func_scope_id.scope(&table);
            assert_eq!(func_scope.kind(), ScopeKind::Class);
            assert_eq!(func_scope.name(), "C");
            assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
        }
    }

    #[test]
    fn insert_same_name_symbol_twice() {
        let mut table = SymbolTable::new();
        let root_scope_id = SymbolTable::root_scope_id();
        let symbol_id_1 = table.add_symbol_to_scope(root_scope_id, "foo");
        let symbol_id_2 = table.add_symbol_to_scope(root_scope_id, "foo");
        assert_eq!(symbol_id_1, symbol_id_2);
    }

    #[test]
    fn insert_different_named_symbols() {
        let mut table = SymbolTable::new();
        let root_scope_id = SymbolTable::root_scope_id();
        let symbol_id_1 = table.add_symbol_to_scope(root_scope_id, "foo");
        let symbol_id_2 = table.add_symbol_to_scope(root_scope_id, "bar");
        assert_ne!(symbol_id_1, symbol_id_2);
    }

    #[test]
    fn add_child_scope_with_symbol() {
        let mut table = SymbolTable::new();
        let root_scope_id = SymbolTable::root_scope_id();
        let foo_symbol_top = table.add_symbol_to_scope(root_scope_id, "foo");
        let c_scope = table.add_child_scope(root_scope_id, "C", ScopeKind::Class);
        let foo_symbol_inner = table.add_symbol_to_scope(c_scope, "foo");
        assert_ne!(foo_symbol_top, foo_symbol_inner);
    }

    #[test]
    fn scope_from_id() {
        let table = SymbolTable::new();
        let root_scope_id = SymbolTable::root_scope_id();
        let scope = root_scope_id.scope(&table);
        assert_eq!(scope.name.as_str(), "<module>");
        assert_eq!(scope.kind, ScopeKind::Module);
    }

    #[test]
    fn symbol_from_id() {
        let mut table = SymbolTable::new();
        let root_scope_id = SymbolTable::root_scope_id();
        let foo_symbol_id = table.add_symbol_to_scope(root_scope_id, "foo");
        let symbol = foo_symbol_id.symbol(&table);
        assert_eq!(symbol.name.as_str(), "foo");
    }
}
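Note: the tests above double as usage documentation; distilled to the in-crate happy path (both `from_ast` and the accessors are `pub(crate)`), a sketch assuming `Parsed::from_text` as defined in parse.rs:

use crate::parse::Parsed;
use crate::symbols::SymbolTable;

fn demo() {
    let parsed = Parsed::from_text("import os\n\ndef f():\n    x = 1\n");
    let table = SymbolTable::from_ast(parsed.ast());

    // Root scope holds `os` and `f`; `x` lives in the child function scope.
    let mut roots: Vec<_> = table.root_symbols().map(|s| s.name().to_string()).collect();
    roots.sort_unstable();
    assert_eq!(roots, ["f", "os"]);

    // Each root symbol carries its recorded definitions.
    let defs = table.defs(table.root_symbol_id_by_name("f").unwrap());
    assert_eq!(defs.len(), 1);
}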
519
crates/red_knot/src/types.rs
Normal file

@@ -0,0 +1,519 @@
#![allow(dead_code)]
use crate::ast_ids::NodeKey;
use crate::files::FileId;
use crate::symbols::SymbolId;
use crate::{FxDashMap, FxIndexSet, Name};
use ruff_index::{newtype_index, IndexVec};
use rustc_hash::FxHashMap;

pub(crate) mod infer;

pub(crate) use infer::infer_symbol_type;

/// unique ID for a type
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Type {
    /// the dynamic or gradual type: a statically-unknown set of values
    Any,
    /// the empty set of values
    Never,
    /// unknown type (no annotation)
    /// equivalent to Any, or to object in strict mode
    Unknown,
    /// name is not bound to any value
    Unbound,
    /// a specific function
    Function(FunctionTypeId),
    /// the set of Python objects with a given class in their __class__'s method resolution order
    Class(ClassTypeId),
    Union(UnionTypeId),
    Intersection(IntersectionTypeId),
    // TODO protocols, callable types, overloads, generics, type vars
}

impl Type {
    fn display<'a>(&'a self, store: &'a TypeStore) -> DisplayType<'a> {
        DisplayType { ty: self, store }
    }
}

// TODO: currently calling `get_function` et al and holding on to the `FunctionTypeRef` will lock a
// shard of this dashmap, for as long as you hold the reference. This may be a problem. We could
// switch to having all the arenas hold Arc, or we could see if we can split up ModuleTypeStore,
// and/or give it inner mutability and finer-grained internal locking.
#[derive(Debug, Default)]
pub struct TypeStore {
    modules: FxDashMap<FileId, ModuleTypeStore>,
}
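Note: the locking hazard described in the TODO above is easy to reproduce with `dashmap` directly; this standalone sketch (not red_knot code) shows why holding a `Ref` across another access to the same shard is dangerous:

use dashmap::DashMap;

fn main() {
    let map: DashMap<u32, &str> = DashMap::new();
    map.insert(1, "one");

    // `Ref` keeps a read lock on the key's shard for its whole lifetime.
    let one = map.get(&1).unwrap();

    // Writing to a key that hashes to the same shard while `one` is alive
    // would block this thread forever:
    // map.insert(2, "two"); // potential deadlock

    println!("{}", *one);
    drop(one); // release the shard lock

    map.insert(2, "two"); // fine once the guard is dropped
}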
impl TypeStore {
    pub fn remove_module(&mut self, file_id: FileId) {
        self.modules.remove(&file_id);
    }

    pub fn cache_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId, ty: Type) {
        self.add_or_get_module(file_id)
            .symbol_types
            .insert(symbol_id, ty);
    }

    pub fn cache_node_type(&mut self, file_id: FileId, node_key: NodeKey, ty: Type) {
        self.add_or_get_module(file_id)
            .node_types
            .insert(node_key, ty);
    }

    pub fn get_cached_symbol_type(&self, file_id: FileId, symbol_id: SymbolId) -> Option<Type> {
        self.try_get_module(file_id)?
            .symbol_types
            .get(&symbol_id)
            .copied()
    }

    pub fn get_cached_node_type(&self, file_id: FileId, node_key: &NodeKey) -> Option<Type> {
        self.try_get_module(file_id)?
            .node_types
            .get(node_key)
            .copied()
    }

    fn add_or_get_module(&mut self, file_id: FileId) -> ModuleStoreRefMut {
        self.modules
            .entry(file_id)
            .or_insert_with(|| ModuleTypeStore::new(file_id))
    }

    fn get_module(&self, file_id: FileId) -> ModuleStoreRef {
        self.try_get_module(file_id).expect("module should exist")
    }

    fn try_get_module(&self, file_id: FileId) -> Option<ModuleStoreRef> {
        self.modules.get(&file_id)
    }

    fn add_function(&mut self, file_id: FileId, name: &str) -> FunctionTypeId {
        self.add_or_get_module(file_id).add_function(name)
    }

    fn add_class(&mut self, file_id: FileId, name: &str) -> ClassTypeId {
        self.add_or_get_module(file_id).add_class(name)
    }

    fn add_union(&mut self, file_id: FileId, elems: &[Type]) -> UnionTypeId {
        self.add_or_get_module(file_id).add_union(elems)
    }

    fn add_intersection(
        &mut self,
        file_id: FileId,
        positive: &[Type],
        negative: &[Type],
    ) -> IntersectionTypeId {
        self.add_or_get_module(file_id)
            .add_intersection(positive, negative)
    }

    fn get_function(&self, id: FunctionTypeId) -> FunctionTypeRef {
        FunctionTypeRef {
            module_store: self.get_module(id.file_id),
            function_id: id.func_id,
        }
    }

    fn get_class(&self, id: ClassTypeId) -> ClassTypeRef {
        ClassTypeRef {
            module_store: self.get_module(id.file_id),
            class_id: id.class_id,
        }
    }

    fn get_union(&self, id: UnionTypeId) -> UnionTypeRef {
        UnionTypeRef {
            module_store: self.get_module(id.file_id),
            union_id: id.union_id,
        }
    }

    fn get_intersection(&self, id: IntersectionTypeId) -> IntersectionTypeRef {
        IntersectionTypeRef {
            module_store: self.get_module(id.file_id),
            intersection_id: id.intersection_id,
        }
    }
}

type ModuleStoreRef<'a> = dashmap::mapref::one::Ref<
    'a,
    FileId,
    ModuleTypeStore,
    std::hash::BuildHasherDefault<rustc_hash::FxHasher>,
>;

type ModuleStoreRefMut<'a> = dashmap::mapref::one::RefMut<
    'a,
    FileId,
    ModuleTypeStore,
    std::hash::BuildHasherDefault<rustc_hash::FxHasher>,
>;

#[derive(Debug)]
pub(crate) struct FunctionTypeRef<'a> {
    module_store: ModuleStoreRef<'a>,
    function_id: ModuleFunctionTypeId,
}

impl<'a> std::ops::Deref for FunctionTypeRef<'a> {
    type Target = FunctionType;

    fn deref(&self) -> &Self::Target {
        self.module_store.get_function(self.function_id)
    }
}

#[derive(Debug)]
pub(crate) struct ClassTypeRef<'a> {
    module_store: ModuleStoreRef<'a>,
    class_id: ModuleClassTypeId,
}

impl<'a> std::ops::Deref for ClassTypeRef<'a> {
    type Target = ClassType;

    fn deref(&self) -> &Self::Target {
        self.module_store.get_class(self.class_id)
    }
}

#[derive(Debug)]
pub(crate) struct UnionTypeRef<'a> {
    module_store: ModuleStoreRef<'a>,
    union_id: ModuleUnionTypeId,
}

impl<'a> std::ops::Deref for UnionTypeRef<'a> {
    type Target = UnionType;

    fn deref(&self) -> &Self::Target {
        self.module_store.get_union(self.union_id)
    }
}

#[derive(Debug)]
pub(crate) struct IntersectionTypeRef<'a> {
    module_store: ModuleStoreRef<'a>,
    intersection_id: ModuleIntersectionTypeId,
}

impl<'a> std::ops::Deref for IntersectionTypeRef<'a> {
    type Target = IntersectionType;

    fn deref(&self) -> &Self::Target {
        self.module_store.get_intersection(self.intersection_id)
    }
}

#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct FunctionTypeId {
    file_id: FileId,
    func_id: ModuleFunctionTypeId,
}

#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct ClassTypeId {
    file_id: FileId,
    class_id: ModuleClassTypeId,
}

#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct UnionTypeId {
    file_id: FileId,
    union_id: ModuleUnionTypeId,
}

#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct IntersectionTypeId {
    file_id: FileId,
    intersection_id: ModuleIntersectionTypeId,
}

#[newtype_index]
struct ModuleFunctionTypeId;

#[newtype_index]
struct ModuleClassTypeId;

#[newtype_index]
struct ModuleUnionTypeId;

#[newtype_index]
struct ModuleIntersectionTypeId;

#[derive(Debug)]
struct ModuleTypeStore {
    file_id: FileId,
    /// arena of all function types defined in this module
    functions: IndexVec<ModuleFunctionTypeId, FunctionType>,
    /// arena of all class types defined in this module
    classes: IndexVec<ModuleClassTypeId, ClassType>,
    /// arena of all union types created in this module
    unions: IndexVec<ModuleUnionTypeId, UnionType>,
    /// arena of all intersection types created in this module
    intersections: IndexVec<ModuleIntersectionTypeId, IntersectionType>,
    /// cached types of symbols in this module
    symbol_types: FxHashMap<SymbolId, Type>,
    /// cached types of AST nodes in this module
    node_types: FxHashMap<NodeKey, Type>,
}

impl ModuleTypeStore {
    fn new(file_id: FileId) -> Self {
        Self {
            file_id,
            functions: IndexVec::default(),
            classes: IndexVec::default(),
            unions: IndexVec::default(),
            intersections: IndexVec::default(),
            symbol_types: FxHashMap::default(),
            node_types: FxHashMap::default(),
        }
    }

    fn add_function(&mut self, name: &str) -> FunctionTypeId {
        let func_id = self.functions.push(FunctionType {
            name: Name::new(name),
        });
        FunctionTypeId {
            file_id: self.file_id,
            func_id,
        }
    }

    fn add_class(&mut self, name: &str) -> ClassTypeId {
        let class_id = self.classes.push(ClassType {
            name: Name::new(name),
        });
        ClassTypeId {
            file_id: self.file_id,
            class_id,
        }
    }

    fn add_union(&mut self, elems: &[Type]) -> UnionTypeId {
        let union_id = self.unions.push(UnionType {
            elements: elems.iter().copied().collect(),
        });
        UnionTypeId {
            file_id: self.file_id,
            union_id,
        }
    }

    fn add_intersection(&mut self, positive: &[Type], negative: &[Type]) -> IntersectionTypeId {
        let intersection_id = self.intersections.push(IntersectionType {
            positive: positive.iter().copied().collect(),
            negative: negative.iter().copied().collect(),
        });
        IntersectionTypeId {
            file_id: self.file_id,
            intersection_id,
        }
    }

    fn get_function(&self, func_id: ModuleFunctionTypeId) -> &FunctionType {
        &self.functions[func_id]
    }

    fn get_class(&self, class_id: ModuleClassTypeId) -> &ClassType {
        &self.classes[class_id]
    }

    fn get_union(&self, union_id: ModuleUnionTypeId) -> &UnionType {
        &self.unions[union_id]
    }

    fn get_intersection(&self, intersection_id: ModuleIntersectionTypeId) -> &IntersectionType {
        &self.intersections[intersection_id]
    }
}

#[derive(Copy, Clone, Debug)]
struct DisplayType<'a> {
    ty: &'a Type,
    store: &'a TypeStore,
}

impl std::fmt::Display for DisplayType<'_> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self.ty {
            Type::Any => f.write_str("Any"),
            Type::Never => f.write_str("Never"),
            Type::Unknown => f.write_str("Unknown"),
            Type::Unbound => f.write_str("Unbound"),
            Type::Class(class_id) => f.write_str(self.store.get_class(*class_id).name()),
            Type::Function(func_id) => f.write_str(self.store.get_function(*func_id).name()),
            Type::Union(union_id) => self
                .store
                .get_module(union_id.file_id)
                .get_union(union_id.union_id)
                .display(f, self.store),
            Type::Intersection(int_id) => self
                .store
                .get_module(int_id.file_id)
                .get_intersection(int_id.intersection_id)
                .display(f, self.store),
        }
    }
}

#[derive(Debug)]
pub(crate) struct ClassType {
    name: Name,
}

impl ClassType {
    fn name(&self) -> &str {
        self.name.as_str()
    }
}

#[derive(Debug)]
pub(crate) struct FunctionType {
    name: Name,
}

impl FunctionType {
    fn name(&self) -> &str {
        self.name.as_str()
    }
}

#[derive(Debug)]
pub(crate) struct UnionType {
    // the union type includes values in any of these types
    elements: FxIndexSet<Type>,
}

impl UnionType {
    fn display(&self, f: &mut std::fmt::Formatter<'_>, store: &TypeStore) -> std::fmt::Result {
        f.write_str("(")?;
        let mut first = true;
        for ty in &self.elements {
            if !first {
                f.write_str(" | ")?;
            };
            first = false;
            write!(f, "{}", ty.display(store))?;
        }
        f.write_str(")")
    }
}

// Negation types aren't expressible in annotations, and are most likely to arise from type
// narrowing along with intersections (e.g. `if not isinstance(...)`), so we represent them
// directly in intersections rather than as a separate type. This sacrifices some efficiency in the
// case where a Not appears outside an intersection (unclear when that could even happen, but we'd
// have to represent it as a single-element intersection if it did) in exchange for better
// efficiency in the not-within-intersection case.
#[derive(Debug)]
pub(crate) struct IntersectionType {
    // the intersection type includes only values in all of these types
    positive: FxIndexSet<Type>,
    // negated elements of the intersection, e.g.
    negative: FxIndexSet<Type>,
}

impl IntersectionType {
    fn display(&self, f: &mut std::fmt::Formatter<'_>, store: &TypeStore) -> std::fmt::Result {
        f.write_str("(")?;
        let mut first = true;
        for (neg, ty) in self
            .positive
            .iter()
            .map(|ty| (false, ty))
            .chain(self.negative.iter().map(|ty| (true, ty)))
        {
            if !first {
                f.write_str(" & ")?;
            };
            first = false;
            if neg {
                f.write_str("~")?;
            };
            write!(f, "{}", ty.display(store))?;
        }
        f.write_str(")")
    }
}

#[cfg(test)]
mod tests {
    use crate::files::Files;
    use crate::types::{Type, TypeStore};
    use crate::FxIndexSet;
    use std::path::Path;

    #[test]
    fn add_class() {
        let mut store = TypeStore::default();
        let files = Files::default();
        let file_id = files.intern(Path::new("/foo"));
        let id = store.add_class(file_id, "C");
        assert_eq!(store.get_class(id).name(), "C");
        let class = Type::Class(id);
        assert_eq!(format!("{}", class.display(&store)), "C");
    }

    #[test]
    fn add_function() {
        let mut store = TypeStore::default();
        let files = Files::default();
        let file_id = files.intern(Path::new("/foo"));
        let id = store.add_function(file_id, "func");
        assert_eq!(store.get_function(id).name(), "func");
        let func = Type::Function(id);
        assert_eq!(format!("{}", func.display(&store)), "func");
    }

    #[test]
    fn add_union() {
        let mut store = TypeStore::default();
        let files = Files::default();
        let file_id = files.intern(Path::new("/foo"));
        let c1 = store.add_class(file_id, "C1");
        let c2 = store.add_class(file_id, "C2");
        let elems = vec![Type::Class(c1), Type::Class(c2)];
        let id = store.add_union(file_id, &elems);
        assert_eq!(
            store.get_union(id).elements,
            elems.into_iter().collect::<FxIndexSet<_>>()
        );
        let union = Type::Union(id);
        assert_eq!(format!("{}", union.display(&store)), "(C1 | C2)");
    }

    #[test]
    fn add_intersection() {
        let mut store = TypeStore::default();
        let files = Files::default();
        let file_id = files.intern(Path::new("/foo"));
        let c1 = store.add_class(file_id, "C1");
        let c2 = store.add_class(file_id, "C2");
        let c3 = store.add_class(file_id, "C3");
        let pos = vec![Type::Class(c1), Type::Class(c2)];
        let neg = vec![Type::Class(c3)];
        let id = store.add_intersection(file_id, &pos, &neg);
        assert_eq!(
            store.get_intersection(id).positive,
            pos.into_iter().collect::<FxIndexSet<_>>()
        );
        assert_eq!(
            store.get_intersection(id).negative,
            neg.into_iter().collect::<FxIndexSet<_>>()
        );
        let intersection = Type::Intersection(id);
        assert_eq!(
            format!("{}", intersection.display(&store)),
            "(C1 & C2 & ~C3)"
        );
    }
}
141
crates/red_knot/src/types/infer.rs
Normal file

@@ -0,0 +1,141 @@
#![allow(dead_code)]
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::module::ModuleName;
use crate::symbols::{Definition, ImportFromDefinition, SymbolId};
use crate::types::Type;
use crate::FileId;
use ruff_python_ast::AstNode;

// TODO this should not take a &mut db, it should be a query, not a mutation. This means we'll need
// to use interior mutability in TypeStore instead, and avoid races in populating the cache.
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_symbol_type<Db>(db: &mut Db, file_id: FileId, symbol_id: SymbolId) -> Type
where
    Db: SemanticDb + HasJar<SemanticJar>,
{
    let symbols = db.symbol_table(file_id);
    let defs = symbols.defs(symbol_id);

    if let Some(ty) = db
        .jar()
        .type_store
        .get_cached_symbol_type(file_id, symbol_id)
    {
        return ty;
    }

    // TODO handle multiple defs, conditional defs...
    assert_eq!(defs.len(), 1);

    let ty = match &defs[0] {
        Definition::ImportFrom(ImportFromDefinition {
            module,
            name,
            level,
        }) => {
            // TODO relative imports
            assert!(matches!(level, 0));
            let module_name = ModuleName::new(module.as_ref().expect("TODO relative imports"));
            if let Some(module) = db.resolve_module(module_name) {
                let remote_file_id = module.path(db).file();
                let remote_symbols = db.symbol_table(remote_file_id);
                if let Some(remote_symbol_id) = remote_symbols.root_symbol_id_by_name(name) {
                    db.infer_symbol_type(remote_file_id, remote_symbol_id)
                } else {
                    Type::Unknown
                }
            } else {
                Type::Unknown
            }
        }
        Definition::ClassDef(node_key) => {
            if let Some(ty) = db
                .jar()
                .type_store
                .get_cached_node_type(file_id, node_key.erased())
            {
                ty
            } else {
                let parsed = db.parse(file_id);
                let ast = parsed.ast();
                let node = node_key.resolve_unwrap(ast.as_any_node_ref());

                let store = &mut db.jar_mut().type_store;
                let ty = Type::Class(store.add_class(file_id, &node.name.id));
                store.cache_node_type(file_id, *node_key.erased(), ty);
                ty
            }
        }
        _ => todo!("other kinds of definitions"),
    };

    db.jar_mut()
        .type_store
        .cache_symbol_type(file_id, symbol_id, ty);
    // TODO record dependencies
    ty
}

#[cfg(test)]
mod tests {
    use crate::db::tests::TestDb;
    use crate::db::{HasJar, SemanticDb, SemanticJar};
    use crate::module::{ModuleName, ModuleSearchPath, ModuleSearchPathKind};
    use crate::types::Type;

    // TODO with virtual filesystem we shouldn't have to write files to disk for these
    // tests

    struct TestCase {
        temp_dir: tempfile::TempDir,
        db: TestDb,

        src: ModuleSearchPath,
    }

    fn create_test() -> std::io::Result<TestCase> {
        let temp_dir = tempfile::tempdir()?;

        let src = temp_dir.path().join("src");
        std::fs::create_dir(&src)?;
        let src = ModuleSearchPath::new(src.canonicalize()?, ModuleSearchPathKind::FirstParty);

        let roots = vec![src.clone()];

        let mut db = TestDb::default();
        db.set_module_search_paths(roots);

        Ok(TestCase { temp_dir, db, src })
    }

    #[test]
    fn follow_import_to_class() -> std::io::Result<()> {
        let TestCase {
            src,
            mut db,
            temp_dir: _temp_dir,
        } = create_test()?;

        let a_path = src.path().join("a.py");
        let b_path = src.path().join("b.py");
        std::fs::write(a_path, "from b import C as D")?;
        std::fs::write(b_path, "class C: pass")?;
        let a_file = db
            .resolve_module(ModuleName::new("a"))
            .expect("module should be found")
            .path(&db)
            .file();
        let a_syms = db.symbol_table(a_file);
        let d_sym = a_syms
            .root_symbol_id_by_name("D")
            .expect("D symbol should be found");

        let ty = db.infer_symbol_type(a_file, d_sym);

        let jar = HasJar::<SemanticJar>::jar(&db);

        assert!(matches!(ty, Type::Class(_)));
        assert_eq!(format!("{}", ty.display(&jar.type_store)), "C");
        Ok(())
    }
}
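Note: one possible direction for the `&mut db` TODO at the top of this file, sketched in isolation: give the cache interior mutability so type inference can stay a read-only query. All names here are illustrative stand-ins, not part of this commit:

use dashmap::DashMap;

// Illustrative stand-ins for the real FileId/SymbolId/Type.
type FileId = u32;
type SymbolId = u32;

#[derive(Copy, Clone, Debug, PartialEq)]
enum Ty {
    Unknown,
}

// A cache that can be populated through a shared reference, so the
// surrounding query could take `&Db` instead of `&mut Db`.
#[derive(Default)]
struct SymbolTypeCache {
    types: DashMap<(FileId, SymbolId), Ty>,
}

impl SymbolTypeCache {
    fn get_or_infer(&self, key: (FileId, SymbolId), infer: impl FnOnce() -> Ty) -> Ty {
        if let Some(ty) = self.types.get(&key) {
            return *ty;
        }
        let ty = infer();
        // Racing threads may compute twice; last write wins, which is
        // acceptable for a deterministic query.
        self.types.insert(key, ty);
        ty
    }
}

fn main() {
    let cache = SymbolTypeCache::default();
    let ty = cache.get_or_infer((0, 0), || Ty::Unknown);
    assert_eq!(ty, Ty::Unknown);
}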
78
crates/red_knot/src/watch.rs
Normal file

@@ -0,0 +1,78 @@
use anyhow::Context;
use std::path::Path;

use crate::files::Files;
use crate::program::{FileChange, FileChangeKind};
use notify::event::{CreateKind, RemoveKind};
use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};

pub struct FileWatcher {
    watcher: RecommendedWatcher,
}

pub trait EventHandler: Send + 'static {
    fn handle(&self, changes: Vec<FileChange>);
}

impl<F> EventHandler for F
where
    F: Fn(Vec<FileChange>) + Send + 'static,
{
    fn handle(&self, changes: Vec<FileChange>) {
        let f = self;
        f(changes);
    }
}

impl FileWatcher {
    pub fn new<E>(handler: E, files: Files) -> anyhow::Result<Self>
    where
        E: EventHandler,
    {
        Self::from_handler(Box::new(handler), files)
    }

    fn from_handler(handler: Box<dyn EventHandler>, files: Files) -> anyhow::Result<Self> {
        let watcher = recommended_watcher(move |changes: notify::Result<Event>| {
            match changes {
                Ok(event) => {
                    // TODO verify that this handles all events correctly
                    let change_kind = match event.kind {
                        EventKind::Create(CreateKind::File) => FileChangeKind::Created,
                        EventKind::Modify(_) => FileChangeKind::Modified,
                        EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
                        _ => {
                            return;
                        }
                    };

                    let mut changes = Vec::new();

                    for path in event.paths {
                        if path.is_file() {
                            let id = files.intern(&path);
                            changes.push(FileChange::new(id, change_kind));
                        }
                    }

                    if !changes.is_empty() {
                        handler.handle(changes);
                    }
                }
                // TODO proper error handling
                Err(err) => {
                    panic!("Error: {err}");
                }
            }
        })
        .context("Failed to create file watcher.")?;

        Ok(Self { watcher })
    }

    pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
        self.watcher.watch(path, RecursiveMode::Recursive)?;

        Ok(())
    }
}
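Note: tying the pieces together, a sketch of wiring `FileWatcher` to `Program::apply_changes`, assuming a shared `Files` handle and a channel chosen by the host; the closure-based `EventHandler` impl above makes this direct, and the `red_knot::…` paths are assumed from the crate layout:

use std::path::Path;
use std::sync::mpsc;

use red_knot::files::Files;
use red_knot::watch::FileWatcher;

fn watch(root: &Path, files: Files) -> anyhow::Result<()> {
    let (tx, rx) = mpsc::channel();

    // Any `Fn(Vec<FileChange>) + Send + 'static` closure implements EventHandler.
    let mut watcher = FileWatcher::new(
        move |changes| {
            let _ = tx.send(changes);
        },
        files,
    )?;
    watcher.watch_folder(root)?;

    // The main loop drains batched changes and invalidates caches.
    while let Ok(changes) = rx.recv() {
        // program.apply_changes(changes); // with a &mut Program in scope
        let _ = changes;
    }
    Ok(())
}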
@@ -11,9 +11,9 @@ repository = { workspace = true }
license = { workspace = true }

[dependencies]
itertools = { workspace = true }
glob = { workspace = true }
globset = { workspace = true }
itertools = { workspace = true }
regex = { workspace = true }
filetime = { workspace = true }
seahash = { workspace = true }
(File diff suppressed because it is too large)
@@ -1,11 +1,10 @@
#![allow(clippy::derive_partial_eq_without_eq)]

use std::cell::OnceCell;

use std::fmt;
use std::fmt::Debug;
use std::ops::Deref;
use std::slice::{Iter, IterMut};
use std::sync::OnceLock;

use bitflags::bitflags;
use itertools::Itertools;

@@ -1420,7 +1419,7 @@ impl StringLiteralValue {
        Self {
            inner: StringLiteralValueInner::Concatenated(ConcatenatedStringLiteral {
                strings,
                value: OnceCell::new(),
                value: OnceLock::new(),
            }),
        }
    }

@@ -1782,7 +1781,7 @@ struct ConcatenatedStringLiteral {
    /// Each string literal that makes up the concatenated string.
    strings: Vec<StringLiteral>,
    /// The concatenated string value.
    value: OnceCell<Box<str>>,
    value: OnceLock<Box<str>>,
}

impl ConcatenatedStringLiteral {

@@ -4168,7 +4167,7 @@ mod tests {
        assert_eq!(std::mem::size_of::<ExprSetComp>(), 40);
        assert_eq!(std::mem::size_of::<ExprSlice>(), 32);
        assert_eq!(std::mem::size_of::<ExprStarred>(), 24);
        assert_eq!(std::mem::size_of::<ExprStringLiteral>(), 48);
        assert_eq!(std::mem::size_of::<ExprStringLiteral>(), 56);
        assert_eq!(std::mem::size_of::<ExprSubscript>(), 32);
        assert_eq!(std::mem::size_of::<ExprTuple>(), 40);
        assert_eq!(std::mem::size_of::<ExprUnaryOp>(), 24);