Kick off Red-knot (#10849)

Co-authored-by: Carl Meyer <carl@oddbird.net>
Co-authored-by: Carl Meyer <carl@astral.sh>
This commit is contained in:
Micha Reiser 2024-04-27 10:34:00 +02:00 committed by GitHub
parent 845ba7cf5f
commit 7cd065e4a2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
24 changed files with 6282 additions and 103 deletions

135
Cargo.lock generated
View file

@ -36,6 +36,12 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "allocator-api2"
version = "0.2.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0942ffc6dcaadf03badf6e6a2d0228460359d5e34b57ccdc720b7382dfbd5ec5"
[[package]] [[package]]
name = "android-tzdata" name = "android-tzdata"
version = "0.1.1" version = "0.1.1"
@ -535,6 +541,16 @@ version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7" checksum = "7a81dae078cea95a014a339291cec439d2f232ebe854a9d672b796c6afafa9b7"
[[package]]
name = "ctrlc"
version = "3.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "672465ae37dc1bc6380a6547a8883d5dd397b0f1faaad4f265726cc7042a5345"
dependencies = [
"nix",
"windows-sys 0.52.0",
]
[[package]] [[package]]
name = "darling" name = "darling"
version = "0.20.8" version = "0.20.8"
@ -570,6 +586,19 @@ dependencies = [
"syn 2.0.60", "syn 2.0.60",
] ]
[[package]]
name = "dashmap"
version = "5.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "978747c1d849a7d2ee5e8adc0159961c48fb7e5db2f06af6723b80123bb53856"
dependencies = [
"cfg-if",
"hashbrown 0.14.3",
"lock_api",
"once_cell",
"parking_lot_core",
]
[[package]] [[package]]
name = "diff" name = "diff"
version = "0.1.13" version = "0.1.13"
@ -812,6 +841,10 @@ name = "hashbrown"
version = "0.14.3" version = "0.14.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604" checksum = "290f1a1d9242c78d09ce40a5e87e7554ee637af1351968159f4952f028f75604"
dependencies = [
"ahash",
"allocator-api2",
]
[[package]] [[package]]
name = "heck" name = "heck"
@ -1214,6 +1247,16 @@ version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c" checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.21" version = "0.4.21"
@ -1441,6 +1484,29 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets 0.48.5",
]
[[package]] [[package]]
name = "paste" name = "paste"
version = "1.0.14" version = "1.0.14"
@ -1732,6 +1798,37 @@ dependencies = [
"crossbeam-utils", "crossbeam-utils",
] ]
[[package]]
name = "red_knot"
version = "0.1.0"
dependencies = [
"anyhow",
"bitflags 2.5.0",
"crossbeam-channel",
"ctrlc",
"dashmap",
"hashbrown 0.14.3",
"indexmap",
"log",
"notify",
"parking_lot",
"rayon",
"ruff_index",
"ruff_notebook",
"ruff_python_ast",
"ruff_python_parser",
"ruff_python_trivia",
"ruff_text_size",
"rustc-hash",
"smallvec",
"smol_str",
"tempfile",
"textwrap",
"tracing",
"tracing-subscriber",
"tracing-tree",
]
[[package]] [[package]]
name = "redox_syscall" name = "redox_syscall"
version = "0.4.1" version = "0.4.1"
@ -2475,6 +2572,12 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]] [[package]]
name = "seahash" name = "seahash"
version = "4.1.0" version = "4.1.0"
@ -2628,6 +2731,21 @@ version = "1.13.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67"
[[package]]
name = "smawk"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b7c388c1b5e93756d0c740965c41e8822f866621d41acbdf6336a6a168f8840c"
[[package]]
name = "smol_str"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6845563ada680337a52d43bb0b29f396f2d911616f6573012645b9e3d048a49"
dependencies = [
"serde",
]
[[package]] [[package]]
name = "spin" name = "spin"
version = "0.9.8" version = "0.9.8"
@ -2779,6 +2897,17 @@ dependencies = [
"test-case-core", "test-case-core",
] ]
[[package]]
name = "textwrap"
version = "0.16.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "23d434d3f8967a09480fb04132ebe0a3e088c173e6d0ee7897abbdf4eab0f8b9"
dependencies = [
"smawk",
"unicode-linebreak",
"unicode-width",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.59" version = "1.0.59"
@ -3034,6 +3163,12 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
[[package]]
name = "unicode-linebreak"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3b09c83c3c29d37506a3e260c08c03743a6bb66a9cd432c6934ab501a190571f"
[[package]] [[package]]
name = "unicode-normalization" name = "unicode-normalization"
version = "0.1.23" version = "0.1.23"

View file

@ -31,6 +31,7 @@ console_log = { version = "1.0.0" }
countme = { version = "3.0.1" } countme = { version = "3.0.1" }
criterion = { version = "0.5.1", default-features = false } criterion = { version = "0.5.1", default-features = false }
crossbeam-channel = { version = "0.5.12" } crossbeam-channel = { version = "0.5.12" }
dashmap = { version = "5.5.3" }
dirs = { version = "5.0.0" } dirs = { version = "5.0.0" }
drop_bomb = { version = "0.1.5" } drop_bomb = { version = "0.1.5" }
env_logger = { version = "0.11.0" } env_logger = { version = "0.11.0" }
@ -39,10 +40,12 @@ filetime = { version = "0.2.23" }
fs-err = { version = "2.11.0" } fs-err = { version = "2.11.0" }
glob = { version = "0.3.1" } glob = { version = "0.3.1" }
globset = { version = "0.4.14" } globset = { version = "0.4.14" }
hashbrown = "0.14.3"
hexf-parse = { version = "0.2.1" } hexf-parse = { version = "0.2.1" }
ignore = { version = "0.4.22" } ignore = { version = "0.4.22" }
imara-diff = { version = "0.1.5" } imara-diff = { version = "0.1.5" }
imperative = { version = "1.0.4" } imperative = { version = "1.0.4" }
indexmap = { version = "2.2.6" }
indicatif = { version = "0.17.8" } indicatif = { version = "0.17.8" }
indoc = { version = "2.0.4" } indoc = { version = "2.0.4" }
insta = { version = "1.35.1", feature = ["filters", "glob"] } insta = { version = "1.35.1", feature = ["filters", "glob"] }
@ -68,6 +71,7 @@ once_cell = { version = "1.19.0" }
path-absolutize = { version = "3.1.1" } path-absolutize = { version = "3.1.1" }
path-slash = { version = "0.2.1" } path-slash = { version = "0.2.1" }
pathdiff = { version = "0.2.1" } pathdiff = { version = "0.2.1" }
parking_lot = "0.12.1"
pep440_rs = { version = "0.6.0", features = ["serde"] } pep440_rs = { version = "0.6.0", features = ["serde"] }
pretty_assertions = "1.3.0" pretty_assertions = "1.3.0"
proc-macro2 = { version = "1.0.79" } proc-macro2 = { version = "1.0.79" }

View file

@ -0,0 +1,45 @@
[package]
name = "red_knot"
version = "0.1.0"
edition.workspace = true
rust-version.workspace = true
homepage.workspace = true
documentation.workspace = true
repository.workspace = true
authors.workspace = true
license.workspace = true
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
ruff_python_parser = { path = "../ruff_python_parser" }
ruff_python_ast = { path = "../ruff_python_ast" }
ruff_python_trivia = { path = "../ruff_python_trivia" }
ruff_text_size = { path = "../ruff_text_size" }
ruff_index = { path = "../ruff_index" }
ruff_notebook = { path = "../ruff_notebook" }
anyhow = { workspace = true }
bitflags = { workspace = true }
ctrlc = "3.4.4"
crossbeam-channel = { workspace = true }
dashmap = { workspace = true }
hashbrown = { workspace = true }
indexmap = { workspace = true }
log = { workspace = true }
notify = { workspace = true }
parking_lot = { workspace = true }
rayon = { workspace = true }
rustc-hash = { workspace = true }
smallvec = { workspace = true }
smol_str = "0.2.1"
tracing = { workspace = true }
tracing-subscriber = { workspace = true }
tracing-tree = { workspace = true }
[dev-dependencies]
textwrap = "0.16.1"
tempfile = { workspace = true }
[lints]
workspace = true

View file

@ -0,0 +1,415 @@
use std::any::type_name;
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::marker::PhantomData;
use rustc_hash::FxHashMap;
use ruff_index::{Idx, IndexVec};
use ruff_python_ast::visitor::preorder;
use ruff_python_ast::visitor::preorder::{PreorderVisitor, TraversalSignal};
use ruff_python_ast::{
AnyNodeRef, AstNode, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule,
NodeKind, Parameter, Stmt, StmtAnnAssign, StmtAssign, StmtAugAssign, StmtClassDef,
StmtFunctionDef, StmtGlobal, StmtImport, StmtImportFrom, StmtNonlocal, StmtTypeAlias,
TypeParam, TypeParamParamSpec, TypeParamTypeVar, TypeParamTypeVarTuple, WithItem,
};
use ruff_text_size::{Ranged, TextRange};
/// A type agnostic ID that uniquely identifies an AST node in a file.
#[ruff_index::newtype_index]
pub struct AstId;
/// A typed ID that uniquely identifies an AST node in a file.
///
/// This is different from [`AstId`] in that it is a combination of ID and the type of the node the ID identifies.
/// Typing the ID prevents mixing IDs of different node types and allows to restrict the API to only accept
/// nodes for which an ID has been created (not all AST nodes get an ID).
pub struct TypedAstId<N: HasAstId> {
erased: AstId,
_marker: PhantomData<fn() -> N>,
}
impl<N: HasAstId> TypedAstId<N> {
/// Upcasts this ID from a more specific node type to a more general node type.
pub fn upcast<M: HasAstId>(self) -> TypedAstId<M>
where
N: Into<M>,
{
TypedAstId {
erased: self.erased,
_marker: PhantomData,
}
}
}
impl<N: HasAstId> Copy for TypedAstId<N> {}
impl<N: HasAstId> Clone for TypedAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for TypedAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.erased == other.erased
}
}
impl<N: HasAstId> Eq for TypedAstId<N> {}
impl<N: HasAstId> Hash for TypedAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.erased.hash(state);
}
}
impl<N: HasAstId> Debug for TypedAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_tuple("TypedAstId")
.field(&self.erased)
.field(&type_name::<N>())
.finish()
}
}
pub struct AstIds {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
}
impl AstIds {
// TODO rust analyzer doesn't allocate an ID for every node. It only allocates ids for
// nodes with a corresponding HIR element, that is nodes that are definitions.
pub fn from_module(module: &ModModule) -> Self {
let mut visitor = AstIdsVisitor::default();
// TODO: visit_module?
// Make sure we visit the root
visitor.create_id(module);
visitor.visit_body(&module.body);
while let Some(deferred) = visitor.deferred.pop() {
match deferred {
DeferredNode::FunctionDefinition(def) => {
def.visit_preorder(&mut visitor);
}
DeferredNode::ClassDefinition(def) => def.visit_preorder(&mut visitor),
}
}
AstIds {
ids: visitor.ids,
reverse: visitor.reverse,
}
}
/// Returns the ID to the root node.
pub fn root(&self) -> NodeKey {
self.ids[AstId::new(0)]
}
/// Returns the [`TypedAstId`] for a node.
pub fn ast_id<N: HasAstId>(&self, node: &N) -> TypedAstId<N> {
let key = node.syntax_node_key();
TypedAstId {
erased: self.reverse.get(&key).copied().unwrap(),
_marker: PhantomData,
}
}
/// Returns the [`TypedAstId`] for the node identified with the given [`TypedNodeKey`].
pub fn ast_id_for_key<N: HasAstId>(&self, node: &TypedNodeKey<N>) -> TypedAstId<N> {
let ast_id = self.ast_id_for_node_key(node.inner);
TypedAstId {
erased: ast_id,
_marker: PhantomData,
}
}
/// Returns the untyped [`AstId`] for the node identified by the given `node` key.
pub fn ast_id_for_node_key(&self, node: NodeKey) -> AstId {
self.reverse
.get(&node)
.copied()
.expect("Can't find node in AstIds map.")
}
/// Returns the [`TypedNodeKey`] for the node identified by the given [`TypedAstId`].
pub fn key<N: HasAstId>(&self, id: TypedAstId<N>) -> TypedNodeKey<N> {
let syntax_key = self.ids[id.erased];
TypedNodeKey::new(syntax_key).unwrap()
}
pub fn node_key<H: HasAstId>(&self, id: TypedAstId<H>) -> NodeKey {
self.ids[id.erased]
}
}
impl std::fmt::Debug for AstIds {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut map = f.debug_map();
for (key, value) in self.ids.iter_enumerated() {
map.entry(&key, &value);
}
map.finish()
}
}
impl PartialEq for AstIds {
fn eq(&self, other: &Self) -> bool {
self.ids == other.ids
}
}
impl Eq for AstIds {}
#[derive(Default)]
struct AstIdsVisitor<'a> {
ids: IndexVec<AstId, NodeKey>,
reverse: FxHashMap<NodeKey, AstId>,
deferred: Vec<DeferredNode<'a>>,
}
impl<'a> AstIdsVisitor<'a> {
fn create_id<A: HasAstId>(&mut self, node: &A) {
let node_key = node.syntax_node_key();
let id = self.ids.push(node_key);
self.reverse.insert(node_key, id);
}
}
impl<'a> PreorderVisitor<'a> for AstIdsVisitor<'a> {
fn visit_stmt(&mut self, stmt: &'a Stmt) {
match stmt {
Stmt::FunctionDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::FunctionDefinition(def));
return;
}
// TODO defer visiting the assignment body, type alias parameters etc?
Stmt::ClassDef(def) => {
self.create_id(def);
self.deferred.push(DeferredNode::ClassDefinition(def));
return;
}
Stmt::Expr(_) => {
// Skip
return;
}
Stmt::Return(_) => {}
Stmt::Delete(_) => {}
Stmt::Assign(assignment) => self.create_id(assignment),
Stmt::AugAssign(assignment) => {
self.create_id(assignment);
}
Stmt::AnnAssign(assignment) => self.create_id(assignment),
Stmt::TypeAlias(assignment) => self.create_id(assignment),
Stmt::For(_) => {}
Stmt::While(_) => {}
Stmt::If(_) => {}
Stmt::With(_) => {}
Stmt::Match(_) => {}
Stmt::Raise(_) => {}
Stmt::Try(_) => {}
Stmt::Assert(_) => {}
Stmt::Import(import) => self.create_id(import),
Stmt::ImportFrom(import_from) => self.create_id(import_from),
Stmt::Global(global) => self.create_id(global),
Stmt::Nonlocal(non_local) => self.create_id(non_local),
Stmt::Pass(_) => {}
Stmt::Break(_) => {}
Stmt::Continue(_) => {}
Stmt::IpyEscapeCommand(_) => {}
}
preorder::walk_stmt(self, stmt);
}
fn visit_expr(&mut self, _expr: &'a Expr) {}
fn visit_parameter(&mut self, parameter: &'a Parameter) {
self.create_id(parameter);
preorder::walk_parameter(self, parameter);
}
fn visit_except_handler(&mut self, except_handler: &'a ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.create_id(except_handler);
}
}
preorder::walk_except_handler(self, except_handler);
}
fn visit_with_item(&mut self, with_item: &'a WithItem) {
self.create_id(with_item);
preorder::walk_with_item(self, with_item);
}
fn visit_match_case(&mut self, match_case: &'a MatchCase) {
self.create_id(match_case);
preorder::walk_match_case(self, match_case);
}
fn visit_type_param(&mut self, type_param: &'a TypeParam) {
self.create_id(type_param);
}
}
enum DeferredNode<'a> {
FunctionDefinition(&'a StmtFunctionDef),
ClassDefinition(&'a StmtClassDef),
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct TypedNodeKey<N: AstNode> {
/// The type erased node key.
inner: NodeKey,
_marker: PhantomData<fn() -> N>,
}
impl<N: AstNode> TypedNodeKey<N> {
pub fn from_node(node: &N) -> Self {
let inner = NodeKey {
kind: node.as_any_node_ref().kind(),
range: node.range(),
};
Self {
inner,
_marker: PhantomData,
}
}
pub fn new(node_key: NodeKey) -> Option<Self> {
N::can_cast(node_key.kind).then_some(TypedNodeKey {
inner: node_key,
_marker: PhantomData,
})
}
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<N::Ref<'a>> {
let node_ref = self.inner.resolve(root)?;
Some(N::cast_ref(node_ref).unwrap())
}
pub fn resolve_unwrap<'a>(&self, root: AnyNodeRef<'a>) -> N::Ref<'a> {
self.resolve(root).expect("node should resolve")
}
pub fn erased(&self) -> &NodeKey {
&self.inner
}
}
struct FindNodeKeyVisitor<'a> {
key: NodeKey,
result: Option<AnyNodeRef<'a>>,
}
impl<'a> PreorderVisitor<'a> for FindNodeKeyVisitor<'a> {
fn enter_node(&mut self, node: AnyNodeRef<'a>) -> TraversalSignal {
if self.result.is_some() {
return TraversalSignal::Skip;
}
if node.range() == self.key.range && node.kind() == self.key.kind {
self.result = Some(node);
TraversalSignal::Skip
} else if node.range().contains_range(self.key.range) {
TraversalSignal::Traverse
} else {
TraversalSignal::Skip
}
}
fn visit_body(&mut self, body: &'a [Stmt]) {
// TODO it would be more efficient to use binary search instead of linear
for stmt in body {
if stmt.range().start() > self.key.range.end() {
break;
}
self.visit_stmt(stmt);
}
}
}
// TODO an alternative to this is to have a `NodeId` on each node (in increasing order depending on the position).
// This would allow to reduce the size of this to a u32.
// What would be nice if we could use an `Arc::weak_ref` here but that only works if we use
// `Arc` internally
// TODO: Implement the logic to resolve a node, given a db (and the correct file).
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct NodeKey {
kind: NodeKind,
range: TextRange,
}
impl NodeKey {
pub fn resolve<'a>(&self, root: AnyNodeRef<'a>) -> Option<AnyNodeRef<'a>> {
// We need to do a binary search here. Only traverse into a node if the range is withint the node
let mut visitor = FindNodeKeyVisitor {
key: *self,
result: None,
};
if visitor.enter_node(root) == TraversalSignal::Traverse {
root.visit_preorder(&mut visitor);
}
visitor.result
}
}
/// Marker trait implemented by AST nodes for which we extract the `AstId`.
pub trait HasAstId: AstNode {
fn node_key(&self) -> TypedNodeKey<Self>
where
Self: Sized,
{
TypedNodeKey {
inner: self.syntax_node_key(),
_marker: PhantomData,
}
}
fn syntax_node_key(&self) -> NodeKey {
NodeKey {
kind: self.as_any_node_ref().kind(),
range: self.range(),
}
}
}
impl HasAstId for StmtFunctionDef {}
impl HasAstId for StmtClassDef {}
impl HasAstId for StmtAnnAssign {}
impl HasAstId for StmtAugAssign {}
impl HasAstId for StmtAssign {}
impl HasAstId for StmtTypeAlias {}
impl HasAstId for ModModule {}
impl HasAstId for StmtImport {}
impl HasAstId for StmtImportFrom {}
impl HasAstId for Parameter {}
impl HasAstId for TypeParam {}
impl HasAstId for Stmt {}
impl HasAstId for TypeParamTypeVar {}
impl HasAstId for TypeParamTypeVarTuple {}
impl HasAstId for TypeParamParamSpec {}
impl HasAstId for StmtGlobal {}
impl HasAstId for StmtNonlocal {}
impl HasAstId for ExceptHandlerExceptHandler {}
impl HasAstId for WithItem {}
impl HasAstId for MatchCase {}

View file

@ -0,0 +1,152 @@
use std::fmt::Formatter;
use std::hash::Hash;
use std::sync::atomic::{AtomicUsize, Ordering};
use dashmap::mapref::entry::Entry;
use crate::FxDashMap;
/// Simple key value cache that locks on a per-key level.
pub struct KeyValueCache<K, V> {
map: FxDashMap<K, V>,
statistics: CacheStatistics,
}
impl<K, V> KeyValueCache<K, V>
where
K: Eq + Hash + Clone,
V: Clone,
{
pub fn try_get(&self, key: &K) -> Option<V> {
if let Some(existing) = self.map.get(key) {
self.statistics.hit();
Some(existing.clone())
} else {
self.statistics.miss();
None
}
}
pub fn get<F>(&self, key: &K, compute: F) -> V
where
F: FnOnce(&K) -> V,
{
match self.map.entry(key.clone()) {
Entry::Occupied(cached) => {
self.statistics.hit();
cached.get().clone()
}
Entry::Vacant(vacant) => {
self.statistics.miss();
let value = compute(key);
vacant.insert(value.clone());
value
}
}
}
pub fn set(&mut self, key: K, value: V) {
self.map.insert(key, value);
}
pub fn remove(&mut self, key: &K) -> Option<V> {
self.map.remove(key).map(|(_, value)| value)
}
pub fn clear(&mut self) {
self.map.clear();
self.map.shrink_to_fit();
}
pub fn statistics(&self) -> Option<Statistics> {
self.statistics.to_statistics()
}
}
impl<K, V> Default for KeyValueCache<K, V>
where
K: Eq + Hash,
V: Clone,
{
fn default() -> Self {
Self {
map: FxDashMap::default(),
statistics: CacheStatistics::default(),
}
}
}
impl<K, V> std::fmt::Debug for KeyValueCache<K, V>
where
K: std::fmt::Debug + Eq + Hash,
V: std::fmt::Debug,
{
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let mut debug = f.debug_map();
for entry in &self.map {
debug.entry(&entry.value(), &entry.key());
}
debug.finish()
}
}
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Statistics {
pub hits: usize,
pub misses: usize,
}
impl Statistics {
#[allow(clippy::cast_precision_loss)]
pub fn hit_rate(&self) -> Option<f64> {
if self.hits + self.misses == 0 {
return None;
}
Some((self.hits as f64) / (self.hits + self.misses) as f64)
}
}
#[cfg(debug_assertions)]
pub type CacheStatistics = DebugStatistics;
#[cfg(not(debug_assertions))]
pub type CacheStatistics = ReleaseStatistics;
#[derive(Debug, Default)]
pub struct DebugStatistics {
hits: AtomicUsize,
misses: AtomicUsize,
}
impl DebugStatistics {
// TODO figure out appropriate Ordering
pub fn hit(&self) {
self.hits.fetch_add(1, Ordering::SeqCst);
}
pub fn miss(&self) {
self.misses.fetch_add(1, Ordering::SeqCst);
}
pub fn to_statistics(&self) -> Option<Statistics> {
let hits = self.hits.load(Ordering::SeqCst);
let misses = self.misses.load(Ordering::SeqCst);
Some(Statistics { hits, misses })
}
}
#[derive(Debug, Default)]
pub struct ReleaseStatistics;
impl ReleaseStatistics {
#[inline]
pub fn to_statistics(&self) -> Option<Statistics> {
None
}
}

View file

@ -0,0 +1,65 @@
use std::sync::{Arc, Condvar, Mutex};
#[derive(Debug, Default)]
pub struct CancellationSource {
signal: Arc<(Mutex<bool>, Condvar)>,
}
impl CancellationSource {
pub fn new() -> Self {
Self {
signal: Arc::new((Mutex::new(false), Condvar::default())),
}
}
pub fn cancel(&self) {
let (cancelled, condvar) = &*self.signal;
let mut cancelled = cancelled.lock().unwrap();
if *cancelled {
return;
}
*cancelled = true;
condvar.notify_all();
}
pub fn is_cancelled(&self) -> bool {
let (cancelled, _) = &*self.signal;
*cancelled.lock().unwrap()
}
pub fn token(&self) -> CancellationToken {
CancellationToken {
signal: self.signal.clone(),
}
}
}
#[derive(Clone, Debug)]
pub struct CancellationToken {
signal: Arc<(Mutex<bool>, Condvar)>,
}
impl CancellationToken {
/// Returns `true` if cancellation has been requested.
pub fn is_cancelled(&self) -> bool {
let (cancelled, _) = &*self.signal;
*cancelled.lock().unwrap()
}
pub fn wait(&self) {
let (bool, condvar) = &*self.signal;
let lock = condvar
.wait_while(bool.lock().unwrap(), |bool| !*bool)
.unwrap();
debug_assert!(*lock);
drop(lock);
}
}

171
crates/red_knot/src/db.rs Normal file
View file

@ -0,0 +1,171 @@
use std::path::Path;
use std::sync::Arc;
use crate::files::FileId;
use crate::lint::{Diagnostics, LintSyntaxStorage};
use crate::module::{Module, ModuleData, ModuleName, ModuleResolver, ModuleSearchPath};
use crate::parse::{Parsed, ParsedStorage};
use crate::source::{Source, SourceStorage};
use crate::symbols::{SymbolId, SymbolTable, SymbolTablesStorage};
use crate::types::{Type, TypeStore};
pub trait SourceDb {
// queries
fn file_id(&self, path: &std::path::Path) -> FileId;
fn file_path(&self, file_id: FileId) -> Arc<std::path::Path>;
fn source(&self, file_id: FileId) -> Source;
fn parse(&self, file_id: FileId) -> Parsed;
fn lint_syntax(&self, file_id: FileId) -> Diagnostics;
}
pub trait SemanticDb: SourceDb {
// queries
fn resolve_module(&self, name: ModuleName) -> Option<Module>;
fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable>;
// mutations
fn path_to_module(&mut self, path: &Path) -> Option<Module>;
fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)>;
fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>);
fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type;
}
pub trait Db: SemanticDb {}
#[derive(Debug, Default)]
pub struct SourceJar {
pub sources: SourceStorage,
pub parsed: ParsedStorage,
pub lint_syntax: LintSyntaxStorage,
}
#[derive(Debug, Default)]
pub struct SemanticJar {
pub module_resolver: ModuleResolver,
pub symbol_tables: SymbolTablesStorage,
pub type_store: TypeStore,
}
/// Gives access to a specific jar in the database.
///
/// Nope, the terminology isn't borrowed from Java but from Salsa <https://salsa-rs.github.io/salsa/>,
/// which is an analogy to storing the salsa in different jars.
///
/// The basic idea is that each crate can define its own jar and the jars can be combined to a single
/// database in the top level crate. Each crate also defines its own `Database` trait. The combination of
/// `Database` trait and the jar allows to write queries in isolation without having to know how they get composed at the upper levels.
///
/// Salsa further defines a `HasIngredient` trait which slices the jar to a specific storage (e.g. a specific cache).
/// We don't need this just jet because we write our queries by hand. We may want a similar trait if we decide
/// to use a macro to generate the queries.
pub trait HasJar<T> {
/// Gives a read-only reference to the jar.
fn jar(&self) -> &T;
/// Gives a mutable reference to the jar.
fn jar_mut(&mut self) -> &mut T;
}
#[cfg(test)]
pub(crate) mod tests {
use crate::db::{HasJar, SourceDb, SourceJar};
use crate::files::{FileId, Files};
use crate::lint::{lint_syntax, Diagnostics};
use crate::module::{
add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
ModuleName, ModuleSearchPath,
};
use crate::parse::{parse, Parsed};
use crate::source::{source_text, Source};
use crate::symbols::{symbol_table, SymbolId, SymbolTable};
use crate::types::{infer_symbol_type, Type};
use std::path::Path;
use std::sync::Arc;
use super::{SemanticDb, SemanticJar};
// This can be a partial database used in a single crate for testing.
// It would hold fewer data than the full database.
#[derive(Debug, Default)]
pub(crate) struct TestDb {
files: Files,
source: SourceJar,
semantic: SemanticJar,
}
impl HasJar<SourceJar> for TestDb {
fn jar(&self) -> &SourceJar {
&self.source
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.source
}
}
impl HasJar<SemanticJar> for TestDb {
fn jar(&self) -> &SemanticJar {
&self.semantic
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.semantic
}
}
impl SourceDb for TestDb {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
fn source(&self, file_id: FileId) -> Source {
source_text(self, file_id)
}
fn parse(&self, file_id: FileId) -> Parsed {
parse(self, file_id)
}
fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
lint_syntax(self, file_id)
}
}
impl SemanticDb for TestDb {
fn resolve_module(&self, name: ModuleName) -> Option<Module> {
resolve_module(self, name)
}
fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable> {
symbol_table(self, file_id)
}
fn path_to_module(&mut self, path: &Path) -> Option<Module> {
path_to_module(self, path)
}
fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
add_module(self, path)
}
fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>) {
set_module_search_paths(self, paths);
}
fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type {
infer_symbol_type(self, file_id, symbol_id)
}
}
}

View file

@ -0,0 +1,148 @@
use std::fmt::{Debug, Formatter};
use std::hash::{Hash, Hasher};
use std::path::Path;
use std::sync::Arc;
use hashbrown::hash_map::RawEntryMut;
use parking_lot::RwLock;
use rustc_hash::FxHasher;
use ruff_index::{newtype_index, IndexVec};
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub struct FileId;
// TODO we'll need a higher level virtual file system abstraction that allows testing if a file exists
// or retrieving its content (ideally lazily and in a way that the memory can be retained later)
// I suspect that we'll end up with a FileSystem trait and our own Path abstraction.
#[derive(Clone, Default)]
pub struct Files {
inner: Arc<RwLock<FilesInner>>,
}
impl Files {
#[tracing::instrument(level = "trace", skip(path))]
pub fn intern(&self, path: &Path) -> FileId {
self.inner.write().intern(path)
}
pub fn try_get(&self, path: &Path) -> Option<FileId> {
self.inner.read().try_get(path)
}
// TODO Can we avoid using an `Arc` here? salsa can return references for some reason.
pub fn path(&self, id: FileId) -> Arc<Path> {
self.inner.read().path(id)
}
}
impl Debug for Files {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
let files = self.inner.read();
let mut debug = f.debug_map();
for item in files.iter() {
debug.entry(&item.0, &item.1);
}
debug.finish()
}
}
impl PartialEq for Files {
fn eq(&self, other: &Self) -> bool {
self.inner.read().eq(&other.inner.read())
}
}
impl Eq for Files {}
#[derive(Default)]
struct FilesInner {
by_path: Map<FileId, ()>,
// TODO should we use a map here to reclaim the space for removed files?
// TODO I think we should use our own path abstraction here to avoid having to normalize paths
// and dealing with non-utf paths everywhere.
by_id: IndexVec<FileId, Arc<Path>>,
}
impl FilesInner {
/// Inserts the path and returns a new id for it or returns the id if it is an existing path.
// TODO should this accept Path or PathBuf?
pub(crate) fn intern(&mut self, path: &Path) -> FileId {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
let hash = hasher.finish();
let entry = self
.by_path
.raw_entry_mut()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path);
match entry {
RawEntryMut::Occupied(entry) => *entry.key(),
RawEntryMut::Vacant(entry) => {
let id = self.by_id.push(Arc::from(path));
entry.insert_with_hasher(hash, id, (), |_| hash);
id
}
}
}
pub(crate) fn try_get(&self, path: &Path) -> Option<FileId> {
let mut hasher = FxHasher::default();
path.hash(&mut hasher);
let hash = hasher.finish();
Some(
*self
.by_path
.raw_entry()
.from_hash(hash, |existing_file| &*self.by_id[*existing_file] == path)?
.0,
)
}
/// Returns the path for the file with the given id.
pub(crate) fn path(&self, id: FileId) -> Arc<Path> {
self.by_id[id].clone()
}
pub(crate) fn iter(&self) -> impl Iterator<Item = (FileId, Arc<Path>)> + '_ {
self.by_path.keys().map(|id| (*id, self.by_id[*id].clone()))
}
}
impl PartialEq for FilesInner {
fn eq(&self, other: &Self) -> bool {
self.by_id == other.by_id
}
}
impl Eq for FilesInner {}
#[cfg(test)]
mod tests {
use super::*;
use std::path::PathBuf;
#[test]
fn insert_path_twice_same_id() {
let files = Files::default();
let path = PathBuf::from("foo/bar");
let id1 = files.intern(&path);
let id2 = files.intern(&path);
assert_eq!(id1, id2);
}
#[test]
fn insert_different_paths_different_ids() {
let files = Files::default();
let path1 = PathBuf::from("foo/bar");
let path2 = PathBuf::from("foo/bar/baz");
let id1 = files.intern(&path1);
let id2 = files.intern(&path2);
assert_ne!(id1, id2);
}
}

View file

@ -0,0 +1,67 @@
//! Key observations
//!
//! The HIR avoids allocations to large extends by:
//! * Using an arena per node type
//! * using ids and id ranges to reference items.
//!
//! Using separate arena per node type has the advantage that the IDs are relatively stable, because
//! they only change when a node of the same kind has been added or removed. (What's unclear is if that matters or if
//! it still triggers a re-compute because the AST-id in the node has changed).
//!
//! The HIR does not store all details. It mainly stores the *public* interface. There's a reference
//! back to the AST node to get more details.
//!
//!
use crate::ast_ids::{HasAstId, TypedAstId};
use crate::files::FileId;
use std::fmt::Formatter;
use std::hash::{Hash, Hasher};
pub struct HirAstId<N: HasAstId> {
file_id: FileId,
node_id: TypedAstId<N>,
}
impl<N: HasAstId> Copy for HirAstId<N> {}
impl<N: HasAstId> Clone for HirAstId<N> {
fn clone(&self) -> Self {
*self
}
}
impl<N: HasAstId> PartialEq for HirAstId<N> {
fn eq(&self, other: &Self) -> bool {
self.file_id == other.file_id && self.node_id == other.node_id
}
}
impl<N: HasAstId> Eq for HirAstId<N> {}
impl<N: HasAstId> std::fmt::Debug for HirAstId<N> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("HirAstId")
.field("file_id", &self.file_id)
.field("node_id", &self.node_id)
.finish()
}
}
impl<N: HasAstId> Hash for HirAstId<N> {
fn hash<H: Hasher>(&self, state: &mut H) {
self.file_id.hash(state);
self.node_id.hash(state);
}
}
impl<N: HasAstId> HirAstId<N> {
pub fn upcast<M: HasAstId>(self) -> HirAstId<M>
where
N: Into<M>,
{
HirAstId {
file_id: self.file_id,
node_id: self.node_id.upcast(),
}
}
}

View file

@ -0,0 +1,556 @@
use std::ops::{Index, Range};
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast::visitor::preorder;
use ruff_python_ast::visitor::preorder::PreorderVisitor;
use ruff_python_ast::{
Decorator, ExceptHandler, ExceptHandlerExceptHandler, Expr, MatchCase, ModModule, Stmt,
StmtAnnAssign, StmtAssign, StmtClassDef, StmtFunctionDef, StmtGlobal, StmtImport,
StmtImportFrom, StmtNonlocal, StmtTypeAlias, TypeParam, TypeParamParamSpec, TypeParamTypeVar,
TypeParamTypeVarTuple, WithItem,
};
use crate::ast_ids::{AstIds, HasAstId};
use crate::files::FileId;
use crate::hir::HirAstId;
use crate::Name;
#[newtype_index]
pub struct FunctionId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Function {
ast_id: HirAstId<StmtFunctionDef>,
name: Name,
parameters: Range<ParameterId>,
type_parameters: Range<TypeParameterId>, // TODO: type_parameters, return expression, decorators
}
#[newtype_index]
pub struct ParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Parameter {
kind: ParameterKind,
name: Name,
default: Option<()>, // TODO use expression HIR
ast_id: HirAstId<ruff_python_ast::Parameter>,
}
// TODO or should `Parameter` be an enum?
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ParameterKind {
PositionalOnly,
Arguments,
Vararg,
KeywordOnly,
Kwarg,
}
#[newtype_index]
pub struct ClassId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Class {
name: Name,
ast_id: HirAstId<StmtClassDef>,
// TODO type parameters, inheritance, decorators, members
}
#[newtype_index]
pub struct AssignmentId;
// This can have more than one name...
// but that means we can't implement `name()` on `ModuleItem`.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Assignment {
// TODO: Handle multiple names / targets
name: Name,
ast_id: HirAstId<StmtAssign>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct AnnotatedAssignment {
name: Name,
ast_id: HirAstId<StmtAnnAssign>,
}
#[newtype_index]
pub struct AnnotatedAssignmentId;
#[newtype_index]
pub struct TypeAliasId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeAlias {
name: Name,
ast_id: HirAstId<StmtTypeAlias>,
parameters: Range<TypeParameterId>,
}
#[newtype_index]
pub struct TypeParameterId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub enum TypeParameter {
TypeVar(TypeParameterTypeVar),
ParamSpec(TypeParameterParamSpec),
TypeVarTuple(TypeParameterTypeVarTuple),
}
impl TypeParameter {
pub fn ast_id(&self) -> HirAstId<TypeParam> {
match self {
TypeParameter::TypeVar(type_var) => type_var.ast_id.upcast(),
TypeParameter::ParamSpec(param_spec) => param_spec.ast_id.upcast(),
TypeParameter::TypeVarTuple(type_var_tuple) => type_var_tuple.ast_id.upcast(),
}
}
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVar {
name: Name,
ast_id: HirAstId<TypeParamTypeVar>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterParamSpec {
name: Name,
ast_id: HirAstId<TypeParamParamSpec>,
}
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct TypeParameterTypeVarTuple {
name: Name,
ast_id: HirAstId<TypeParamTypeVarTuple>,
}
#[newtype_index]
pub struct GlobalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Global {
// TODO track names
ast_id: HirAstId<StmtGlobal>,
}
#[newtype_index]
pub struct NonLocalId;
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct NonLocal {
// TODO track names
ast_id: HirAstId<StmtNonlocal>,
}
pub enum DefinitionId {
Function(FunctionId),
Parameter(ParameterId),
Class(ClassId),
Assignment(AssignmentId),
AnnotatedAssignment(AnnotatedAssignmentId),
Global(GlobalId),
NonLocal(NonLocalId),
TypeParameter(TypeParameterId),
TypeAlias(TypeAlias),
}
pub enum DefinitionItem {
Function(Function),
Parameter(Parameter),
Class(Class),
Assignment(Assignment),
AnnotatedAssignment(AnnotatedAssignment),
Global(Global),
NonLocal(NonLocal),
TypeParameter(TypeParameter),
TypeAlias(TypeAlias),
}
// The closest is rust-analyzers item-tree. It only represents "Items" which make the public interface of a module
// (it excludes any other statement or expressions). rust-analyzer uses it as the main input to the name resolution
// algorithm
// > It is the input to the name resolution algorithm, as well as to the queries defined in `adt.rs`,
// > `data.rs`, and most things in `attr.rs`.
//
// > One important purpose of this layer is to provide an "invalidation barrier" for incremental
// > computations: when typing inside an item body, the `ItemTree` of the modified file is typically
// > unaffected, so we don't have to recompute name resolution results or item data (see `data.rs`).
//
// I haven't fully figured this out but I think that this composes the "public" interface of a module?
// But maybe that's too optimistic.
//
//
#[derive(Debug, Clone, Default, Eq, PartialEq)]
pub struct Definitions {
functions: IndexVec<FunctionId, Function>,
parameters: IndexVec<ParameterId, Parameter>,
classes: IndexVec<ClassId, Class>,
assignments: IndexVec<AssignmentId, Assignment>,
annotated_assignments: IndexVec<AnnotatedAssignmentId, AnnotatedAssignment>,
type_aliases: IndexVec<TypeAliasId, TypeAlias>,
type_parameters: IndexVec<TypeParameterId, TypeParameter>,
globals: IndexVec<GlobalId, Global>,
non_locals: IndexVec<NonLocalId, NonLocal>,
}
impl Definitions {
pub fn from_module(module: &ModModule, ast_ids: &AstIds, file_id: FileId) -> Self {
let mut visitor = DefinitionsVisitor {
definitions: Definitions::default(),
ast_ids,
file_id,
};
visitor.visit_body(&module.body);
visitor.definitions
}
}
impl Index<FunctionId> for Definitions {
type Output = Function;
fn index(&self, index: FunctionId) -> &Self::Output {
&self.functions[index]
}
}
impl Index<ParameterId> for Definitions {
type Output = Parameter;
fn index(&self, index: ParameterId) -> &Self::Output {
&self.parameters[index]
}
}
impl Index<ClassId> for Definitions {
type Output = Class;
fn index(&self, index: ClassId) -> &Self::Output {
&self.classes[index]
}
}
impl Index<AssignmentId> for Definitions {
type Output = Assignment;
fn index(&self, index: AssignmentId) -> &Self::Output {
&self.assignments[index]
}
}
impl Index<AnnotatedAssignmentId> for Definitions {
type Output = AnnotatedAssignment;
fn index(&self, index: AnnotatedAssignmentId) -> &Self::Output {
&self.annotated_assignments[index]
}
}
impl Index<TypeAliasId> for Definitions {
type Output = TypeAlias;
fn index(&self, index: TypeAliasId) -> &Self::Output {
&self.type_aliases[index]
}
}
impl Index<GlobalId> for Definitions {
type Output = Global;
fn index(&self, index: GlobalId) -> &Self::Output {
&self.globals[index]
}
}
impl Index<NonLocalId> for Definitions {
type Output = NonLocal;
fn index(&self, index: NonLocalId) -> &Self::Output {
&self.non_locals[index]
}
}
impl Index<TypeParameterId> for Definitions {
type Output = TypeParameter;
fn index(&self, index: TypeParameterId) -> &Self::Output {
&self.type_parameters[index]
}
}
struct DefinitionsVisitor<'a> {
definitions: Definitions,
ast_ids: &'a AstIds,
file_id: FileId,
}
impl DefinitionsVisitor<'_> {
fn ast_id<N: HasAstId>(&self, node: &N) -> HirAstId<N> {
HirAstId {
file_id: self.file_id,
node_id: self.ast_ids.ast_id(node),
}
}
fn lower_function_def(&mut self, function: &StmtFunctionDef) -> FunctionId {
let name = Name::new(&function.name);
let first_type_parameter_id = self.definitions.type_parameters.next_index();
let mut last_type_parameter_id = first_type_parameter_id;
if let Some(type_params) = &function.type_params {
for parameter in &type_params.type_params {
let id = self.lower_type_parameter(parameter);
last_type_parameter_id = id;
}
}
let parameters = self.lower_parameters(&function.parameters);
self.definitions.functions.push(Function {
name,
ast_id: self.ast_id(function),
parameters,
type_parameters: first_type_parameter_id..last_type_parameter_id,
})
}
fn lower_parameters(&mut self, parameters: &ruff_python_ast::Parameters) -> Range<ParameterId> {
let first_parameter_id = self.definitions.parameters.next_index();
let mut last_parameter_id = first_parameter_id;
for parameter in &parameters.posonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::PositionalOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(vararg) = &parameters.vararg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::Vararg,
name: Name::new(&vararg.name),
default: None,
ast_id: self.ast_id(vararg),
});
}
for parameter in &parameters.kwonlyargs {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&parameter.parameter.name),
default: None,
ast_id: self.ast_id(&parameter.parameter),
});
}
if let Some(kwarg) = &parameters.kwarg {
last_parameter_id = self.definitions.parameters.push(Parameter {
kind: ParameterKind::KeywordOnly,
name: Name::new(&kwarg.name),
default: None,
ast_id: self.ast_id(kwarg),
});
}
first_parameter_id..last_parameter_id
}
fn lower_class_def(&mut self, class: &StmtClassDef) -> ClassId {
let name = Name::new(&class.name);
self.definitions.classes.push(Class {
name,
ast_id: self.ast_id(class),
})
}
fn lower_assignment(&mut self, assignment: &StmtAssign) {
// FIXME handle multiple names
if let Some(Expr::Name(name)) = assignment.targets.first() {
self.definitions.assignments.push(Assignment {
name: Name::new(&name.id),
ast_id: self.ast_id(assignment),
});
}
}
fn lower_annotated_assignment(&mut self, annotated_assignment: &StmtAnnAssign) {
if let Expr::Name(name) = &*annotated_assignment.target {
self.definitions
.annotated_assignments
.push(AnnotatedAssignment {
name: Name::new(&name.id),
ast_id: self.ast_id(annotated_assignment),
});
}
}
fn lower_type_alias(&mut self, type_alias: &StmtTypeAlias) {
if let Expr::Name(name) = &*type_alias.name {
let name = Name::new(&name.id);
let lower_parameters_id = self.definitions.type_parameters.next_index();
let mut last_parameter_id = lower_parameters_id;
if let Some(type_params) = &type_alias.type_params {
for type_parameter in &type_params.type_params {
let id = self.lower_type_parameter(type_parameter);
last_parameter_id = id;
}
}
self.definitions.type_aliases.push(TypeAlias {
name,
ast_id: self.ast_id(type_alias),
parameters: lower_parameters_id..last_parameter_id,
});
}
}
fn lower_type_parameter(&mut self, type_parameter: &TypeParam) -> TypeParameterId {
match type_parameter {
TypeParam::TypeVar(type_var) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVar(TypeParameterTypeVar {
name: Name::new(&type_var.name),
ast_id: self.ast_id(type_var),
}))
}
TypeParam::ParamSpec(param_spec) => {
self.definitions
.type_parameters
.push(TypeParameter::ParamSpec(TypeParameterParamSpec {
name: Name::new(&param_spec.name),
ast_id: self.ast_id(param_spec),
}))
}
TypeParam::TypeVarTuple(type_var_tuple) => {
self.definitions
.type_parameters
.push(TypeParameter::TypeVarTuple(TypeParameterTypeVarTuple {
name: Name::new(&type_var_tuple.name),
ast_id: self.ast_id(type_var_tuple),
}))
}
}
}
fn lower_import(&mut self, _import: &StmtImport) {
// TODO
}
fn lower_import_from(&mut self, _import_from: &StmtImportFrom) {
// TODO
}
fn lower_global(&mut self, global: &StmtGlobal) -> GlobalId {
self.definitions.globals.push(Global {
ast_id: self.ast_id(global),
})
}
fn lower_non_local(&mut self, non_local: &StmtNonlocal) -> NonLocalId {
self.definitions.non_locals.push(NonLocal {
ast_id: self.ast_id(non_local),
})
}
fn lower_except_handler(&mut self, _except_handler: &ExceptHandlerExceptHandler) {
// TODO
}
fn lower_with_item(&mut self, _with_item: &WithItem) {
// TODO
}
fn lower_match_case(&mut self, _match_case: &MatchCase) {
// TODO
}
}
impl PreorderVisitor<'_> for DefinitionsVisitor<'_> {
fn visit_stmt(&mut self, stmt: &Stmt) {
match stmt {
// Definition statements
Stmt::FunctionDef(definition) => {
self.lower_function_def(definition);
self.visit_body(&definition.body);
}
Stmt::ClassDef(definition) => {
self.lower_class_def(definition);
self.visit_body(&definition.body);
}
Stmt::Assign(assignment) => {
self.lower_assignment(assignment);
}
Stmt::AnnAssign(annotated_assignment) => {
self.lower_annotated_assignment(annotated_assignment);
}
Stmt::TypeAlias(type_alias) => {
self.lower_type_alias(type_alias);
}
Stmt::Import(import) => self.lower_import(import),
Stmt::ImportFrom(import_from) => self.lower_import_from(import_from),
Stmt::Global(global) => {
self.lower_global(global);
}
Stmt::Nonlocal(non_local) => {
self.lower_non_local(non_local);
}
// Visit the compound statement bodies because they can contain other definitions.
Stmt::For(_)
| Stmt::While(_)
| Stmt::If(_)
| Stmt::With(_)
| Stmt::Match(_)
| Stmt::Try(_) => {
preorder::walk_stmt(self, stmt);
}
// Skip over simple statements because they can't contain any other definitions.
Stmt::Return(_)
| Stmt::Delete(_)
| Stmt::AugAssign(_)
| Stmt::Raise(_)
| Stmt::Assert(_)
| Stmt::Expr(_)
| Stmt::Pass(_)
| Stmt::Break(_)
| Stmt::Continue(_)
| Stmt::IpyEscapeCommand(_) => {
// No op
}
}
}
fn visit_expr(&mut self, _: &'_ Expr) {}
fn visit_decorator(&mut self, _decorator: &'_ Decorator) {}
fn visit_except_handler(&mut self, except_handler: &'_ ExceptHandler) {
match except_handler {
ExceptHandler::ExceptHandler(except_handler) => {
self.lower_except_handler(except_handler);
}
}
}
fn visit_with_item(&mut self, with_item: &'_ WithItem) {
self.lower_with_item(with_item);
}
fn visit_match_case(&mut self, match_case: &'_ MatchCase) {
self.lower_match_case(match_case);
self.visit_body(&match_case.body);
}
}

View file

@ -0,0 +1,83 @@
use std::hash::BuildHasherDefault;
use std::path::{Path, PathBuf};
use rustc_hash::{FxHashSet, FxHasher};
use crate::files::FileId;
pub mod ast_ids;
pub mod cache;
pub mod cancellation;
pub mod db;
pub mod files;
pub mod hir;
pub mod lint;
pub mod module;
mod parse;
pub mod program;
pub mod source;
mod symbols;
mod types;
pub mod watch;
pub(crate) type FxDashMap<K, V> = dashmap::DashMap<K, V, BuildHasherDefault<FxHasher>>;
#[allow(unused)]
pub(crate) type FxDashSet<V> = dashmap::DashSet<V, BuildHasherDefault<FxHasher>>;
pub(crate) type FxIndexSet<V> = indexmap::set::IndexSet<V, BuildHasherDefault<FxHasher>>;
#[derive(Debug)]
pub struct Workspace {
/// TODO this should be a resolved path. We should probably use a newtype wrapper that guarantees that
/// PATH is a UTF-8 path and is normalized.
root: PathBuf,
/// The files that are open in the workspace.
///
/// * Editor: The files that are actively being edited in the editor (the user has a tab open with the file).
/// * CLI: The resolved files passed as arguments to the CLI.
open_files: FxHashSet<FileId>,
}
impl Workspace {
pub fn new(root: PathBuf) -> Self {
Self {
root,
open_files: FxHashSet::default(),
}
}
pub fn root(&self) -> &Path {
self.root.as_path()
}
// TODO having the content in workspace feels wrong.
pub fn open_file(&mut self, file_id: FileId) {
self.open_files.insert(file_id);
}
pub fn close_file(&mut self, file_id: FileId) {
self.open_files.remove(&file_id);
}
// TODO introduce an `OpenFile` type instead of using an anonymous tuple.
pub fn open_files(&self) -> impl Iterator<Item = FileId> + '_ {
self.open_files.iter().copied()
}
pub fn is_file_open(&self, file_id: FileId) -> bool {
self.open_files.contains(&file_id)
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub struct Name(smol_str::SmolStr);
impl Name {
#[inline]
pub fn new(name: &str) -> Self {
Self(smol_str::SmolStr::new(name))
}
pub fn as_str(&self) -> &str {
self.0.as_str()
}
}

124
crates/red_knot/src/lint.rs Normal file
View file

@ -0,0 +1,124 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_python_ast::visitor::Visitor;
use ruff_python_ast::StringLiteral;
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use crate::files::FileId;
pub(crate) fn lint_syntax<Db>(db: &Db, file_id: FileId) -> Diagnostics
where
Db: SourceDb + HasJar<SourceJar>,
{
let storage = &db.jar().lint_syntax;
storage.get(&file_id, |file_id| {
let mut diagnostics = Vec::new();
let source = db.source(*file_id);
lint_lines(source.text(), &mut diagnostics);
let parsed = db.parse(*file_id);
if parsed.errors().is_empty() {
let ast = parsed.ast();
let mut visitor = SyntaxLintVisitor {
diagnostics,
source: source.text(),
};
visitor.visit_body(&ast.body);
diagnostics = visitor.diagnostics;
} else {
diagnostics.extend(parsed.errors().iter().map(std::string::ToString::to_string));
}
Diagnostics::from(diagnostics)
})
}
pub(crate) fn lint_lines(source: &str, diagnostics: &mut Vec<String>) {
for (line_number, line) in source.lines().enumerate() {
if line.len() < 88 {
continue;
}
let char_count = line.chars().count();
if char_count > 88 {
diagnostics.push(format!(
"Line {} is too long ({} characters)",
line_number + 1,
char_count
));
}
}
}
#[derive(Debug)]
struct SyntaxLintVisitor<'a> {
diagnostics: Vec<String>,
source: &'a str,
}
impl Visitor<'_> for SyntaxLintVisitor<'_> {
fn visit_string_literal(&mut self, string_literal: &'_ StringLiteral) {
// A very naive implementation of use double quotes
let text = &self.source[string_literal.range];
if text.starts_with('\'') {
self.diagnostics
.push("Use double quotes for strings".to_string());
}
}
}
#[derive(Debug, Clone)]
pub enum Diagnostics {
Empty,
List(Arc<Vec<String>>),
}
impl Diagnostics {
pub fn as_slice(&self) -> &[String] {
match self {
Diagnostics::Empty => &[],
Diagnostics::List(list) => list.as_slice(),
}
}
}
impl Deref for Diagnostics {
type Target = [String];
fn deref(&self) -> &Self::Target {
self.as_slice()
}
}
impl From<Vec<String>> for Diagnostics {
fn from(value: Vec<String>) -> Self {
if value.is_empty() {
Diagnostics::Empty
} else {
Diagnostics::List(Arc::new(value))
}
}
}
#[derive(Default, Debug)]
pub struct LintSyntaxStorage(KeyValueCache<FileId, Diagnostics>);
impl Deref for LintSyntaxStorage {
type Target = KeyValueCache<FileId, Diagnostics>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for LintSyntaxStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

399
crates/red_knot/src/main.rs Normal file
View file

@ -0,0 +1,399 @@
use std::collections::hash_map::Entry;
use std::num::NonZeroUsize;
use std::path::Path;
use std::sync::atomic::{AtomicUsize, Ordering};
use std::sync::{Arc, Mutex};
use rustc_hash::FxHashMap;
use tracing::subscriber::Interest;
use tracing::{Level, Metadata};
use tracing_subscriber::filter::LevelFilter;
use tracing_subscriber::layer::{Context, Filter, SubscriberExt};
use tracing_subscriber::{Layer, Registry};
use tracing_tree::time::Uptime;
use red_knot::cancellation::CancellationSource;
use red_knot::db::{HasJar, SourceDb, SourceJar};
use red_knot::files::FileId;
use red_knot::module::{ModuleSearchPath, ModuleSearchPathKind};
use red_knot::program::{FileChange, FileChangeKind, Program};
use red_knot::watch::FileWatcher;
use red_knot::{files, Workspace};
#[allow(
clippy::dbg_macro,
clippy::print_stdout,
clippy::unnecessary_wraps,
clippy::print_stderr
)]
fn main() -> anyhow::Result<()> {
setup_tracing();
let arguments: Vec<_> = std::env::args().collect();
if arguments.len() < 2 {
eprintln!("Usage: red_knot <path>");
return Err(anyhow::anyhow!("Invalid arguments"));
}
let entry_point = Path::new(&arguments[1]);
if !entry_point.exists() {
eprintln!("The entry point does not exist.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
if !entry_point.is_file() {
eprintln!("The entry point is not a file.");
return Err(anyhow::anyhow!("Invalid arguments"));
}
let files = files::Files::default();
let workspace_folder = entry_point.parent().unwrap();
let mut workspace = Workspace::new(workspace_folder.to_path_buf());
let workspace_search_path = ModuleSearchPath::new(
workspace.root().to_path_buf(),
ModuleSearchPathKind::FirstParty,
);
let entry_id = files.intern(entry_point);
let mut program = Program::new(vec![workspace_search_path], files.clone());
workspace.open_file(entry_id);
let (sender, receiver) = crossbeam_channel::bounded(
std::thread::available_parallelism()
.map(NonZeroUsize::get)
.unwrap_or(50)
.max(4), // TODO: Both these numbers are very arbitrary. Pick sensible defaults.
);
// Listen to Ctrl+C and abort the watch mode.
let abort_sender = Mutex::new(Some(sender.clone()));
ctrlc::set_handler(move || {
let mut lock = abort_sender.lock().unwrap();
if let Some(sender) = lock.take() {
sender.send(Message::Exit).unwrap();
}
})?;
// Watch for file changes and re-trigger the analysis.
let file_changes_sender = sender.clone();
let mut file_watcher = FileWatcher::new(
move |changes| {
file_changes_sender
.send(Message::FileChanges(changes))
.unwrap();
},
files.clone(),
)?;
file_watcher.watch_folder(workspace_folder)?;
let files_to_check = vec![entry_id];
// Main loop that runs until the user exits the program
// Runs the analysis for each changed file. Cancels the analysis if a new change is detected.
loop {
let changes = {
tracing::trace!("Main Loop: Tick");
// Token to cancel the analysis if a new change is detected.
let run_cancellation_token_source = CancellationSource::new();
let run_cancellation_token = run_cancellation_token_source.token();
// Tracks the number of pending analysis runs.
let pending_analysis = Arc::new(AtomicUsize::new(0));
// Take read-only references that are copy and Send.
let program = &program;
let workspace = &workspace;
let receiver = receiver.clone();
let started_analysis = pending_analysis.clone();
// Orchestration task. Ideally, we would run this on main but we should start it as soon as possible so that
// we avoid scheduling tasks when we already know that we're about to exit or cancel the analysis because of a file change.
// This uses `std::thread::spawn` because we don't want it to run inside of the thread pool
// or this code deadlocks when using a thread pool of the size 1.
let orchestration_handle = std::thread::spawn(move || {
fn consume_pending_messages(
receiver: &crossbeam_channel::Receiver<Message>,
mut aggregated_changes: AggregatedChanges,
) -> NextTickCommand {
loop {
// Consume possibly incoming file change messages before running a new analysis, but don't wait for more than 100ms.
crossbeam_channel::select! {
recv(receiver) -> message => {
match message {
Ok(Message::Exit) => {
return NextTickCommand::Exit;
}
Ok(Message::FileChanges(file_changes)) => {
aggregated_changes.extend(file_changes);
}
Ok(Message::AnalysisCancelled | Message::AnalysisCompleted(_)) => {
unreachable!(
"All analysis should have been completed at this time"
);
},
Err(_) => {
// There are no more senders, no point in waiting for more messages
break;
}
}
},
default(std::time::Duration::from_millis(100)) => {
break;
}
}
}
NextTickCommand::FileChanges(aggregated_changes)
}
let mut diagnostics = Vec::new();
let mut aggregated_changes = AggregatedChanges::default();
for message in &receiver {
match message {
Message::AnalysisCompleted(file_diagnostics) => {
diagnostics.extend_from_slice(&file_diagnostics);
if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
// Analysis completed, print the diagnostics.
dbg!(&diagnostics);
}
}
Message::AnalysisCancelled => {
if pending_analysis.fetch_sub(1, Ordering::SeqCst) == 1 {
return consume_pending_messages(&receiver, aggregated_changes);
}
}
Message::Exit => {
run_cancellation_token_source.cancel();
// Don't consume any outstanding messages because we're exiting anyway.
return NextTickCommand::Exit;
}
Message::FileChanges(changes) => {
// Request cancellation, but wait until all analysis tasks have completed to
// avoid stale messages in the next main loop.
run_cancellation_token_source.cancel();
aggregated_changes.extend(changes);
if pending_analysis.load(Ordering::SeqCst) == 0 {
return consume_pending_messages(&receiver, aggregated_changes);
}
}
}
}
// This can be reached if there's no Ctrl+C and no file watcher handler.
// In that case, assume that we don't run in watch mode and exit.
NextTickCommand::Exit
});
// Star the analysis task on the thread pool and wait until they complete.
rayon::scope(|scope| {
for file in &files_to_check {
let cancellation_token = run_cancellation_token.clone();
if cancellation_token.is_cancelled() {
break;
}
let sender = sender.clone();
started_analysis.fetch_add(1, Ordering::SeqCst);
// TODO: How do we allow the host to control the number of threads used?
// Or should we just assume that each host implements its own main loop,
// I don't think that's entirely unreasonable but we should avoid
// having different main loops per host AND command (e.g. format vs check vs lint)
scope.spawn(move |_| {
if cancellation_token.is_cancelled() {
tracing::trace!("Exit analysis because cancellation was requested.");
sender.send(Message::AnalysisCancelled).unwrap();
return;
}
// TODO schedule the dependencies.
let mut diagnostics = Vec::new();
if workspace.is_file_open(*file) {
diagnostics.extend_from_slice(&program.lint_syntax(*file));
}
sender
.send(Message::AnalysisCompleted(diagnostics))
.unwrap();
});
}
});
// Wait for the orchestration task to complete. This either returns the file changes
// or instructs the main loop to exit.
match orchestration_handle.join().unwrap() {
NextTickCommand::FileChanges(changes) => changes,
NextTickCommand::Exit => {
break;
}
}
};
// We have a mutable reference here and can perform all necessary invalidations.
program.apply_changes(changes.iter());
}
let source_jar: &SourceJar = program.jar();
dbg!(source_jar.parsed.statistics());
dbg!(source_jar.sources.statistics());
Ok(())
}
enum Message {
AnalysisCompleted(Vec<String>),
AnalysisCancelled,
Exit,
FileChanges(Vec<FileChange>),
}
#[derive(Default, Debug)]
struct AggregatedChanges {
changes: FxHashMap<FileId, FileChangeKind>,
}
impl AggregatedChanges {
fn add(&mut self, change: FileChange) {
match self.changes.entry(change.file_id()) {
Entry::Occupied(mut entry) => {
let merged = entry.get_mut();
match (merged, change.kind()) {
(FileChangeKind::Created, FileChangeKind::Deleted) => {
// Deletion after creations means that ruff never saw the file.
entry.remove();
}
(FileChangeKind::Created, FileChangeKind::Modified) => {
// No-op, for ruff, modifying a file that it doesn't yet know that it exists is still considered a creation.
}
(FileChangeKind::Modified, FileChangeKind::Created) => {
// Uhh, that should probably not happen. Continue considering it a modification.
}
(FileChangeKind::Modified, FileChangeKind::Deleted) => {
*entry.get_mut() = FileChangeKind::Deleted;
}
(FileChangeKind::Deleted, FileChangeKind::Created) => {
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Deleted, FileChangeKind::Modified) => {
// That's weird, but let's consider it a modification.
*entry.get_mut() = FileChangeKind::Modified;
}
(FileChangeKind::Created, FileChangeKind::Created)
| (FileChangeKind::Modified, FileChangeKind::Modified)
| (FileChangeKind::Deleted, FileChangeKind::Deleted) => {
// No-op transitions. Some of them should be impossible but we handle them anyway.
}
}
}
Entry::Vacant(entry) => {
entry.insert(change.kind());
}
}
}
fn extend<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileChange>,
I::IntoIter: ExactSizeIterator,
{
let iter = changes.into_iter();
self.changes.reserve(iter.len());
for change in iter {
self.add(change);
}
}
fn iter(&self) -> impl Iterator<Item = FileChange> + '_ {
self.changes
.iter()
.map(|(id, kind)| FileChange::new(*id, *kind))
}
}
enum NextTickCommand {
/// Exit the main loop in the next tick
Exit,
/// Apply the given changes in the next main loop tick.
FileChanges(AggregatedChanges),
}
fn setup_tracing() {
let subscriber = Registry::default().with(
tracing_tree::HierarchicalLayer::default()
.with_indent_lines(true)
.with_indent_amount(2)
.with_bracketed_fields(true)
.with_targets(true)
.with_writer(|| Box::new(std::io::stderr()))
.with_timer(Uptime::default())
.with_filter(LoggingFilter {
trace_level: Level::TRACE,
}),
);
tracing::subscriber::set_global_default(subscriber).unwrap();
}
struct LoggingFilter {
trace_level: Level,
}
impl LoggingFilter {
fn is_enabled(&self, meta: &Metadata<'_>) -> bool {
let filter = if meta.target().starts_with("red_knot") || meta.target().starts_with("ruff") {
self.trace_level
} else {
Level::INFO
};
meta.level() <= &filter
}
}
impl<S> Filter<S> for LoggingFilter {
fn enabled(&self, meta: &Metadata<'_>, _cx: &Context<'_, S>) -> bool {
self.is_enabled(meta)
}
fn callsite_enabled(&self, meta: &'static Metadata<'static>) -> Interest {
if self.is_enabled(meta) {
Interest::always()
} else {
Interest::never()
}
}
fn max_level_hint(&self) -> Option<LevelFilter> {
Some(LevelFilter::from_level(self.trace_level))
}
}

View file

@ -0,0 +1,911 @@
use std::fmt::Formatter;
use std::path::{Path, PathBuf};
use std::sync::atomic::AtomicU32;
use std::sync::Arc;
use dashmap::mapref::entry::Entry;
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::FxDashMap;
/// ID uniquely identifying a module.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub struct Module(u32);
impl Module {
pub fn name<Db>(&self, db: &Db) -> ModuleName
where
Db: HasJar<SemanticJar>,
{
let modules = &db.jar().module_resolver;
modules.modules.get(self).unwrap().name.clone()
}
pub fn path<Db>(&self, db: &Db) -> ModulePath
where
Db: HasJar<SemanticJar>,
{
let modules = &db.jar().module_resolver;
modules.modules.get(self).unwrap().path.clone()
}
}
/// A module name, e.g. `foo.bar`.
///
/// Always normalized to the absolute form (never a relative module name).
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
pub struct ModuleName(smol_str::SmolStr);
impl ModuleName {
pub fn new(name: &str) -> Self {
debug_assert!(!name.is_empty());
Self(smol_str::SmolStr::new(name))
}
pub fn relative(_dots: u32, name: &str, _to: &Path) -> Self {
// FIXME: Take `to` and `dots` into account.
Self(smol_str::SmolStr::new(name))
}
pub fn from_relative_path(path: &Path) -> Option<Self> {
let path = if path.ends_with("__init__.py") || path.ends_with("__init__.pyi") {
path.parent()?
} else {
path
};
let name = if let Some(parent) = path.parent() {
let mut name = String::with_capacity(path.as_os_str().len());
for component in parent.components() {
name.push_str(component.as_os_str().to_str()?);
name.push('.');
}
// SAFETY: Unwrap is safe here or `parent` would have returned `None`.
name.push_str(path.file_stem().unwrap().to_str()?);
smol_str::SmolStr::from(name)
} else {
smol_str::SmolStr::new(path.file_stem()?.to_str()?)
};
Some(Self(name))
}
pub fn components(&self) -> impl DoubleEndedIterator<Item = &str> {
self.0.split('.')
}
pub fn parent(&self) -> Option<ModuleName> {
let (_, parent) = self.0.rsplit_once('.')?;
Some(Self(smol_str::SmolStr::new(parent)))
}
pub fn starts_with(&self, other: &ModuleName) -> bool {
self.0.starts_with(other.0.as_str())
}
pub fn as_str(&self) -> &str {
&self.0
}
}
impl std::fmt::Display for ModuleName {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.write_str(&self.0)
}
}
/// A search path in which to search modules.
/// Corresponds to a path in [`sys.path`](https://docs.python.org/3/library/sys_path_init.html) at runtime.
///
/// Cloning a search path is cheap because it's an `Arc`.
#[derive(Clone, PartialEq, Eq)]
pub struct ModuleSearchPath {
inner: Arc<ModuleSearchPathInner>,
}
impl ModuleSearchPath {
pub fn new(path: PathBuf, kind: ModuleSearchPathKind) -> Self {
Self {
inner: Arc::new(ModuleSearchPathInner { path, kind }),
}
}
pub fn kind(&self) -> ModuleSearchPathKind {
self.inner.kind
}
pub fn path(&self) -> &Path {
&self.inner.path
}
}
impl std::fmt::Debug for ModuleSearchPath {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
self.inner.fmt(f)
}
}
#[derive(Debug, Eq, PartialEq)]
struct ModuleSearchPathInner {
path: PathBuf,
kind: ModuleSearchPathKind,
}
#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
pub enum ModuleSearchPathKind {
// Project dependency
FirstParty,
// e.g. site packages
ThirdParty,
// e.g. built-in modules, typeshed
StandardLibrary,
}
#[derive(Debug, Eq, PartialEq)]
pub struct ModuleData {
name: ModuleName,
path: ModulePath,
}
//////////////////////////////////////////////////////
// Queries
//////////////////////////////////////////////////////
/// Resolves a module name to a module id
/// TODO: This would not work with Salsa because `ModuleName` isn't an ingredient and, therefore, cannot be used as part of a query.
/// For this to work with salsa, it would be necessary to intern all `ModuleName`s.
#[tracing::instrument(level = "trace", skip(db))]
pub fn resolve_module<Db>(db: &Db, name: ModuleName) -> Option<Module>
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let jar = db.jar();
let modules = &jar.module_resolver;
let entry = modules.by_name.entry(name.clone());
match entry {
Entry::Occupied(entry) => Some(*entry.get()),
Entry::Vacant(entry) => {
let (root_path, absolute_path) = resolve_name(&name, &modules.search_paths)?;
let normalized = absolute_path.canonicalize().ok()?;
let file_id = db.file_id(&normalized);
let path = ModulePath::new(root_path.clone(), file_id);
let id = Module(
modules
.next_module_id
.fetch_add(1, std::sync::atomic::Ordering::Relaxed),
);
modules
.modules
.insert(id, Arc::from(ModuleData { name, path }));
// A path can map to multiple modules because of symlinks:
// ```
// foo.py
// bar.py -> foo.py
// ```
// Here, both `foo` and `bar` resolve to the same module but through different paths.
// That's why we need to insert the absolute path and not the normalized path here.
modules.by_path.insert(absolute_path, id);
entry.insert_entry(id);
Some(id)
}
}
}
//////////////////////////////////////////////////////
// Mutations
//////////////////////////////////////////////////////
/// Changes the module search paths to `search_paths`.
pub fn set_module_search_paths<Db>(db: &mut Db, search_paths: Vec<ModuleSearchPath>)
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let jar = db.jar_mut();
jar.module_resolver = ModuleResolver::new(search_paths);
}
/// Resolves the module id for the file with the given id.
///
/// Returns `None` if the file is not a module in `sys.path`.
pub fn file_to_module<Db>(db: &mut Db, file: FileId) -> Option<Module>
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let path = db.file_path(file);
path_to_module(db, &path)
}
/// Resolves the module id for the given path.
///
/// Returns `None` if the path is not a module in `sys.path`.
// WARNING!: It's important that this method takes `&mut self`. Without, the implementation is prone to race conditions.
// Note: This won't work with salsa because `Path` is not an ingredient.
pub fn path_to_module<Db>(db: &mut Db, path: &Path) -> Option<Module>
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let jar = db.jar_mut();
let modules = &mut jar.module_resolver;
debug_assert!(path.is_absolute());
if let Some(existing) = modules.by_path.get(path) {
return Some(*existing);
}
let root_path = modules
.search_paths
.iter()
.find(|root| path.starts_with(root.path()))?
.clone();
// SAFETY: `strip_prefix` is guaranteed to succeed because we search the root that is a prefix of the path.
let relative_path = path.strip_prefix(root_path.path()).unwrap();
let module_name = ModuleName::from_relative_path(relative_path)?;
// Resolve the module name to see if Python would resolve the name to the same path.
// If it doesn't, then that means that multiple modules have the same in different
// root paths, but that the module corresponding to the past path is in a lower priority path,
// in which case we ignore it.
let module_id = resolve_module(db, module_name)?;
// Note: Guaranteed to be race-free because we're holding a mutable reference of `self` here.
let module_path = module_id.path(db);
if module_path.root() == &root_path {
let normalized = path.canonicalize().ok()?;
let interned_normalized = db.file_id(&normalized);
if interned_normalized != module_path.file() {
// This path is for a module with the same name but with a different precedence. For example:
// ```
// src/foo.py
// src/foo/__init__.py
// ```
// The module name of `src/foo.py` is `foo`, but the module loaded by Python is `src/foo/__init__.py`.
// That means we need to ignore `src/foo.py` even though it resolves to the same module name.
return None;
}
// Path has been inserted by `resolved`
Some(module_id)
} else {
// This path is for a module with the same name but in a module search path with a lower priority.
// Ignore it.
None
}
}
/// Adds a module to the resolver.
///
/// Returns `None` if the path doesn't resolve to a module.
///
/// Returns `Some` with the id of the module and the ids of the modules that need re-resolving
/// because they were part of a namespace package and might now resolve differently.
/// Note: This won't work with salsa because `Path` is not an ingredient.
pub fn add_module<Db>(db: &mut Db, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)>
where
Db: SemanticDb + HasJar<SemanticJar>,
{
// No locking is required because we're holding a mutable reference to `modules`.
// TODO This needs tests
// Note: Intentionally by-pass caching here. Module should not be in the cache yet.
let module = path_to_module(db, path)?;
// The code below is to handle the addition of `__init__.py` files.
// When an `__init__.py` file is added, we need to remove all modules that are part of the same package.
// For example, an `__init__.py` is added to `foo`, we need to remove `foo.bar`, `foo.baz`, etc.
// because they were namespace packages before and could have been from different search paths.
let Some(filename) = path.file_name() else {
return Some((module, Vec::new()));
};
if !matches!(filename.to_str(), Some("__init__.py" | "__init__.pyi")) {
return Some((module, Vec::new()));
}
let Some(parent_name) = module.name(db).parent() else {
return Some((module, Vec::new()));
};
let mut to_remove = Vec::new();
let jar = db.jar_mut();
let modules = &mut jar.module_resolver;
modules.by_path.retain(|_, id| {
if modules
.modules
.get(id)
.unwrap()
.name
.starts_with(&parent_name)
{
to_remove.push(*id);
false
} else {
true
}
});
// TODO remove need for this vec
let mut removed = Vec::with_capacity(to_remove.len());
for id in &to_remove {
removed.push(modules.remove_module_by_id(*id));
}
Some((module, removed))
}
#[derive(Default)]
pub struct ModuleResolver {
/// The search paths where modules are located (and searched). Corresponds to `sys.path` at runtime.
search_paths: Vec<ModuleSearchPath>,
// Locking: Locking is done by acquiring a (write) lock on `by_name`. This is because `by_name` is the primary
// lookup method. Acquiring locks in any other ordering can result in deadlocks.
/// Resolves a module name to it's module id.
by_name: FxDashMap<ModuleName, Module>,
/// All known modules, indexed by the module id.
modules: FxDashMap<Module, Arc<ModuleData>>,
/// Lookup from absolute path to module.
/// The same module might be reachable from different paths when symlinks are involved.
by_path: FxDashMap<PathBuf, Module>,
next_module_id: AtomicU32,
}
impl ModuleResolver {
pub fn new(search_paths: Vec<ModuleSearchPath>) -> Self {
Self {
search_paths,
modules: FxDashMap::default(),
by_name: FxDashMap::default(),
by_path: FxDashMap::default(),
next_module_id: AtomicU32::new(0),
}
}
pub fn remove_module(&mut self, path: &Path) {
// No locking is required because we're holding a mutable reference to `self`.
let Some((_, id)) = self.by_path.remove(path) else {
return;
};
self.remove_module_by_id(id);
}
fn remove_module_by_id(&mut self, id: Module) -> Arc<ModuleData> {
let (_, module) = self.modules.remove(&id).unwrap();
self.by_name.remove(&module.name).unwrap();
// It's possible that multiple paths map to the same id. Search all other paths referencing the same module id.
self.by_path.retain(|_, current_id| *current_id != id);
module
}
}
#[allow(clippy::missing_fields_in_debug)]
impl std::fmt::Debug for ModuleResolver {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
f.debug_struct("ModuleResolver")
.field("search_paths", &self.search_paths)
.field("modules", &self.by_name)
.finish()
}
}
/// The resolved path of a module.
///
/// It should be highly likely that the file still exists when accessing but it isn't 100% guaranteed
/// because the file could have been deleted between resolving the module name and accessing it.
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct ModulePath {
root: ModuleSearchPath,
file_id: FileId,
}
impl ModulePath {
pub fn new(root: ModuleSearchPath, file_id: FileId) -> Self {
Self { root, file_id }
}
pub fn root(&self) -> &ModuleSearchPath {
&self.root
}
pub fn file(&self) -> FileId {
self.file_id
}
}
fn resolve_name(
name: &ModuleName,
search_paths: &[ModuleSearchPath],
) -> Option<(ModuleSearchPath, PathBuf)> {
for search_path in search_paths {
let mut components = name.components();
let module_name = components.next_back()?;
match resolve_package(search_path, components) {
Ok(resolved_package) => {
let mut package_path = resolved_package.path;
package_path.push(module_name);
// Must be a `__init__.pyi` or `__init__.py` or it isn't a package.
if package_path.is_dir() {
package_path.push("__init__");
}
// TODO Implement full https://peps.python.org/pep-0561/#type-checker-module-resolution-order resolution
let stub = package_path.with_extension("pyi");
if stub.is_file() {
return Some((search_path.clone(), stub));
}
let module = package_path.with_extension("py");
if module.is_file() {
return Some((search_path.clone(), module));
}
// For regular packages, don't search the next search path. All files of that
// package must be in the same location
if resolved_package.kind.is_regular_package() {
return None;
}
}
Err(parent_kind) => {
if parent_kind.is_regular_package() {
// For regular packages, don't search the next search path.
return None;
}
}
}
}
None
}
fn resolve_package<'a, I>(
module_search_path: &ModuleSearchPath,
components: I,
) -> Result<ResolvedPackage, PackageKind>
where
I: Iterator<Item = &'a str>,
{
let mut package_path = module_search_path.path().to_path_buf();
// `true` if inside a folder that is a namespace package (has no `__init__.py`).
// Namespace packages are special because they can be spread across multiple search paths.
// https://peps.python.org/pep-0420/
let mut in_namespace_package = false;
// `true` if resolving a sub-package. For example, `true` when resolving `bar` of `foo.bar`.
let mut in_sub_package = false;
// For `foo.bar.baz`, test that `foo` and `baz` both contain a `__init__.py`.
for folder in components {
package_path.push(folder);
let has_init_py = package_path.join("__init__.py").is_file()
|| package_path.join("__init__.pyi").is_file();
if has_init_py {
in_namespace_package = false;
} else if package_path.is_dir() {
// A directory without an `__init__.py` is a namespace package, continue with the next folder.
in_namespace_package = true;
} else if in_namespace_package {
// Package not found but it is part of a namespace package.
return Err(PackageKind::Namespace);
} else if in_sub_package {
// A regular sub package wasn't found.
return Err(PackageKind::Regular);
} else {
// We couldn't find `foo` for `foo.bar.baz`, search the next search path.
return Err(PackageKind::Root);
}
in_sub_package = true;
}
let kind = if in_namespace_package {
PackageKind::Namespace
} else if in_sub_package {
PackageKind::Regular
} else {
PackageKind::Root
};
Ok(ResolvedPackage {
kind,
path: package_path,
})
}
#[derive(Debug)]
struct ResolvedPackage {
path: PathBuf,
kind: PackageKind,
}
#[derive(Copy, Clone, Eq, PartialEq, Debug)]
enum PackageKind {
/// A root package or module. E.g. `foo` in `foo.bar.baz` or just `foo`.
Root,
/// A regular sub-package where the parent contains an `__init__.py`. For example `bar` in `foo.bar` when the `foo` directory contains an `__init__.py`.
Regular,
/// A sub-package in a namespace package. A namespace package is a package without an `__init__.py`.
///
/// For example, `bar` in `foo.bar` if the `foo` directory contains no `__init__.py`.
Namespace,
}
impl PackageKind {
const fn is_regular_package(self) -> bool {
matches!(self, PackageKind::Regular)
}
}
#[cfg(test)]
mod tests {
use crate::db::tests::TestDb;
use crate::db::{SemanticDb, SourceDb};
use crate::module::{ModuleName, ModuleSearchPath, ModuleSearchPathKind};
struct TestCase {
temp_dir: tempfile::TempDir,
db: TestDb,
src: ModuleSearchPath,
site_packages: ModuleSearchPath,
}
fn create_resolver() -> std::io::Result<TestCase> {
let temp_dir = tempfile::tempdir()?;
let src = temp_dir.path().join("src");
let site_packages = temp_dir.path().join("site_packages");
std::fs::create_dir(&src)?;
std::fs::create_dir(&site_packages)?;
let src = ModuleSearchPath::new(src.canonicalize()?, ModuleSearchPathKind::FirstParty);
let site_packages = ModuleSearchPath::new(
site_packages.canonicalize()?,
ModuleSearchPathKind::ThirdParty,
);
let roots = vec![src.clone(), site_packages.clone()];
let mut db = TestDb::default();
db.set_module_search_paths(roots);
Ok(TestCase {
temp_dir,
db,
src,
site_packages,
})
}
#[test]
fn first_party_module() -> std::io::Result<()> {
let TestCase {
mut db,
src,
temp_dir: _temp_dir,
..
} = create_resolver()?;
let foo_path = src.path().join("foo.py");
std::fs::write(&foo_path, "print('Hello, world!')")?;
let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
assert_eq!(Some(foo_module), db.resolve_module(ModuleName::new("foo")));
assert_eq!(ModuleName::new("foo"), foo_module.name(&db));
assert_eq!(&src, foo_module.path(&db).root());
assert_eq!(&foo_path, &*db.file_path(foo_module.path(&db).file()));
assert_eq!(Some(foo_module), db.path_to_module(&foo_path));
Ok(())
}
#[test]
fn resolve_package() -> std::io::Result<()> {
let TestCase {
src,
mut db,
temp_dir: _temp_dir,
..
} = create_resolver()?;
let foo_dir = src.path().join("foo");
let foo_path = foo_dir.join("__init__.py");
std::fs::create_dir(&foo_dir)?;
std::fs::write(&foo_path, "print('Hello, world!')")?;
let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
assert_eq!(ModuleName::new("foo"), foo_module.name(&db));
assert_eq!(&src, foo_module.path(&db).root());
assert_eq!(&foo_path, &*db.file_path(foo_module.path(&db).file()));
assert_eq!(Some(foo_module), db.path_to_module(&foo_path));
// Resolving by directory doesn't resolve to the init file.
assert_eq!(None, db.path_to_module(&foo_dir));
Ok(())
}
#[test]
fn package_priority_over_module() -> std::io::Result<()> {
let TestCase {
mut db,
temp_dir: _temp_dir,
src,
..
} = create_resolver()?;
let foo_dir = src.path().join("foo");
let foo_init = foo_dir.join("__init__.py");
std::fs::create_dir(&foo_dir)?;
std::fs::write(&foo_init, "print('Hello, world!')")?;
let foo_py = src.path().join("foo.py");
std::fs::write(&foo_py, "print('Hello, world!')")?;
let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
assert_eq!(&src, foo_module.path(&db).root());
assert_eq!(&foo_init, &*db.file_path(foo_module.path(&db).file()));
assert_eq!(Some(foo_module), db.path_to_module(&foo_init));
assert_eq!(None, db.path_to_module(&foo_py));
Ok(())
}
#[test]
fn typing_stub_over_module() -> std::io::Result<()> {
let TestCase {
mut db,
src,
temp_dir: _temp_dir,
..
} = create_resolver()?;
let foo_stub = src.path().join("foo.pyi");
let foo_py = src.path().join("foo.py");
std::fs::write(&foo_stub, "x: int")?;
std::fs::write(&foo_py, "print('Hello, world!')")?;
let foo = db.resolve_module(ModuleName::new("foo")).unwrap();
assert_eq!(&src, foo.path(&db).root());
assert_eq!(&foo_stub, &*db.file_path(foo.path(&db).file()));
assert_eq!(Some(foo), db.path_to_module(&foo_stub));
assert_eq!(None, db.path_to_module(&foo_py));
Ok(())
}
#[test]
fn sub_packages() -> std::io::Result<()> {
let TestCase {
mut db,
src,
temp_dir: _temp_dir,
..
} = create_resolver()?;
let foo = src.path().join("foo");
let bar = foo.join("bar");
let baz = bar.join("baz.py");
std::fs::create_dir_all(&bar)?;
std::fs::write(foo.join("__init__.py"), "")?;
std::fs::write(bar.join("__init__.py"), "")?;
std::fs::write(&baz, "print('Hello, world!')")?;
let baz_module = db.resolve_module(ModuleName::new("foo.bar.baz")).unwrap();
assert_eq!(&src, baz_module.path(&db).root());
assert_eq!(&baz, &*db.file_path(baz_module.path(&db).file()));
assert_eq!(Some(baz_module), db.path_to_module(&baz));
Ok(())
}
#[test]
fn namespace_package() -> std::io::Result<()> {
let TestCase {
mut db,
temp_dir: _,
src,
site_packages,
} = create_resolver()?;
// From [PEP420](https://peps.python.org/pep-0420/#nested-namespace-packages).
// But uses `src` for `project1` and `site_packages2` for `project2`.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.path().join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
std::fs::create_dir_all(child1)?;
std::fs::write(&one, "print('Hello, world!')")?;
let parent2 = site_packages.path().join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
std::fs::create_dir_all(&child2)?;
std::fs::write(&two, "print('Hello, world!')")?;
let one_module = db
.resolve_module(ModuleName::new("parent.child.one"))
.unwrap();
assert_eq!(Some(one_module), db.path_to_module(&one));
let two_module = db
.resolve_module(ModuleName::new("parent.child.two"))
.unwrap();
assert_eq!(Some(two_module), db.path_to_module(&two));
Ok(())
}
#[test]
fn regular_package_in_namespace_package() -> std::io::Result<()> {
let TestCase {
mut db,
temp_dir: _,
src,
site_packages,
} = create_resolver()?;
// Adopted test case from the [PEP420 examples](https://peps.python.org/pep-0420/#nested-namespace-packages).
// The `src/parent/child` package is a regular package. Therefore, `site_packages/parent/child/two.py` should not be resolved.
// ```
// src
// parent
// child
// one.py
// site_packages
// parent
// child
// two.py
// ```
let parent1 = src.path().join("parent");
let child1 = parent1.join("child");
let one = child1.join("one.py");
std::fs::create_dir_all(&child1)?;
std::fs::write(child1.join("__init__.py"), "print('Hello, world!')")?;
std::fs::write(&one, "print('Hello, world!')")?;
let parent2 = site_packages.path().join("parent");
let child2 = parent2.join("child");
let two = child2.join("two.py");
std::fs::create_dir_all(&child2)?;
std::fs::write(two, "print('Hello, world!')")?;
let one_module = db
.resolve_module(ModuleName::new("parent.child.one"))
.unwrap();
assert_eq!(Some(one_module), db.path_to_module(&one));
assert_eq!(None, db.resolve_module(ModuleName::new("parent.child.two")));
Ok(())
}
#[test]
fn module_search_path_priority() -> std::io::Result<()> {
let TestCase {
mut db,
src,
site_packages,
temp_dir: _temp_dir,
} = create_resolver()?;
let foo_src = src.path().join("foo.py");
let foo_site_packages = site_packages.path().join("foo.py");
std::fs::write(&foo_src, "")?;
std::fs::write(&foo_site_packages, "")?;
let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
assert_eq!(&src, foo_module.path(&db).root());
assert_eq!(&foo_src, &*db.file_path(foo_module.path(&db).file()));
assert_eq!(Some(foo_module), db.path_to_module(&foo_src));
assert_eq!(None, db.path_to_module(&foo_site_packages));
Ok(())
}
#[test]
#[cfg(target_family = "unix")]
fn symlink() -> std::io::Result<()> {
let TestCase {
mut db,
src,
temp_dir: _temp_dir,
..
} = create_resolver()?;
let foo = src.path().join("foo.py");
let bar = src.path().join("bar.py");
std::fs::write(&foo, "")?;
std::os::unix::fs::symlink(&foo, &bar)?;
let foo_module = db.resolve_module(ModuleName::new("foo")).unwrap();
let bar_module = db.resolve_module(ModuleName::new("bar")).unwrap();
assert_ne!(foo_module, bar_module);
assert_eq!(&src, foo_module.path(&db).root());
assert_eq!(&foo, &*db.file_path(foo_module.path(&db).file()));
// Bar has a different name but it should point to the same file.
assert_eq!(&src, bar_module.path(&db).root());
assert_eq!(foo_module.path(&db).file(), bar_module.path(&db).file());
assert_eq!(&foo, &*db.file_path(bar_module.path(&db).file()));
assert_eq!(Some(foo_module), db.path_to_module(&foo));
assert_eq!(Some(bar_module), db.path_to_module(&bar));
Ok(())
}
}

View file

@ -0,0 +1,95 @@
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use ruff_python_ast as ast;
use ruff_python_parser::{Mode, ParseError};
use ruff_text_size::{Ranged, TextRange};
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use crate::files::FileId;
#[derive(Debug, Clone, PartialEq)]
pub struct Parsed {
inner: Arc<ParsedInner>,
}
#[derive(Debug, PartialEq)]
struct ParsedInner {
ast: ast::ModModule,
errors: Vec<ParseError>,
}
impl Parsed {
fn new(ast: ast::ModModule, errors: Vec<ParseError>) -> Self {
Self {
inner: Arc::new(ParsedInner { ast, errors }),
}
}
pub(crate) fn from_text(text: &str) -> Self {
let result = ruff_python_parser::parse(text, Mode::Module);
let (module, errors) = match result {
Ok(ast::Mod::Module(module)) => (module, vec![]),
Ok(ast::Mod::Expression(expression)) => (
ast::ModModule {
range: expression.range(),
body: vec![ast::Stmt::Expr(ast::StmtExpr {
range: expression.range(),
value: expression.body,
})],
},
vec![],
),
Err(errors) => (
ast::ModModule {
range: TextRange::default(),
body: Vec::new(),
},
vec![errors],
),
};
Parsed::new(module, errors)
}
pub fn ast(&self) -> &ast::ModModule {
&self.inner.ast
}
pub fn errors(&self) -> &[ParseError] {
&self.inner.errors
}
}
#[tracing::instrument(level = "trace", skip(db))]
pub(crate) fn parse<Db>(db: &Db, file_id: FileId) -> Parsed
where
Db: SourceDb + HasJar<SourceJar>,
{
let parsed = db.jar();
parsed.parsed.get(&file_id, |file_id| {
let source = db.source(*file_id);
Parsed::from_text(source.text())
})
}
#[derive(Debug, Default)]
pub struct ParsedStorage(KeyValueCache<FileId, Parsed>);
impl Deref for ParsedStorage {
type Target = KeyValueCache<FileId, Parsed>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for ParsedStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View file

@ -0,0 +1,154 @@
use std::path::Path;
use std::sync::Arc;
use crate::db::{Db, HasJar, SemanticDb, SemanticJar, SourceDb, SourceJar};
use crate::files::{FileId, Files};
use crate::lint::{lint_syntax, Diagnostics, LintSyntaxStorage};
use crate::module::{
add_module, path_to_module, resolve_module, set_module_search_paths, Module, ModuleData,
ModuleName, ModuleResolver, ModuleSearchPath,
};
use crate::parse::{parse, Parsed, ParsedStorage};
use crate::source::{source_text, Source, SourceStorage};
use crate::symbols::{symbol_table, SymbolId, SymbolTable, SymbolTablesStorage};
use crate::types::{infer_symbol_type, Type, TypeStore};
#[derive(Debug)]
pub struct Program {
files: Files,
source: SourceJar,
semantic: SemanticJar,
}
impl Program {
pub fn new(module_search_paths: Vec<ModuleSearchPath>, files: Files) -> Self {
Self {
source: SourceJar {
sources: SourceStorage::default(),
parsed: ParsedStorage::default(),
lint_syntax: LintSyntaxStorage::default(),
},
semantic: SemanticJar {
module_resolver: ModuleResolver::new(module_search_paths),
symbol_tables: SymbolTablesStorage::default(),
type_store: TypeStore::default(),
},
files,
}
}
pub fn apply_changes<I>(&mut self, changes: I)
where
I: IntoIterator<Item = FileChange>,
{
for change in changes {
self.semantic
.module_resolver
.remove_module(&self.file_path(change.id));
self.semantic.symbol_tables.remove(&change.id);
self.source.sources.remove(&change.id);
self.source.parsed.remove(&change.id);
self.source.lint_syntax.remove(&change.id);
// TODO: remove all dependent modules as well
self.semantic.type_store.remove_module(change.id);
}
}
}
impl SourceDb for Program {
fn file_id(&self, path: &Path) -> FileId {
self.files.intern(path)
}
fn file_path(&self, file_id: FileId) -> Arc<Path> {
self.files.path(file_id)
}
fn source(&self, file_id: FileId) -> Source {
source_text(self, file_id)
}
fn parse(&self, file_id: FileId) -> Parsed {
parse(self, file_id)
}
fn lint_syntax(&self, file_id: FileId) -> Diagnostics {
lint_syntax(self, file_id)
}
}
impl SemanticDb for Program {
fn resolve_module(&self, name: ModuleName) -> Option<Module> {
resolve_module(self, name)
}
fn symbol_table(&self, file_id: FileId) -> Arc<SymbolTable> {
symbol_table(self, file_id)
}
// Mutations
fn path_to_module(&mut self, path: &Path) -> Option<Module> {
path_to_module(self, path)
}
fn add_module(&mut self, path: &Path) -> Option<(Module, Vec<Arc<ModuleData>>)> {
add_module(self, path)
}
fn set_module_search_paths(&mut self, paths: Vec<ModuleSearchPath>) {
set_module_search_paths(self, paths);
}
fn infer_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId) -> Type {
infer_symbol_type(self, file_id, symbol_id)
}
}
impl Db for Program {}
impl HasJar<SourceJar> for Program {
fn jar(&self) -> &SourceJar {
&self.source
}
fn jar_mut(&mut self) -> &mut SourceJar {
&mut self.source
}
}
impl HasJar<SemanticJar> for Program {
fn jar(&self) -> &SemanticJar {
&self.semantic
}
fn jar_mut(&mut self) -> &mut SemanticJar {
&mut self.semantic
}
}
#[derive(Copy, Clone, Debug)]
pub struct FileChange {
id: FileId,
kind: FileChangeKind,
}
impl FileChange {
pub fn new(file_id: FileId, kind: FileChangeKind) -> Self {
Self { id: file_id, kind }
}
pub fn file_id(&self) -> FileId {
self.id
}
pub fn kind(&self) -> FileChangeKind {
self.kind
}
}
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum FileChangeKind {
Created,
Modified,
Deleted,
}

View file

@ -0,0 +1,98 @@
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SourceDb, SourceJar};
use ruff_notebook::Notebook;
use ruff_python_ast::PySourceType;
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use crate::files::FileId;
#[tracing::instrument(level = "trace", skip(db))]
pub(crate) fn source_text<Db>(db: &Db, file_id: FileId) -> Source
where
Db: SourceDb + HasJar<SourceJar>,
{
let sources = &db.jar().sources;
sources.get(&file_id, |file_id| {
tracing::trace!("Reading source text for file_id={:?}.", file_id);
let path = db.file_path(*file_id);
let source_text = std::fs::read_to_string(&path).unwrap_or_else(|err| {
tracing::error!("Failed to read file '{path:?}: {err}'. Falling back to empty text");
String::new()
});
let python_ty = PySourceType::from(&path);
let kind = match python_ty {
PySourceType::Python => {
SourceKind::Python(Arc::from(source_text))
}
PySourceType::Stub => SourceKind::Stub(Arc::from(source_text)),
PySourceType::Ipynb => {
let notebook = Notebook::from_source_code(&source_text).unwrap_or_else(|err| {
// TODO should this be changed to never fail?
// or should we instead add a diagnostic somewhere? But what would we return in this case?
tracing::error!(
"Failed to parse notebook '{path:?}: {err}'. Falling back to an empty notebook"
);
Notebook::from_source_code("").unwrap()
});
SourceKind::IpyNotebook(Arc::new(notebook))
}
};
Source { kind }
})
}
#[derive(Debug, Clone, PartialEq)]
pub enum SourceKind {
Python(Arc<str>),
Stub(Arc<str>),
IpyNotebook(Arc<Notebook>),
}
#[derive(Debug, Clone, PartialEq)]
pub struct Source {
kind: SourceKind,
}
impl Source {
pub fn python<T: Into<Arc<str>>>(source: T) -> Self {
Self {
kind: SourceKind::Python(source.into()),
}
}
pub fn kind(&self) -> &SourceKind {
&self.kind
}
pub fn text(&self) -> &str {
match &self.kind {
SourceKind::Python(text) => text,
SourceKind::Stub(text) => text,
SourceKind::IpyNotebook(notebook) => notebook.source_code(),
}
}
}
#[derive(Debug, Default)]
pub struct SourceStorage(pub(crate) KeyValueCache<FileId, Source>);
impl Deref for SourceStorage {
type Target = KeyValueCache<FileId, Source>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SourceStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}

View file

@ -0,0 +1,765 @@
#![allow(dead_code)]
use std::hash::{Hash, Hasher};
use std::iter::{Copied, DoubleEndedIterator, FusedIterator};
use std::ops::{Deref, DerefMut};
use std::sync::Arc;
use hashbrown::hash_map::{Keys, RawEntryMut};
use rustc_hash::{FxHashMap, FxHasher};
use ruff_index::{newtype_index, IndexVec};
use ruff_python_ast as ast;
use ruff_python_ast::visitor::preorder::PreorderVisitor;
use crate::ast_ids::TypedNodeKey;
use crate::cache::KeyValueCache;
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::files::FileId;
use crate::Name;
#[allow(unreachable_pub)]
#[tracing::instrument(level = "trace", skip(db))]
pub fn symbol_table<Db>(db: &Db, file_id: FileId) -> Arc<SymbolTable>
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let jar = db.jar();
jar.symbol_tables.get(&file_id, |_| {
let parsed = db.parse(file_id);
Arc::from(SymbolTable::from_ast(parsed.ast()))
})
}
type Map<K, V> = hashbrown::HashMap<K, V, ()>;
#[newtype_index]
pub(crate) struct ScopeId;
impl ScopeId {
pub(crate) fn scope(self, table: &SymbolTable) -> &Scope {
&table.scopes_by_id[self]
}
}
#[newtype_index]
pub struct SymbolId;
impl SymbolId {
pub(crate) fn symbol(self, table: &SymbolTable) -> &Symbol {
&table.symbols_by_id[self]
}
}
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) enum ScopeKind {
Module,
Annotation,
Class,
Function,
}
#[derive(Debug)]
pub(crate) struct Scope {
name: Name,
kind: ScopeKind,
child_scopes: Vec<ScopeId>,
// symbol IDs, hashed by symbol name
symbols_by_name: Map<SymbolId, ()>,
}
impl Scope {
pub(crate) fn name(&self) -> &str {
self.name.as_str()
}
pub(crate) fn kind(&self) -> ScopeKind {
self.kind
}
}
#[derive(Debug)]
pub(crate) struct Symbol {
name: Name,
}
impl Symbol {
pub(crate) fn name(&self) -> &str {
self.name.as_str()
}
}
// TODO storing TypedNodeKey for definitions means we have to search to find them again in the AST;
// this is at best O(log n). If looking up definitions is a bottleneck we should look for
// alternatives here.
#[derive(Debug)]
pub(crate) enum Definition {
// For the import cases, we don't need reference to any arbitrary AST subtrees (annotations,
// RHS), and referencing just the import statement node is imprecise (a single import statement
// can assign many symbols, we'd have to re-search for the one we care about), so we just copy
// the small amount of information we need from the AST.
Import(ImportDefinition),
ImportFrom(ImportFromDefinition),
ClassDef(TypedNodeKey<ast::StmtClassDef>),
FunctionDef(TypedNodeKey<ast::StmtFunctionDef>),
Assignment(TypedNodeKey<ast::StmtAssign>),
AnnotatedAssignment(TypedNodeKey<ast::StmtAnnAssign>),
// TODO with statements, except handlers, function args...
}
#[derive(Debug)]
pub(crate) struct ImportDefinition {
pub(crate) module: String,
}
#[derive(Debug)]
pub(crate) struct ImportFromDefinition {
pub(crate) module: Option<String>,
pub(crate) name: String,
pub(crate) level: u32,
}
/// Table of all symbols in all scopes for a module.
#[derive(Debug)]
pub struct SymbolTable {
scopes_by_id: IndexVec<ScopeId, Scope>,
symbols_by_id: IndexVec<SymbolId, Symbol>,
defs: FxHashMap<SymbolId, Vec<Definition>>,
}
impl SymbolTable {
pub(crate) fn from_ast(module: &ast::ModModule) -> Self {
let root_scope_id = SymbolTable::root_scope_id();
let mut builder = SymbolTableBuilder {
table: SymbolTable::new(),
scopes: vec![root_scope_id],
};
builder.visit_body(&module.body);
builder.table
}
pub(crate) fn new() -> Self {
let mut table = SymbolTable {
scopes_by_id: IndexVec::new(),
symbols_by_id: IndexVec::new(),
defs: FxHashMap::default(),
};
table.scopes_by_id.push(Scope {
name: Name::new("<module>"),
kind: ScopeKind::Module,
child_scopes: Vec::new(),
symbols_by_name: Map::default(),
});
table
}
pub(crate) const fn root_scope_id() -> ScopeId {
ScopeId::from_usize(0)
}
pub(crate) fn root_scope(&self) -> &Scope {
&self.scopes_by_id[SymbolTable::root_scope_id()]
}
pub(crate) fn symbol_ids_for_scope(&self, scope_id: ScopeId) -> Copied<Keys<SymbolId, ()>> {
self.scopes_by_id[scope_id].symbols_by_name.keys().copied()
}
pub(crate) fn symbols_for_scope(
&self,
scope_id: ScopeId,
) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
SymbolIterator {
table: self,
ids: self.symbol_ids_for_scope(scope_id),
}
}
pub(crate) fn root_symbol_ids(&self) -> Copied<Keys<SymbolId, ()>> {
self.symbol_ids_for_scope(SymbolTable::root_scope_id())
}
pub(crate) fn root_symbols(&self) -> SymbolIterator<Copied<Keys<SymbolId, ()>>> {
self.symbols_for_scope(SymbolTable::root_scope_id())
}
pub(crate) fn child_scope_ids_of(&self, scope_id: ScopeId) -> &[ScopeId] {
&self.scopes_by_id[scope_id].child_scopes
}
pub(crate) fn child_scopes_of(&self, scope_id: ScopeId) -> ScopeIterator<&[ScopeId]> {
ScopeIterator {
table: self,
ids: self.child_scope_ids_of(scope_id),
}
}
pub(crate) fn root_child_scope_ids(&self) -> &[ScopeId] {
self.child_scope_ids_of(SymbolTable::root_scope_id())
}
pub(crate) fn root_child_scopes(&self) -> ScopeIterator<&[ScopeId]> {
self.child_scopes_of(SymbolTable::root_scope_id())
}
pub(crate) fn symbol_id_by_name(&self, scope_id: ScopeId, name: &str) -> Option<SymbolId> {
let scope = &self.scopes_by_id[scope_id];
let hash = SymbolTable::hash_name(name);
let name = Name::new(name);
scope
.symbols_by_name
.raw_entry()
.from_hash(hash, |symid| self.symbols_by_id[*symid].name == name)
.map(|(symbol_id, ())| *symbol_id)
}
pub(crate) fn symbol_by_name(&self, scope_id: ScopeId, name: &str) -> Option<&Symbol> {
Some(&self.symbols_by_id[self.symbol_id_by_name(scope_id, name)?])
}
pub(crate) fn root_symbol_id_by_name(&self, name: &str) -> Option<SymbolId> {
self.symbol_id_by_name(SymbolTable::root_scope_id(), name)
}
pub(crate) fn root_symbol_by_name(&self, name: &str) -> Option<&Symbol> {
self.symbol_by_name(SymbolTable::root_scope_id(), name)
}
pub(crate) fn defs(&self, symbol_id: SymbolId) -> &[Definition] {
self.defs
.get(&symbol_id)
.map(std::vec::Vec::as_slice)
.unwrap_or_default()
}
fn add_symbol_to_scope(&mut self, scope_id: ScopeId, name: &str) -> SymbolId {
let hash = SymbolTable::hash_name(name);
let scope = &mut self.scopes_by_id[scope_id];
let name = Name::new(name);
let entry = scope
.symbols_by_name
.raw_entry_mut()
.from_hash(hash, |existing| self.symbols_by_id[*existing].name == name);
match entry {
RawEntryMut::Occupied(entry) => *entry.key(),
RawEntryMut::Vacant(entry) => {
let id = self.symbols_by_id.push(Symbol { name });
entry.insert_with_hasher(hash, id, (), |_| hash);
id
}
}
}
fn add_child_scope(
&mut self,
parent_scope_id: ScopeId,
name: &str,
kind: ScopeKind,
) -> ScopeId {
let new_scope_id = self.scopes_by_id.push(Scope {
name: Name::new(name),
kind,
child_scopes: Vec::new(),
symbols_by_name: Map::default(),
});
let parent_scope = &mut self.scopes_by_id[parent_scope_id];
parent_scope.child_scopes.push(new_scope_id);
new_scope_id
}
fn hash_name(name: &str) -> u64 {
let mut hasher = FxHasher::default();
name.hash(&mut hasher);
hasher.finish()
}
}
pub(crate) struct SymbolIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
impl<'a, I> Iterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId>,
{
type Item = &'a Symbol;
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some(&self.table.symbols_by_id[id])
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for SymbolIterator<'a, I> where
I: Iterator<Item = SymbolId> + FusedIterator
{
}
impl<'a, I> DoubleEndedIterator for SymbolIterator<'a, I>
where
I: Iterator<Item = SymbolId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some(&self.table.symbols_by_id[id])
}
}
pub(crate) struct ScopeIterator<'a, I> {
table: &'a SymbolTable,
ids: I,
}
impl<'a, I> Iterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId>,
{
type Item = &'a Scope;
fn next(&mut self) -> Option<Self::Item> {
let id = self.ids.next()?;
Some(&self.table.scopes_by_id[id])
}
fn size_hint(&self) -> (usize, Option<usize>) {
self.ids.size_hint()
}
}
impl<'a, I> FusedIterator for ScopeIterator<'a, I> where I: Iterator<Item = ScopeId> + FusedIterator {}
impl<'a, I> DoubleEndedIterator for ScopeIterator<'a, I>
where
I: Iterator<Item = ScopeId> + DoubleEndedIterator,
{
fn next_back(&mut self) -> Option<Self::Item> {
let id = self.ids.next_back()?;
Some(&self.table.scopes_by_id[id])
}
}
struct SymbolTableBuilder {
table: SymbolTable,
scopes: Vec<ScopeId>,
}
impl SymbolTableBuilder {
fn add_symbol(&mut self, identifier: &str) -> SymbolId {
self.table.add_symbol_to_scope(self.cur_scope(), identifier)
}
fn add_symbol_with_def(&mut self, identifier: &str, definition: Definition) -> SymbolId {
let symbol_id = self.add_symbol(identifier);
self.table
.defs
.entry(symbol_id)
.or_default()
.push(definition);
symbol_id
}
fn push_scope(&mut self, child_of: ScopeId, name: &str, kind: ScopeKind) -> ScopeId {
let scope_id = self.table.add_child_scope(child_of, name, kind);
self.scopes.push(scope_id);
scope_id
}
fn pop_scope(&mut self) -> ScopeId {
self.scopes
.pop()
.expect("Scope stack should never be empty")
}
fn cur_scope(&self) -> ScopeId {
*self
.scopes
.last()
.expect("Scope stack should never be empty")
}
fn with_type_params(
&mut self,
name: &str,
params: &Option<Box<ast::TypeParams>>,
nested: impl FnOnce(&mut Self),
) {
if let Some(type_params) = params {
self.push_scope(self.cur_scope(), name, ScopeKind::Annotation);
for type_param in &type_params.type_params {
let name = match type_param {
ast::TypeParam::TypeVar(ast::TypeParamTypeVar { name, .. }) => name,
ast::TypeParam::ParamSpec(ast::TypeParamParamSpec { name, .. }) => name,
ast::TypeParam::TypeVarTuple(ast::TypeParamTypeVarTuple { name, .. }) => name,
};
self.add_symbol(name);
}
}
nested(self);
if params.is_some() {
self.pop_scope();
}
}
}
impl PreorderVisitor<'_> for SymbolTableBuilder {
fn visit_expr(&mut self, expr: &ast::Expr) {
if let ast::Expr::Name(ast::ExprName { id, .. }) = expr {
self.add_symbol(id);
}
ast::visitor::preorder::walk_expr(self, expr);
}
fn visit_stmt(&mut self, stmt: &ast::Stmt) {
// TODO need to capture more definition statements here
match stmt {
ast::Stmt::ClassDef(node) => {
let def = Definition::ClassDef(TypedNodeKey::from_node(node));
self.add_symbol_with_def(&node.name, def);
self.with_type_params(&node.name, &node.type_params, |builder| {
builder.push_scope(builder.cur_scope(), &node.name, ScopeKind::Class);
ast::visitor::preorder::walk_stmt(builder, stmt);
builder.pop_scope();
});
}
ast::Stmt::FunctionDef(node) => {
let def = Definition::FunctionDef(TypedNodeKey::from_node(node));
self.add_symbol_with_def(&node.name, def);
self.with_type_params(&node.name, &node.type_params, |builder| {
builder.push_scope(builder.cur_scope(), &node.name, ScopeKind::Function);
ast::visitor::preorder::walk_stmt(builder, stmt);
builder.pop_scope();
});
}
ast::Stmt::Import(ast::StmtImport { names, .. }) => {
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.split('.').next().unwrap()
};
let def = Definition::Import(ImportDefinition {
module: alias.name.id.clone(),
});
self.add_symbol_with_def(symbol_name, def);
}
}
ast::Stmt::ImportFrom(ast::StmtImportFrom {
module,
names,
level,
..
}) => {
for alias in names {
let symbol_name = if let Some(asname) = &alias.asname {
asname.id.as_str()
} else {
alias.name.id.as_str()
};
let def = Definition::ImportFrom(ImportFromDefinition {
module: module.as_ref().map(|m| m.id.clone()),
name: alias.name.id.clone(),
level: *level,
});
self.add_symbol_with_def(symbol_name, def);
}
}
_ => {
ast::visitor::preorder::walk_stmt(self, stmt);
}
}
}
}
#[derive(Debug, Default)]
pub struct SymbolTablesStorage(KeyValueCache<FileId, Arc<SymbolTable>>);
impl Deref for SymbolTablesStorage {
type Target = KeyValueCache<FileId, Arc<SymbolTable>>;
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for SymbolTablesStorage {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}
#[cfg(test)]
mod tests {
use textwrap::dedent;
use crate::parse::Parsed;
use crate::symbols::ScopeKind;
use super::{SymbolId, SymbolIterator, SymbolTable};
mod from_ast {
use super::*;
fn parse(code: &str) -> Parsed {
Parsed::from_text(&dedent(code))
}
fn names<I>(it: SymbolIterator<I>) -> Vec<&str>
where
I: Iterator<Item = SymbolId>,
{
let mut symbols: Vec<_> = it.map(|sym| sym.name.as_str()).collect();
symbols.sort_unstable();
symbols
}
#[test]
fn empty() {
let parsed = parse("");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()).len(), 0);
}
#[test]
fn simple() {
let parsed = parse("x");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["x"]);
assert_eq!(
table.defs(table.root_symbol_id_by_name("x").unwrap()).len(),
0
);
}
#[test]
fn annotation_only() {
let parsed = parse("x: int");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["int", "x"]);
// TODO record definition
}
#[test]
fn import() {
let parsed = parse("import foo");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.defs(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
}
#[test]
fn import_sub() {
let parsed = parse("import foo.bar");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["foo"]);
}
#[test]
fn import_as() {
let parsed = parse("import foo.bar as baz");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["baz"]);
}
#[test]
fn import_from() {
let parsed = parse("from bar import foo");
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["foo"]);
assert_eq!(
table
.defs(table.root_symbol_id_by_name("foo").unwrap())
.len(),
1
);
}
#[test]
fn class_scope() {
let parsed = parse(
"
class C:
x = 1
y = 2
",
);
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["C", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let c_scope = scopes[0].scope(&table);
assert_eq!(c_scope.kind(), ScopeKind::Class);
assert_eq!(c_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table.defs(table.root_symbol_id_by_name("C").unwrap()).len(),
1
);
}
#[test]
fn func_scope() {
let parsed = parse(
"
def func():
x = 1
y = 2
",
);
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["func", "y"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let func_scope = scopes[0].scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(
table
.defs(table.root_symbol_id_by_name("func").unwrap())
.len(),
1
);
}
#[test]
fn dupes() {
let parsed = parse(
"
def func():
x = 1
def func():
y = 2
",
);
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 2);
let func_scope_1 = scopes[0].scope(&table);
let func_scope_2 = scopes[1].scope(&table);
assert_eq!(func_scope_1.kind(), ScopeKind::Function);
assert_eq!(func_scope_1.name(), "func");
assert_eq!(func_scope_2.kind(), ScopeKind::Function);
assert_eq!(func_scope_2.name(), "func");
assert_eq!(names(table.symbols_for_scope(scopes[0])), vec!["x"]);
assert_eq!(names(table.symbols_for_scope(scopes[1])), vec!["y"]);
assert_eq!(
table
.defs(table.root_symbol_id_by_name("func").unwrap())
.len(),
2
);
}
#[test]
fn generic_func() {
let parsed = parse(
"
def func[T]():
x = 1
",
);
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["func"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Function);
assert_eq!(func_scope.name(), "func");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
#[test]
fn generic_class() {
let parsed = parse(
"
class C[T]:
x = 1
",
);
let table = SymbolTable::from_ast(parsed.ast());
assert_eq!(names(table.root_symbols()), vec!["C"]);
let scopes = table.root_child_scope_ids();
assert_eq!(scopes.len(), 1);
let ann_scope_id = scopes[0];
let ann_scope = ann_scope_id.scope(&table);
assert_eq!(ann_scope.kind(), ScopeKind::Annotation);
assert_eq!(ann_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(ann_scope_id)), vec!["T"]);
let scopes = table.child_scope_ids_of(ann_scope_id);
assert_eq!(scopes.len(), 1);
let func_scope_id = scopes[0];
let func_scope = func_scope_id.scope(&table);
assert_eq!(func_scope.kind(), ScopeKind::Class);
assert_eq!(func_scope.name(), "C");
assert_eq!(names(table.symbols_for_scope(func_scope_id)), vec!["x"]);
}
}
#[test]
fn insert_same_name_symbol_twice() {
let mut table = SymbolTable::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 = table.add_symbol_to_scope(root_scope_id, "foo");
let symbol_id_2 = table.add_symbol_to_scope(root_scope_id, "foo");
assert_eq!(symbol_id_1, symbol_id_2);
}
#[test]
fn insert_different_named_symbols() {
let mut table = SymbolTable::new();
let root_scope_id = SymbolTable::root_scope_id();
let symbol_id_1 = table.add_symbol_to_scope(root_scope_id, "foo");
let symbol_id_2 = table.add_symbol_to_scope(root_scope_id, "bar");
assert_ne!(symbol_id_1, symbol_id_2);
}
#[test]
fn add_child_scope_with_symbol() {
let mut table = SymbolTable::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_top = table.add_symbol_to_scope(root_scope_id, "foo");
let c_scope = table.add_child_scope(root_scope_id, "C", ScopeKind::Class);
let foo_symbol_inner = table.add_symbol_to_scope(c_scope, "foo");
assert_ne!(foo_symbol_top, foo_symbol_inner);
}
#[test]
fn scope_from_id() {
let table = SymbolTable::new();
let root_scope_id = SymbolTable::root_scope_id();
let scope = root_scope_id.scope(&table);
assert_eq!(scope.name.as_str(), "<module>");
assert_eq!(scope.kind, ScopeKind::Module);
}
#[test]
fn symbol_from_id() {
let mut table = SymbolTable::new();
let root_scope_id = SymbolTable::root_scope_id();
let foo_symbol_id = table.add_symbol_to_scope(root_scope_id, "foo");
let symbol = foo_symbol_id.symbol(&table);
assert_eq!(symbol.name.as_str(), "foo");
}
}

View file

@ -0,0 +1,519 @@
#![allow(dead_code)]
use crate::ast_ids::NodeKey;
use crate::files::FileId;
use crate::symbols::SymbolId;
use crate::{FxDashMap, FxIndexSet, Name};
use ruff_index::{newtype_index, IndexVec};
use rustc_hash::FxHashMap;
pub(crate) mod infer;
pub(crate) use infer::infer_symbol_type;
/// unique ID for a type
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub enum Type {
/// the dynamic or gradual type: a statically-unknown set of values
Any,
/// the empty set of values
Never,
/// unknown type (no annotation)
/// equivalent to Any, or to object in strict mode
Unknown,
/// name is not bound to any value
Unbound,
/// a specific function
Function(FunctionTypeId),
/// the set of Python objects with a given class in their __class__'s method resolution order
Class(ClassTypeId),
Union(UnionTypeId),
Intersection(IntersectionTypeId),
// TODO protocols, callable types, overloads, generics, type vars
}
impl Type {
fn display<'a>(&'a self, store: &'a TypeStore) -> DisplayType<'a> {
DisplayType { ty: self, store }
}
}
// TODO: currently calling `get_function` et al and holding on to the `FunctionTypeRef` will lock a
// shard of this dashmap, for as long as you hold the reference. This may be a problem. We could
// switch to having all the arenas hold Arc, or we could see if we can split up ModuleTypeStore,
// and/or give it inner mutability and finer-grained internal locking.
#[derive(Debug, Default)]
pub struct TypeStore {
modules: FxDashMap<FileId, ModuleTypeStore>,
}
impl TypeStore {
pub fn remove_module(&mut self, file_id: FileId) {
self.modules.remove(&file_id);
}
pub fn cache_symbol_type(&mut self, file_id: FileId, symbol_id: SymbolId, ty: Type) {
self.add_or_get_module(file_id)
.symbol_types
.insert(symbol_id, ty);
}
pub fn cache_node_type(&mut self, file_id: FileId, node_key: NodeKey, ty: Type) {
self.add_or_get_module(file_id)
.node_types
.insert(node_key, ty);
}
pub fn get_cached_symbol_type(&self, file_id: FileId, symbol_id: SymbolId) -> Option<Type> {
self.try_get_module(file_id)?
.symbol_types
.get(&symbol_id)
.copied()
}
pub fn get_cached_node_type(&self, file_id: FileId, node_key: &NodeKey) -> Option<Type> {
self.try_get_module(file_id)?
.node_types
.get(node_key)
.copied()
}
fn add_or_get_module(&mut self, file_id: FileId) -> ModuleStoreRefMut {
self.modules
.entry(file_id)
.or_insert_with(|| ModuleTypeStore::new(file_id))
}
fn get_module(&self, file_id: FileId) -> ModuleStoreRef {
self.try_get_module(file_id).expect("module should exist")
}
fn try_get_module(&self, file_id: FileId) -> Option<ModuleStoreRef> {
self.modules.get(&file_id)
}
fn add_function(&mut self, file_id: FileId, name: &str) -> FunctionTypeId {
self.add_or_get_module(file_id).add_function(name)
}
fn add_class(&mut self, file_id: FileId, name: &str) -> ClassTypeId {
self.add_or_get_module(file_id).add_class(name)
}
fn add_union(&mut self, file_id: FileId, elems: &[Type]) -> UnionTypeId {
self.add_or_get_module(file_id).add_union(elems)
}
fn add_intersection(
&mut self,
file_id: FileId,
positive: &[Type],
negative: &[Type],
) -> IntersectionTypeId {
self.add_or_get_module(file_id)
.add_intersection(positive, negative)
}
fn get_function(&self, id: FunctionTypeId) -> FunctionTypeRef {
FunctionTypeRef {
module_store: self.get_module(id.file_id),
function_id: id.func_id,
}
}
fn get_class(&self, id: ClassTypeId) -> ClassTypeRef {
ClassTypeRef {
module_store: self.get_module(id.file_id),
class_id: id.class_id,
}
}
fn get_union(&self, id: UnionTypeId) -> UnionTypeRef {
UnionTypeRef {
module_store: self.get_module(id.file_id),
union_id: id.union_id,
}
}
fn get_intersection(&self, id: IntersectionTypeId) -> IntersectionTypeRef {
IntersectionTypeRef {
module_store: self.get_module(id.file_id),
intersection_id: id.intersection_id,
}
}
}
type ModuleStoreRef<'a> = dashmap::mapref::one::Ref<
'a,
FileId,
ModuleTypeStore,
std::hash::BuildHasherDefault<rustc_hash::FxHasher>,
>;
type ModuleStoreRefMut<'a> = dashmap::mapref::one::RefMut<
'a,
FileId,
ModuleTypeStore,
std::hash::BuildHasherDefault<rustc_hash::FxHasher>,
>;
#[derive(Debug)]
pub(crate) struct FunctionTypeRef<'a> {
module_store: ModuleStoreRef<'a>,
function_id: ModuleFunctionTypeId,
}
impl<'a> std::ops::Deref for FunctionTypeRef<'a> {
type Target = FunctionType;
fn deref(&self) -> &Self::Target {
self.module_store.get_function(self.function_id)
}
}
#[derive(Debug)]
pub(crate) struct ClassTypeRef<'a> {
module_store: ModuleStoreRef<'a>,
class_id: ModuleClassTypeId,
}
impl<'a> std::ops::Deref for ClassTypeRef<'a> {
type Target = ClassType;
fn deref(&self) -> &Self::Target {
self.module_store.get_class(self.class_id)
}
}
#[derive(Debug)]
pub(crate) struct UnionTypeRef<'a> {
module_store: ModuleStoreRef<'a>,
union_id: ModuleUnionTypeId,
}
impl<'a> std::ops::Deref for UnionTypeRef<'a> {
type Target = UnionType;
fn deref(&self) -> &Self::Target {
self.module_store.get_union(self.union_id)
}
}
#[derive(Debug)]
pub(crate) struct IntersectionTypeRef<'a> {
module_store: ModuleStoreRef<'a>,
intersection_id: ModuleIntersectionTypeId,
}
impl<'a> std::ops::Deref for IntersectionTypeRef<'a> {
type Target = IntersectionType;
fn deref(&self) -> &Self::Target {
self.module_store.get_intersection(self.intersection_id)
}
}
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct FunctionTypeId {
file_id: FileId,
func_id: ModuleFunctionTypeId,
}
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct ClassTypeId {
file_id: FileId,
class_id: ModuleClassTypeId,
}
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct UnionTypeId {
file_id: FileId,
union_id: ModuleUnionTypeId,
}
#[derive(Copy, Clone, Debug, Hash, Eq, PartialEq)]
pub struct IntersectionTypeId {
file_id: FileId,
intersection_id: ModuleIntersectionTypeId,
}
#[newtype_index]
struct ModuleFunctionTypeId;
#[newtype_index]
struct ModuleClassTypeId;
#[newtype_index]
struct ModuleUnionTypeId;
#[newtype_index]
struct ModuleIntersectionTypeId;
#[derive(Debug)]
struct ModuleTypeStore {
file_id: FileId,
/// arena of all function types defined in this module
functions: IndexVec<ModuleFunctionTypeId, FunctionType>,
/// arena of all class types defined in this module
classes: IndexVec<ModuleClassTypeId, ClassType>,
/// arenda of all union types created in this module
unions: IndexVec<ModuleUnionTypeId, UnionType>,
/// arena of all intersection types created in this module
intersections: IndexVec<ModuleIntersectionTypeId, IntersectionType>,
/// cached types of symbols in this module
symbol_types: FxHashMap<SymbolId, Type>,
/// cached types of AST nodes in this module
node_types: FxHashMap<NodeKey, Type>,
}
impl ModuleTypeStore {
fn new(file_id: FileId) -> Self {
Self {
file_id,
functions: IndexVec::default(),
classes: IndexVec::default(),
unions: IndexVec::default(),
intersections: IndexVec::default(),
symbol_types: FxHashMap::default(),
node_types: FxHashMap::default(),
}
}
fn add_function(&mut self, name: &str) -> FunctionTypeId {
let func_id = self.functions.push(FunctionType {
name: Name::new(name),
});
FunctionTypeId {
file_id: self.file_id,
func_id,
}
}
fn add_class(&mut self, name: &str) -> ClassTypeId {
let class_id = self.classes.push(ClassType {
name: Name::new(name),
});
ClassTypeId {
file_id: self.file_id,
class_id,
}
}
fn add_union(&mut self, elems: &[Type]) -> UnionTypeId {
let union_id = self.unions.push(UnionType {
elements: elems.iter().copied().collect(),
});
UnionTypeId {
file_id: self.file_id,
union_id,
}
}
fn add_intersection(&mut self, positive: &[Type], negative: &[Type]) -> IntersectionTypeId {
let intersection_id = self.intersections.push(IntersectionType {
positive: positive.iter().copied().collect(),
negative: negative.iter().copied().collect(),
});
IntersectionTypeId {
file_id: self.file_id,
intersection_id,
}
}
fn get_function(&self, func_id: ModuleFunctionTypeId) -> &FunctionType {
&self.functions[func_id]
}
fn get_class(&self, class_id: ModuleClassTypeId) -> &ClassType {
&self.classes[class_id]
}
fn get_union(&self, union_id: ModuleUnionTypeId) -> &UnionType {
&self.unions[union_id]
}
fn get_intersection(&self, intersection_id: ModuleIntersectionTypeId) -> &IntersectionType {
&self.intersections[intersection_id]
}
}
#[derive(Copy, Clone, Debug)]
struct DisplayType<'a> {
ty: &'a Type,
store: &'a TypeStore,
}
impl std::fmt::Display for DisplayType<'_> {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self.ty {
Type::Any => f.write_str("Any"),
Type::Never => f.write_str("Never"),
Type::Unknown => f.write_str("Unknown"),
Type::Unbound => f.write_str("Unbound"),
Type::Class(class_id) => f.write_str(self.store.get_class(*class_id).name()),
Type::Function(func_id) => f.write_str(self.store.get_function(*func_id).name()),
Type::Union(union_id) => self
.store
.get_module(union_id.file_id)
.get_union(union_id.union_id)
.display(f, self.store),
Type::Intersection(int_id) => self
.store
.get_module(int_id.file_id)
.get_intersection(int_id.intersection_id)
.display(f, self.store),
}
}
}
#[derive(Debug)]
pub(crate) struct ClassType {
name: Name,
}
impl ClassType {
fn name(&self) -> &str {
self.name.as_str()
}
}
#[derive(Debug)]
pub(crate) struct FunctionType {
name: Name,
}
impl FunctionType {
fn name(&self) -> &str {
self.name.as_str()
}
}
#[derive(Debug)]
pub(crate) struct UnionType {
// the union type includes values in any of these types
elements: FxIndexSet<Type>,
}
impl UnionType {
fn display(&self, f: &mut std::fmt::Formatter<'_>, store: &TypeStore) -> std::fmt::Result {
f.write_str("(")?;
let mut first = true;
for ty in &self.elements {
if !first {
f.write_str(" | ")?;
};
first = false;
write!(f, "{}", ty.display(store))?;
}
f.write_str(")")
}
}
// Negation types aren't expressible in annotations, and are most likely to arise from type
// narrowing along with intersections (e.g. `if not isinstance(...)`), so we represent them
// directly in intersections rather than as a separate type. This sacrifices some efficiency in the
// case where a Not appears outside an intersection (unclear when that could even happen, but we'd
// have to represent it as a single-element intersection if it did) in exchange for better
// efficiency in the not-within-intersection case.
#[derive(Debug)]
pub(crate) struct IntersectionType {
// the intersection type includes only values in all of these types
positive: FxIndexSet<Type>,
// negated elements of the intersection, e.g.
negative: FxIndexSet<Type>,
}
impl IntersectionType {
fn display(&self, f: &mut std::fmt::Formatter<'_>, store: &TypeStore) -> std::fmt::Result {
f.write_str("(")?;
let mut first = true;
for (neg, ty) in self
.positive
.iter()
.map(|ty| (false, ty))
.chain(self.negative.iter().map(|ty| (true, ty)))
{
if !first {
f.write_str(" & ")?;
};
first = false;
if neg {
f.write_str("~")?;
};
write!(f, "{}", ty.display(store))?;
}
f.write_str(")")
}
}
#[cfg(test)]
mod tests {
use crate::files::Files;
use crate::types::{Type, TypeStore};
use crate::FxIndexSet;
use std::path::Path;
#[test]
fn add_class() {
let mut store = TypeStore::default();
let files = Files::default();
let file_id = files.intern(Path::new("/foo"));
let id = store.add_class(file_id, "C");
assert_eq!(store.get_class(id).name(), "C");
let class = Type::Class(id);
assert_eq!(format!("{}", class.display(&store)), "C");
}
#[test]
fn add_function() {
let mut store = TypeStore::default();
let files = Files::default();
let file_id = files.intern(Path::new("/foo"));
let id = store.add_function(file_id, "func");
assert_eq!(store.get_function(id).name(), "func");
let func = Type::Function(id);
assert_eq!(format!("{}", func.display(&store)), "func");
}
#[test]
fn add_union() {
let mut store = TypeStore::default();
let files = Files::default();
let file_id = files.intern(Path::new("/foo"));
let c1 = store.add_class(file_id, "C1");
let c2 = store.add_class(file_id, "C2");
let elems = vec![Type::Class(c1), Type::Class(c2)];
let id = store.add_union(file_id, &elems);
assert_eq!(
store.get_union(id).elements,
elems.into_iter().collect::<FxIndexSet<_>>()
);
let union = Type::Union(id);
assert_eq!(format!("{}", union.display(&store)), "(C1 | C2)");
}
#[test]
fn add_intersection() {
let mut store = TypeStore::default();
let files = Files::default();
let file_id = files.intern(Path::new("/foo"));
let c1 = store.add_class(file_id, "C1");
let c2 = store.add_class(file_id, "C2");
let c3 = store.add_class(file_id, "C3");
let pos = vec![Type::Class(c1), Type::Class(c2)];
let neg = vec![Type::Class(c3)];
let id = store.add_intersection(file_id, &pos, &neg);
assert_eq!(
store.get_intersection(id).positive,
pos.into_iter().collect::<FxIndexSet<_>>()
);
assert_eq!(
store.get_intersection(id).negative,
neg.into_iter().collect::<FxIndexSet<_>>()
);
let intersection = Type::Intersection(id);
assert_eq!(
format!("{}", intersection.display(&store)),
"(C1 & C2 & ~C3)"
);
}
}

View file

@ -0,0 +1,141 @@
#![allow(dead_code)]
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::module::ModuleName;
use crate::symbols::{Definition, ImportFromDefinition, SymbolId};
use crate::types::Type;
use crate::FileId;
use ruff_python_ast::AstNode;
// TODO this should not take a &mut db, it should be a query, not a mutation. This means we'll need
// to use interior mutability in TypeStore instead, and avoid races in populating the cache.
#[tracing::instrument(level = "trace", skip(db))]
pub fn infer_symbol_type<Db>(db: &mut Db, file_id: FileId, symbol_id: SymbolId) -> Type
where
Db: SemanticDb + HasJar<SemanticJar>,
{
let symbols = db.symbol_table(file_id);
let defs = symbols.defs(symbol_id);
if let Some(ty) = db
.jar()
.type_store
.get_cached_symbol_type(file_id, symbol_id)
{
return ty;
}
// TODO handle multiple defs, conditional defs...
assert_eq!(defs.len(), 1);
let ty = match &defs[0] {
Definition::ImportFrom(ImportFromDefinition {
module,
name,
level,
}) => {
// TODO relative imports
assert!(matches!(level, 0));
let module_name = ModuleName::new(module.as_ref().expect("TODO relative imports"));
if let Some(module) = db.resolve_module(module_name) {
let remote_file_id = module.path(db).file();
let remote_symbols = db.symbol_table(remote_file_id);
if let Some(remote_symbol_id) = remote_symbols.root_symbol_id_by_name(name) {
db.infer_symbol_type(remote_file_id, remote_symbol_id)
} else {
Type::Unknown
}
} else {
Type::Unknown
}
}
Definition::ClassDef(node_key) => {
if let Some(ty) = db
.jar()
.type_store
.get_cached_node_type(file_id, node_key.erased())
{
ty
} else {
let parsed = db.parse(file_id);
let ast = parsed.ast();
let node = node_key.resolve_unwrap(ast.as_any_node_ref());
let store = &mut db.jar_mut().type_store;
let ty = Type::Class(store.add_class(file_id, &node.name.id));
store.cache_node_type(file_id, *node_key.erased(), ty);
ty
}
}
_ => todo!("other kinds of definitions"),
};
db.jar_mut()
.type_store
.cache_symbol_type(file_id, symbol_id, ty);
// TODO record dependencies
ty
}
#[cfg(test)]
mod tests {
use crate::db::tests::TestDb;
use crate::db::{HasJar, SemanticDb, SemanticJar};
use crate::module::{ModuleName, ModuleSearchPath, ModuleSearchPathKind};
use crate::types::Type;
// TODO with virtual filesystem we shouldn't have to write files to disk for these
// tests
struct TestCase {
temp_dir: tempfile::TempDir,
db: TestDb,
src: ModuleSearchPath,
}
fn create_test() -> std::io::Result<TestCase> {
let temp_dir = tempfile::tempdir()?;
let src = temp_dir.path().join("src");
std::fs::create_dir(&src)?;
let src = ModuleSearchPath::new(src.canonicalize()?, ModuleSearchPathKind::FirstParty);
let roots = vec![src.clone()];
let mut db = TestDb::default();
db.set_module_search_paths(roots);
Ok(TestCase { temp_dir, db, src })
}
#[test]
fn follow_import_to_class() -> std::io::Result<()> {
let TestCase {
src,
mut db,
temp_dir: _temp_dir,
} = create_test()?;
let a_path = src.path().join("a.py");
let b_path = src.path().join("b.py");
std::fs::write(a_path, "from b import C as D")?;
std::fs::write(b_path, "class C: pass")?;
let a_file = db
.resolve_module(ModuleName::new("a"))
.expect("module should be found")
.path(&db)
.file();
let a_syms = db.symbol_table(a_file);
let d_sym = a_syms
.root_symbol_id_by_name("D")
.expect("D symbol should be found");
let ty = db.infer_symbol_type(a_file, d_sym);
let jar = HasJar::<SemanticJar>::jar(&db);
assert!(matches!(ty, Type::Class(_)));
assert_eq!(format!("{}", ty.display(&jar.type_store)), "C");
Ok(())
}
}

View file

@ -0,0 +1,78 @@
use anyhow::Context;
use std::path::Path;
use crate::files::Files;
use crate::program::{FileChange, FileChangeKind};
use notify::event::{CreateKind, RemoveKind};
use notify::{recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher};
pub struct FileWatcher {
watcher: RecommendedWatcher,
}
pub trait EventHandler: Send + 'static {
fn handle(&self, changes: Vec<FileChange>);
}
impl<F> EventHandler for F
where
F: Fn(Vec<FileChange>) + Send + 'static,
{
fn handle(&self, changes: Vec<FileChange>) {
let f = self;
f(changes);
}
}
impl FileWatcher {
pub fn new<E>(handler: E, files: Files) -> anyhow::Result<Self>
where
E: EventHandler,
{
Self::from_handler(Box::new(handler), files)
}
fn from_handler(handler: Box<dyn EventHandler>, files: Files) -> anyhow::Result<Self> {
let watcher = recommended_watcher(move |changes: notify::Result<Event>| {
match changes {
Ok(event) => {
// TODO verify that this handles all events correctly
let change_kind = match event.kind {
EventKind::Create(CreateKind::File) => FileChangeKind::Created,
EventKind::Modify(_) => FileChangeKind::Modified,
EventKind::Remove(RemoveKind::File) => FileChangeKind::Deleted,
_ => {
return;
}
};
let mut changes = Vec::new();
for path in event.paths {
if path.is_file() {
let id = files.intern(&path);
changes.push(FileChange::new(id, change_kind));
}
}
if !changes.is_empty() {
handler.handle(changes);
}
}
// TODO proper error handling
Err(err) => {
panic!("Error: {err}");
}
}
})
.context("Failed to create file watcher.")?;
Ok(Self { watcher })
}
pub fn watch_folder(&mut self, path: &Path) -> anyhow::Result<()> {
self.watcher.watch(path, RecursiveMode::Recursive)?;
Ok(())
}
}

View file

@ -11,9 +11,9 @@ repository = { workspace = true }
license = { workspace = true } license = { workspace = true }
[dependencies] [dependencies]
itertools = { workspace = true }
glob = { workspace = true } glob = { workspace = true }
globset = { workspace = true } globset = { workspace = true }
itertools = { workspace = true }
regex = { workspace = true } regex = { workspace = true }
filetime = { workspace = true } filetime = { workspace = true }
seahash = { workspace = true } seahash = { workspace = true }

File diff suppressed because it is too large Load diff

View file

@ -1,11 +1,10 @@
#![allow(clippy::derive_partial_eq_without_eq)] #![allow(clippy::derive_partial_eq_without_eq)]
use std::cell::OnceCell;
use std::fmt; use std::fmt;
use std::fmt::Debug; use std::fmt::Debug;
use std::ops::Deref; use std::ops::Deref;
use std::slice::{Iter, IterMut}; use std::slice::{Iter, IterMut};
use std::sync::OnceLock;
use bitflags::bitflags; use bitflags::bitflags;
use itertools::Itertools; use itertools::Itertools;
@ -1420,7 +1419,7 @@ impl StringLiteralValue {
Self { Self {
inner: StringLiteralValueInner::Concatenated(ConcatenatedStringLiteral { inner: StringLiteralValueInner::Concatenated(ConcatenatedStringLiteral {
strings, strings,
value: OnceCell::new(), value: OnceLock::new(),
}), }),
} }
} }
@ -1782,7 +1781,7 @@ struct ConcatenatedStringLiteral {
/// Each string literal that makes up the concatenated string. /// Each string literal that makes up the concatenated string.
strings: Vec<StringLiteral>, strings: Vec<StringLiteral>,
/// The concatenated string value. /// The concatenated string value.
value: OnceCell<Box<str>>, value: OnceLock<Box<str>>,
} }
impl ConcatenatedStringLiteral { impl ConcatenatedStringLiteral {
@ -4168,7 +4167,7 @@ mod tests {
assert_eq!(std::mem::size_of::<ExprSetComp>(), 40); assert_eq!(std::mem::size_of::<ExprSetComp>(), 40);
assert_eq!(std::mem::size_of::<ExprSlice>(), 32); assert_eq!(std::mem::size_of::<ExprSlice>(), 32);
assert_eq!(std::mem::size_of::<ExprStarred>(), 24); assert_eq!(std::mem::size_of::<ExprStarred>(), 24);
assert_eq!(std::mem::size_of::<ExprStringLiteral>(), 48); assert_eq!(std::mem::size_of::<ExprStringLiteral>(), 56);
assert_eq!(std::mem::size_of::<ExprSubscript>(), 32); assert_eq!(std::mem::size_of::<ExprSubscript>(), 32);
assert_eq!(std::mem::size_of::<ExprTuple>(), 40); assert_eq!(std::mem::size_of::<ExprTuple>(), 40);
assert_eq!(std::mem::size_of::<ExprUnaryOp>(), 24); assert_eq!(std::mem::size_of::<ExprUnaryOp>(), 24);