# Add a subcommand to generate dependency graphs (#13402)

## Summary

This PR adds an experimental Ruff subcommand to generate dependency
graphs based on module resolution.

A few highlights:

- You can generate either dependency or dependent graphs via the
`--direction` command-line argument.
- Like Pants, we also provide an option to identify imports from string
literals (`--detect-string-imports`).
- Users can also provide additional dependency data via the
`include-dependencies` key under `[tool.ruff.import-map]`. This map uses
file paths as keys, and lists of strings as values. Those strings can be
file paths or globs.

The dependency resolution uses the red-knot module resolver, which is
intended to be fully spec-compliant, so this is also a chance to exercise
the module resolver in a real-world setting.

The CLI is, e.g., `ruff graph build ../autobot`, which will output a
JSON map from each file in the `autobot` project to the files it depends on.
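
For a sense of the output shape: the emitted JSON maps each file to the (sorted) set of files it depends on. The paths below are hypothetical:

```json
{
  "autobot/api.py": ["autobot/models.py", "autobot/utils.py"],
  "autobot/models.py": [],
  "autobot/utils.py": []
}
```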
Commit 4e935f7d7d (parent 260c2ecd15), authored by Charlie Marsh on 2024-09-19 21:06:32 -04:00 and committed via GitHub.
30 changed files with 1339 additions and 45 deletions

### The `collector` module (new file, +111 lines)

```rust
use red_knot_python_semantic::ModuleName;
use ruff_python_ast::visitor::source_order::{walk_body, walk_expr, walk_stmt, SourceOrderVisitor};
use ruff_python_ast::{self as ast, Expr, ModModule, Stmt};

/// Collect all imports for a given Python file.
#[derive(Default, Debug)]
pub(crate) struct Collector<'a> {
    /// The path to the current module.
    module_path: Option<&'a [String]>,
    /// Whether to detect imports from string literals.
    string_imports: bool,
    /// The collected imports from the Python AST.
    imports: Vec<CollectedImport>,
}

impl<'a> Collector<'a> {
    pub(crate) fn new(module_path: Option<&'a [String]>, string_imports: bool) -> Self {
        Self {
            module_path,
            string_imports,
            imports: Vec::new(),
        }
    }

    #[must_use]
    pub(crate) fn collect(mut self, module: &ModModule) -> Vec<CollectedImport> {
        walk_body(&mut self, &module.body);
        self.imports
    }
}

impl<'ast> SourceOrderVisitor<'ast> for Collector<'_> {
    fn visit_stmt(&mut self, stmt: &'ast Stmt) {
        match stmt {
            Stmt::ImportFrom(ast::StmtImportFrom {
                names,
                module,
                level,
                range: _,
            }) => {
                let module = module.as_deref();
                let level = *level;
                for alias in names {
                    let mut components = vec![];

                    if level > 0 {
                        // If we're resolving a relative import, we must have a module path.
                        let Some(module_path) = self.module_path else {
                            return;
                        };

                        // Start with the containing module.
                        components.extend(module_path.iter().map(String::as_str));

                        // Remove segments based on the number of dots.
                        for _ in 0..level {
                            if components.is_empty() {
                                return;
                            }
                            components.pop();
                        }
                    }

                    // Add the module path.
                    if let Some(module) = module {
                        components.extend(module.split('.'));
                    }

                    // Add the alias name.
                    components.push(alias.name.as_str());

                    if let Some(module_name) = ModuleName::from_components(components) {
                        self.imports.push(CollectedImport::ImportFrom(module_name));
                    }
                }
            }
            Stmt::Import(ast::StmtImport { names, range: _ }) => {
                for alias in names {
                    if let Some(module_name) = ModuleName::new(alias.name.as_str()) {
                        self.imports.push(CollectedImport::Import(module_name));
                    }
                }
            }
            _ => {
                walk_stmt(self, stmt);
            }
        }
    }

    fn visit_expr(&mut self, expr: &'ast Expr) {
        if self.string_imports {
            if let Expr::StringLiteral(ast::ExprStringLiteral { value, range: _ }) = expr {
                // Determine whether the string literal "looks like" an import statement: contains
                // a dot, and consists solely of valid Python identifiers.
                let value = value.to_str();
                if let Some(module_name) = ModuleName::new(value) {
                    self.imports.push(CollectedImport::Import(module_name));
                }
            }
            walk_expr(self, expr);
        }
    }
}

#[derive(Debug)]
pub(crate) enum CollectedImport {
    /// The import was part of an `import` statement.
    Import(ModuleName),
    /// The import was part of an `import from` statement.
    ImportFrom(ModuleName),
}
```
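
As a quick illustration of the relative-import handling above, here is a sketch (not part of this PR) that runs the collector over a single `from ..foo import bar` statement from within the crate, assuming a direct dependency on `ruff_python_parser`; the `pkg.sub.mod` module path is made up:

```rust
use ruff_python_parser::parse_module;

use crate::collector::Collector;

fn collect_example() {
    // Hypothetical module path for the file being analyzed: `pkg/sub/mod.py`.
    let module_path = vec!["pkg".to_string(), "sub".to_string(), "mod".to_string()];

    let parsed = parse_module("from ..foo import bar").expect("valid syntax");

    // With two leading dots, the collector starts from `pkg.sub.mod`, pops `mod` and
    // `sub`, then appends `foo` and `bar`, yielding `pkg.foo.bar` as an `ImportFrom`
    // candidate for the resolver.
    let imports = Collector::new(Some(module_path.as_slice()), false).collect(parsed.syntax());
    for import in &imports {
        println!("{import:?}");
    }
}
```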

### The `db` module (new file, +94 lines)

```rust
use anyhow::Result;
use red_knot_python_semantic::{Db, Program, ProgramSettings, PythonVersion, SearchPathSettings};
use ruff_db::files::{File, Files};
use ruff_db::system::{OsSystem, System, SystemPathBuf};
use ruff_db::vendored::VendoredFileSystem;
use ruff_db::{Db as SourceDb, Upcast};

#[salsa::db]
#[derive(Default)]
pub struct ModuleDb {
    storage: salsa::Storage<Self>,
    files: Files,
    system: OsSystem,
    vendored: VendoredFileSystem,
}

impl ModuleDb {
    /// Initialize a [`ModuleDb`] from the given source root.
    pub fn from_src_roots(mut src_roots: impl Iterator<Item = SystemPathBuf>) -> Result<Self> {
        let search_paths = {
            // Use the first source root.
            let src_root = src_roots
                .next()
                .ok_or_else(|| anyhow::anyhow!("No source roots provided"))?;

            let mut search_paths = SearchPathSettings::new(src_root.to_path_buf());

            // Add the remaining source roots as extra paths.
            for src_root in src_roots {
                search_paths.extra_paths.push(src_root.to_path_buf());
            }

            search_paths
        };

        let db = Self::default();
        Program::from_settings(
            &db,
            &ProgramSettings {
                target_version: PythonVersion::default(),
                search_paths,
            },
        )?;

        Ok(db)
    }

    /// Create a snapshot of the current database.
    #[must_use]
    pub fn snapshot(&self) -> Self {
        Self {
            storage: self.storage.clone(),
            system: self.system.clone(),
            vendored: self.vendored.clone(),
            files: self.files.snapshot(),
        }
    }
}

impl Upcast<dyn SourceDb> for ModuleDb {
    fn upcast(&self) -> &(dyn SourceDb + 'static) {
        self
    }

    fn upcast_mut(&mut self) -> &mut (dyn SourceDb + 'static) {
        self
    }
}

#[salsa::db]
impl SourceDb for ModuleDb {
    fn vendored(&self) -> &VendoredFileSystem {
        &self.vendored
    }

    fn system(&self) -> &dyn System {
        &self.system
    }

    fn files(&self) -> &Files {
        &self.files
    }
}

#[salsa::db]
impl Db for ModuleDb {
    fn is_file_open(&self, file: File) -> bool {
        !file.path(self).is_vendored_path()
    }
}

#[salsa::db]
impl salsa::Database for ModuleDb {
    fn salsa_event(&self, _event: &dyn Fn() -> salsa::Event) {}
}
```
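
A minimal usage sketch (not from the PR), written as if from another module in the same crate; the `./src` root is a placeholder, and `SystemPath::new` is assumed to be available from `ruff_db`:

```rust
use anyhow::Result;
use ruff_db::system::SystemPath;

use crate::ModuleDb;

fn build_db() -> Result<ModuleDb> {
    // Seed the module resolver's search paths with a single (hypothetical) source root.
    let db = ModuleDb::from_src_roots([SystemPath::new("./src").to_path_buf()].into_iter())?;

    // Each worker can take its own snapshot of the database.
    let _snapshot = db.snapshot();

    Ok(db)
}
```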

### The crate root (new file, +120 lines)

```rust
use crate::collector::Collector;
pub use crate::db::ModuleDb;
use crate::resolver::Resolver;
pub use crate::settings::{AnalyzeSettings, Direction};
use anyhow::Result;
use red_knot_python_semantic::SemanticModel;
use ruff_db::files::system_path_to_file;
use ruff_db::parsed::parsed_module;
use ruff_db::system::{SystemPath, SystemPathBuf};
use ruff_python_ast::helpers::to_module_path;
use serde::{Deserialize, Serialize};
use std::collections::{BTreeMap, BTreeSet};

mod collector;
mod db;
mod resolver;
mod settings;

#[derive(Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ModuleImports(BTreeSet<SystemPathBuf>);

impl ModuleImports {
    /// Insert a file path into the module imports.
    pub fn insert(&mut self, path: SystemPathBuf) {
        self.0.insert(path);
    }

    /// Returns `true` if the module imports are empty.
    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    /// Returns the number of module imports.
    pub fn len(&self) -> usize {
        self.0.len()
    }

    /// Convert the file paths to be relative to a given path.
    #[must_use]
    pub fn relative_to(self, path: &SystemPath) -> Self {
        Self(
            self.0
                .into_iter()
                .map(|import| {
                    import
                        .strip_prefix(path)
                        .map(SystemPath::to_path_buf)
                        .unwrap_or(import)
                })
                .collect(),
        )
    }
}

#[derive(Debug, Default)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct ImportMap(BTreeMap<SystemPathBuf, ModuleImports>);

impl ImportMap {
    /// Insert a module's imports into the map.
    pub fn insert(&mut self, path: SystemPathBuf, imports: ModuleImports) {
        self.0.insert(path, imports);
    }

    /// Reverse the [`ImportMap`], e.g., to convert from dependencies to dependents.
    #[must_use]
    pub fn reverse(imports: impl IntoIterator<Item = (SystemPathBuf, ModuleImports)>) -> Self {
        let mut reverse = ImportMap::default();
        for (path, imports) in imports {
            for import in imports.0 {
                reverse.0.entry(import).or_default().insert(path.clone());
            }
            reverse.0.entry(path).or_default();
        }
        reverse
    }
}

impl FromIterator<(SystemPathBuf, ModuleImports)> for ImportMap {
    fn from_iter<I: IntoIterator<Item = (SystemPathBuf, ModuleImports)>>(iter: I) -> Self {
        let mut map = ImportMap::default();
        for (path, imports) in iter {
            map.0.entry(path).or_default().0.extend(imports.0);
        }
        map
    }
}

/// Generate the module imports for a given Python file.
pub fn generate(
    path: &SystemPath,
    package: Option<&SystemPath>,
    string_imports: bool,
    db: &ModuleDb,
) -> Result<ModuleImports> {
    // Read and parse the source code.
    let file = system_path_to_file(db, path)?;
    let parsed = parsed_module(db, file);

    let module_path =
        package.and_then(|package| to_module_path(package.as_std_path(), path.as_std_path()));
    let model = SemanticModel::new(db, file);

    // Collect the imports.
    let imports = Collector::new(module_path.as_deref(), string_imports).collect(parsed.syntax());

    // Resolve the imports.
    let mut resolved_imports = ModuleImports::default();
    for import in imports {
        let Some(resolved) = Resolver::new(&model).resolve(import) else {
            continue;
        };
        let Some(path) = resolved.as_system_path() else {
            continue;
        };
        resolved_imports.insert(path.to_path_buf());
    }

    Ok(resolved_imports)
}
```
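
To tie the pieces together, here's a hedged sketch (again, not the PR's actual CLI wiring) of driving `generate` over a list of files and collecting the results into an `ImportMap`, written as if from another module in the same crate; the file list and the `None` package argument are placeholders:

```rust
use anyhow::Result;

use ruff_db::system::SystemPathBuf;

use crate::{generate, ImportMap, ModuleDb};

fn dependency_map(db: &ModuleDb, files: &[SystemPathBuf]) -> Result<ImportMap> {
    let mut pairs = Vec::new();
    for path in files {
        // Resolve each file's imports to the files they point at
        // (no package root, string-literal detection disabled).
        let imports = generate(path, None, false, db)?;
        pairs.push((path.clone(), imports));
    }

    // Collect into a map from each file to the files it depends on.
    Ok(pairs.into_iter().collect())
}
```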

### The `resolver` module (new file, +39 lines)

```rust
use red_knot_python_semantic::SemanticModel;
use ruff_db::files::FilePath;

use crate::collector::CollectedImport;

/// Resolve the collected imports for a given Python file.
pub(crate) struct Resolver<'a> {
    semantic: &'a SemanticModel<'a>,
}

impl<'a> Resolver<'a> {
    /// Initialize a [`Resolver`] with a given [`SemanticModel`].
    pub(crate) fn new(semantic: &'a SemanticModel<'a>) -> Self {
        Self { semantic }
    }

    /// Resolve the [`CollectedImport`] into a [`FilePath`].
    pub(crate) fn resolve(&self, import: CollectedImport) -> Option<&'a FilePath> {
        match import {
            CollectedImport::Import(import) => self
                .semantic
                .resolve_module(import)
                .map(|module| module.file().path(self.semantic.db())),
            CollectedImport::ImportFrom(import) => {
                // Attempt to resolve the member (e.g., given `from foo import bar`, look for `foo.bar`).
                let parent = import.parent();

                self.semantic
                    .resolve_module(import)
                    .map(|module| module.file().path(self.semantic.db()))
                    .or_else(|| {
                        // Attempt to resolve the module (e.g., given `from foo import bar`, look for `foo`).
                        self.semantic
                            .resolve_module(parent?)
                            .map(|module| module.file().path(self.semantic.db()))
                    })
            }
        }
    }
}
```

### The `settings` module (new file, +52 lines)

```rust
use ruff_linter::display_settings;
use ruff_linter::settings::types::{ExtensionMapping, PreviewMode};
use ruff_macros::CacheKey;
use std::collections::BTreeMap;
use std::fmt;
use std::path::PathBuf;

#[derive(Debug, Default, Clone, CacheKey)]
pub struct AnalyzeSettings {
    pub preview: PreviewMode,
    pub detect_string_imports: bool,
    pub include_dependencies: BTreeMap<PathBuf, (PathBuf, Vec<String>)>,
    pub extension: ExtensionMapping,
}

impl fmt::Display for AnalyzeSettings {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "\n# Analyze Settings")?;
        display_settings! {
            formatter = f,
            namespace = "analyze",
            fields = [
                self.preview,
                self.detect_string_imports,
                self.extension | debug,
                self.include_dependencies | debug,
            ]
        }
        Ok(())
    }
}

#[derive(Default, Debug, Copy, Clone, PartialEq, Eq, CacheKey)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
pub enum Direction {
    /// Construct a map from module to its dependencies (i.e., the modules that it imports).
    #[default]
    Dependencies,
    /// Construct a map from module to its dependents (i.e., the modules that import it).
    Dependents,
}

impl fmt::Display for Direction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Dependencies => write!(f, "\"dependencies\""),
            Self::Dependents => write!(f, "\"dependents\""),
        }
    }
}
```
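
Finally, a sketch of how the `Direction` setting could be applied to an already-built list of `(file, imports)` pairs, using the `ImportMap` constructors from the crate root above; this is illustrative, not the command's actual implementation:

```rust
use ruff_db::system::SystemPathBuf;

use crate::{Direction, ImportMap, ModuleImports};

/// Orient the graph according to `--direction`: either keep it as a map from each
/// file to the files it imports, or reverse it into a map from each file to the
/// files that import it.
fn orient(pairs: Vec<(SystemPathBuf, ModuleImports)>, direction: Direction) -> ImportMap {
    match direction {
        Direction::Dependencies => pairs.into_iter().collect(),
        Direction::Dependents => ImportMap::reverse(pairs),
    }
}
```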