commit 0d8e4bd6e9580a2484fa68c45cb3569e0815ac3f Author: Charles Marsh Date: Tue Aug 9 13:19:19 2022 -0400 Initial commit for linter prototype diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000000..9b4eae7c8e --- /dev/null +++ b/.gitignore @@ -0,0 +1,24 @@ +# Local cache +.cache + +# IntelliJ +.idea + +### +# Rust.gitignore +### + +# Generated by Cargo +# will have compiled files and executables +debug/ +target/ + +# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries +# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html +Cargo.lock + +# These are backup files generated by rustfmt +**/*.rs.bk + +# MSVC Windows builds of rustc generate these, which store debugging information +*.pdb diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000000..93a0ed7d3a --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "rust-python-linter" +version = "0.1.0" +edition = "2021" + +[dependencies] +anyhow = { version = "1.0.60" } +bincode = "1.3.3" +cacache = "10.0.1" +chrono = "0.4.21" +clap = { version = "3.2.16", features = ["derive"] } +colored = { version = "2.0.0" } +fern = { version = "0.6.1" } +log = "0.4.17" +rayon = "1.5.3" +rustpython-parser = { git = "https://github.com/RustPython/RustPython.git" } +serde = { version = "1", features = ["derive"] } +serde_json = "1" +walkdir = { version = "2.3.2" } + diff --git a/README.md b/README.md new file mode 100644 index 0000000000..3ba8e999b0 --- /dev/null +++ b/README.md @@ -0,0 +1 @@ +# rust-python-linter diff --git a/src/bin/main.rs b/src/bin/main.rs new file mode 100644 index 0000000000..4d5ae46470 --- /dev/null +++ b/src/bin/main.rs @@ -0,0 +1,91 @@ +use std::path::PathBuf; +use std::time::Instant; + +use anyhow::Result; +use clap::{Parser, ValueHint}; + +use log::info; +use rayon::prelude::*; +use walkdir::{DirEntry, WalkDir}; + +use rust_python_linter::linter::check_path; +use rust_python_linter::message::Message; + +fn set_up_logging(verbose: bool) -> Result<()> { + fern::Dispatch::new() + .format(|out, message, record| { + out.finish(format_args!( + "{}[{}][{}] {}", + chrono::Local::now().format("[%Y-%m-%d][%H:%M:%S]"), + record.target(), + record.level(), + message + )) + }) + .level(if verbose { + log::LevelFilter::Debug + } else { + log::LevelFilter::Info + }) + .level_for("hyper", log::LevelFilter::Info) + .chain(std::io::stdout()) + .apply() + .map_err(|e| e.into()) +} + +#[derive(Debug, Parser)] +#[clap(name = "rust-python-linter")] +#[clap(about = "A bare-bones Python linter written in Rust", long_about = None)] +struct Cli { + #[clap(name = "filename", parse(from_os_str), value_hint = ValueHint::DirPath)] + filename: PathBuf, + #[clap(short, long, action)] + verbose: bool, + // /// Files to process + // #[clap(name = "FILE", parse(from_os_str), value_hint = ValueHint::AnyPath)] + // files: Vec, +} + +fn is_not_hidden(entry: &DirEntry) -> bool { + entry + .file_name() + .to_str() + .map(|s| entry.depth() == 0 || !s.starts_with('.')) + .unwrap_or(false) +} + +fn main() -> Result<()> { + let cli = Cli::parse(); + + set_up_logging(cli.verbose)?; + + // Collect all the files to check. + let start = Instant::now(); + let files: Vec = WalkDir::new(cli.filename) + .follow_links(true) + .into_iter() + .filter_entry(is_not_hidden) + .filter_map(|entry| entry.ok()) + .filter(|entry| entry.path().to_string_lossy().ends_with(".py")) + .collect(); + let duration = start.elapsed(); + info!("Identified files to lint in: {:?}", duration); + + let start = Instant::now(); + let messages: Vec = files + .par_iter() + .map(|entry| check_path(entry.path()).unwrap()) + .flatten() + .collect(); + let duration = start.elapsed(); + info!("Checked files in: {:?}", duration); + + if !messages.is_empty() { + println!("Found {} error(s)!", messages.len()); + for message in messages { + println!("{}", message); + } + } + + Ok(()) +} diff --git a/src/cache.rs b/src/cache.rs new file mode 100644 index 0000000000..de1329f345 --- /dev/null +++ b/src/cache.rs @@ -0,0 +1,67 @@ +use std::borrow::Cow; +use std::os::unix::fs::MetadataExt; +use std::path::Path; + +use serde::{Deserialize, Serialize}; + +use crate::message::Message; + +#[derive(Serialize, Deserialize)] +struct CacheMetadata { + size: u64, + mtime: i64, +} + +#[derive(Serialize)] +struct CheckResultRef<'a> { + metadata: &'a CacheMetadata, + messages: &'a [Message], +} + +#[derive(Deserialize)] +struct CheckResult { + metadata: CacheMetadata, + messages: Vec, +} + +fn cache_dir() -> &'static str { + "./.cache" +} + +fn cache_key(path: &Path) -> Cow { + path.to_string_lossy() +} + +pub fn get(path: &Path) -> Option> { + if let Ok(encoded) = cacache::read_sync(cache_dir(), cache_key(path)) { + if let Ok(file_metadata) = path.metadata() { + if let Ok(CheckResult { metadata, messages }) = + bincode::deserialize::(&encoded[..]) + { + if file_metadata.size() == metadata.size && file_metadata.mtime() == metadata.mtime + { + return Some(messages); + } + } + } + } + + None +} + +pub fn set(path: &Path, messages: &[Message]) { + if let Ok(metadata) = path.metadata() { + let check_result = CheckResultRef { + metadata: &CacheMetadata { + size: metadata.size(), + mtime: metadata.mtime(), + }, + messages, + }; + let _ = cacache::write_sync( + cache_dir(), + cache_key(path), + bincode::serialize(&check_result).unwrap(), + ); + } +} diff --git a/src/check.rs b/src/check.rs new file mode 100644 index 0000000000..195da0cb65 --- /dev/null +++ b/src/check.rs @@ -0,0 +1,57 @@ +use std::path::Path; + +use rustpython_parser::ast::{Located, StmtKind, Suite}; + +use crate::message::Message; + +pub fn check_ast(path: &Path, python_ast: &Suite) -> Vec { + let mut messages: Vec = vec![]; + for statement in python_ast { + let Located { + location, + custom: _, + node, + } = statement; + match node { + StmtKind::FunctionDef { .. } => {} + StmtKind::AsyncFunctionDef { .. } => {} + StmtKind::ClassDef { .. } => {} + StmtKind::Return { .. } => {} + StmtKind::Delete { .. } => {} + StmtKind::Assign { .. } => {} + StmtKind::AugAssign { .. } => {} + StmtKind::AnnAssign { .. } => {} + StmtKind::For { .. } => {} + StmtKind::AsyncFor { .. } => {} + StmtKind::While { .. } => {} + StmtKind::If { .. } => {} + StmtKind::With { .. } => {} + StmtKind::AsyncWith { .. } => {} + StmtKind::Raise { .. } => {} + StmtKind::Try { .. } => {} + StmtKind::Assert { .. } => {} + StmtKind::Import { .. } => {} + StmtKind::ImportFrom { + level: _, + module: _, + names, + } => { + for alias in names { + if alias.name == "*" { + messages.push(Message::ImportStarUsage { + filename: path.to_path_buf(), + location: *location, + }); + } + } + } + StmtKind::Global { .. } => {} + StmtKind::Nonlocal { .. } => {} + StmtKind::Expr { .. } => {} + StmtKind::Pass => {} + StmtKind::Break => {} + StmtKind::Continue => {} + } + } + messages +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000000..665e3ecdcc --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,5 @@ +mod cache; +mod check; +pub mod linter; +pub mod message; +mod parser; diff --git a/src/linter.rs b/src/linter.rs new file mode 100644 index 0000000000..534754c933 --- /dev/null +++ b/src/linter.rs @@ -0,0 +1,45 @@ +use std::path::Path; + +use anyhow::Result; +use log::debug; +use serde::{Deserialize, Serialize}; + +use crate::check::check_ast; +use crate::message::Message; +use crate::{cache, parser}; + +#[derive(Serialize, Deserialize)] +struct CacheMetadata { + size: u64, + mtime: i64, +} + +#[derive(Serialize, Deserialize)] +struct CheckResult { + metadata: CacheMetadata, + messages: Vec, +} + +pub fn check_path(path: &Path) -> Result> { + // TODO(charlie): These specific files are causing a stack overflow. + if path.to_string_lossy().eq_ignore_ascii_case( + "../../spring-experiments/spr_experiments/spr_experiments/assayworks/experiments/order_20220204/pipeline_steps.py") || + path.to_string_lossy().eq_ignore_ascii_case + ( "../../spring-experiments/spr_platform/spr_platform/data_index/bigquery_index.py") + { + return Ok(vec![]); + } + + // Check the cache. + if let Some(messages) = cache::get(path) { + debug!("Cache hit for: {}", path.to_string_lossy()); + return Ok(messages); + } + + // Run the linter. + let python_ast = parser::parse(path)?; + let messages = check_ast(path, &python_ast); + cache::set(path, &messages); + + Ok(messages) +} diff --git a/src/message.rs b/src/message.rs new file mode 100644 index 0000000000..5f24530d49 --- /dev/null +++ b/src/message.rs @@ -0,0 +1,69 @@ +use colored::Colorize; +use rustpython_parser::ast::Location; +use serde::{Deserialize, Serialize}; +use std::fmt; +use std::path::PathBuf; + +#[derive(Serialize, Deserialize)] +#[serde(remote = "Location")] +struct LocationDef { + #[serde(getter = "Location::row")] + row: usize, + #[serde(getter = "Location::column")] + column: usize, +} + +impl From for Location { + fn from(def: LocationDef) -> Location { + Location::new(def.row, def.column) + } +} + +#[derive(Serialize, Deserialize)] +pub enum Message { + ImportStarUsage { + filename: PathBuf, + #[serde(with = "LocationDef")] + location: Location, + }, +} + +impl Message { + /// A four-letter shorthand code for the message. + pub fn code(&self) -> &'static str { + match self { + Message::ImportStarUsage { + filename: _, + location: _, + } => "F403", + } + } + + /// The body text for the message. + pub fn body(&self) -> &'static str { + match self { + Message::ImportStarUsage { + filename: _, + location: _, + } => "Unable to detect undefined names", + } + } +} + +impl fmt::Display for Message { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + Message::ImportStarUsage { filename, location } => write!( + f, + "{}{}{}{}{}\t{}\t{}", + filename.to_string_lossy().white().bold(), + ":".cyan(), + location.column(), + ":".cyan(), + location.row(), + self.code().red().bold(), + self.body() + ), + } + } +} diff --git a/src/parser.rs b/src/parser.rs new file mode 100644 index 0000000000..57f8d5734b --- /dev/null +++ b/src/parser.rs @@ -0,0 +1,14 @@ +use anyhow::Result; +use rustpython_parser::ast::Suite; +use rustpython_parser::parser; +use std::fs::File; +use std::io::{BufReader, Read}; +use std::path::Path; + +pub fn parse(path: &Path) -> Result { + let file = File::open(path)?; + let mut buf_reader = BufReader::new(file); + let mut contents = String::new(); + buf_reader.read_to_string(&mut contents)?; + parser::parse_program(&contents).map_err(|e| e.into()) +} diff --git a/test_sources/__init__.py b/test_sources/__init__.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_sources/bar.py b/test_sources/bar.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/test_sources/foo.py b/test_sources/foo.py new file mode 100644 index 0000000000..a8caefaa8c --- /dev/null +++ b/test_sources/foo.py @@ -0,0 +1 @@ +from bar import *