Initial commit for linter prototype

This commit is contained in:
Charles Marsh 2022-08-09 13:19:19 -04:00
commit 0d8e4bd6e9
13 changed files with 394 additions and 0 deletions

24
.gitignore vendored Normal file
View file

@ -0,0 +1,24 @@
# Local cache
.cache
# IntelliJ
.idea
###
# Rust.gitignore
###
# Generated by Cargo
# will have compiled files and executables
debug/
target/
# Remove Cargo.lock from gitignore if creating an executable, leave it for libraries
# More information here https://doc.rust-lang.org/cargo/guide/cargo-toml-vs-cargo-lock.html
Cargo.lock
# These are backup files generated by rustfmt
**/*.rs.bk
# MSVC Windows builds of rustc generate these, which store debugging information
*.pdb

20
Cargo.toml Normal file
View file

@ -0,0 +1,20 @@
[package]
name = "rust-python-linter"
version = "0.1.0"
edition = "2021"
[dependencies]
anyhow = { version = "1.0.60" }
bincode = "1.3.3"
cacache = "10.0.1"
chrono = "0.4.21"
clap = { version = "3.2.16", features = ["derive"] }
colored = { version = "2.0.0" }
fern = { version = "0.6.1" }
log = "0.4.17"
rayon = "1.5.3"
rustpython-parser = { git = "https://github.com/RustPython/RustPython.git" }
serde = { version = "1", features = ["derive"] }
serde_json = "1"
walkdir = { version = "2.3.2" }

1
README.md Normal file
View file

@ -0,0 +1 @@
# rust-python-linter

91
src/bin/main.rs Normal file
View file

@ -0,0 +1,91 @@
use std::path::PathBuf;
use std::time::Instant;
use anyhow::Result;
use clap::{Parser, ValueHint};
use log::info;
use rayon::prelude::*;
use walkdir::{DirEntry, WalkDir};
use rust_python_linter::linter::check_path;
use rust_python_linter::message::Message;
/// Installs the global logger, writing timestamped records to stdout.
///
/// The threshold is `Debug` when `verbose` is true and `Info` otherwise.
///
/// # Errors
///
/// Returns an error if the logger cannot be installed by `apply()`.
fn set_up_logging(verbose: bool) -> Result<()> {
    let threshold = if verbose {
        log::LevelFilter::Debug
    } else {
        log::LevelFilter::Info
    };

    fern::Dispatch::new()
        .format(|out, message, record| {
            let timestamp = chrono::Local::now().format("[%Y-%m-%d][%H:%M:%S]");
            out.finish(format_args!(
                "{}[{}][{}] {}",
                timestamp,
                record.target(),
                record.level(),
                message
            ))
        })
        .level(threshold)
        .level_for("hyper", log::LevelFilter::Info)
        .chain(std::io::stdout())
        .apply()?;

    Ok(())
}
// Command-line arguments, parsed with clap's derive API.
// NOTE: plain `//` comments are used here on purpose; clap's derive turns `///`
// doc comments into help text, which would change the program's output.
#[derive(Debug, Parser)]
#[clap(name = "rust-python-linter")]
#[clap(about = "A bare-bones Python linter written in Rust", long_about = None)]
struct Cli {
// Root path handed to `WalkDir` in `main`; it is walked recursively for `.py` files.
#[clap(name = "filename", parse(from_os_str), value_hint = ValueHint::DirPath)]
filename: PathBuf,
// When set, `set_up_logging` uses the Debug log level instead of Info.
#[clap(short, long, action)]
verbose: bool,
// /// Files to process
// #[clap(name = "FILE", parse(from_os_str), value_hint = ValueHint::AnyPath)]
// files: Vec<PathBuf>,
}
/// Returns `true` for entries the directory walk should keep.
///
/// The walk root (depth 0) is always kept; any other entry is kept only when
/// its file name does not start with a dot. Entries whose file name is not
/// valid UTF-8 are rejected.
fn is_not_hidden(entry: &DirEntry) -> bool {
    match entry.file_name().to_str() {
        Some(name) => entry.depth() == 0 || !name.starts_with('.'),
        None => false,
    }
}
/// Entry point: collects the Python files under the given path, lints them in
/// parallel, and prints every message that was produced.
///
/// A file that cannot be checked (for example because it cannot be read or
/// parsed) is reported on stderr and skipped, so a single bad file no longer
/// aborts the whole run with a panic.
///
/// # Errors
///
/// Returns an error if logging cannot be set up.
fn main() -> Result<()> {
    let cli = Cli::parse();

    set_up_logging(cli.verbose)?;

    // Collect all the files to check.
    let start = Instant::now();
    let files: Vec<DirEntry> = WalkDir::new(cli.filename)
        .follow_links(true)
        .into_iter()
        .filter_entry(is_not_hidden)
        .filter_map(|entry| entry.ok())
        .filter(|entry| entry.path().to_string_lossy().ends_with(".py"))
        .collect();
    let duration = start.elapsed();
    info!("Identified files to lint in: {:?}", duration);

    let start = Instant::now();
    let messages: Vec<Message> = files
        .par_iter()
        .filter_map(|entry| match check_path(entry.path()) {
            Ok(messages) => Some(messages),
            Err(err) => {
                // Report the failure and keep going with the remaining files.
                eprintln!(
                    "Failed to check {}: {}",
                    entry.path().to_string_lossy(),
                    err
                );
                None
            }
        })
        .flatten()
        .collect();
    let duration = start.elapsed();
    info!("Checked files in: {:?}", duration);

    if !messages.is_empty() {
        println!("Found {} error(s)!", messages.len());
        for message in messages {
            println!("{}", message);
        }
    }

    Ok(())
}

67
src/cache.rs Normal file
View file

@ -0,0 +1,67 @@
use std::borrow::Cow;
use std::os::unix::fs::MetadataExt;
use std::path::Path;
use serde::{Deserialize, Serialize};
use crate::message::Message;
/// File attributes stored with each cache entry; `get` compares them against
/// the file's current metadata to decide whether the cached messages are
/// still usable.
#[derive(Serialize, Deserialize)]
struct CacheMetadata {
/// File size as returned by `MetadataExt::size`.
size: u64,
/// Last-modification time as returned by `MetadataExt::mtime`.
mtime: i64,
}
/// Borrowed, serialize-only form of a cache entry. `set` builds one so the
/// caller's message slice can be encoded without being copied.
/// The fields mirror `CheckResult`, which is what `get` decodes.
#[derive(Serialize)]
struct CheckResultRef<'a> {
/// Size and mtime of the file at the time the entry is written.
metadata: &'a CacheMetadata,
/// Messages produced for the file.
messages: &'a [Message],
}
/// Owned, deserialize-only form of a cache entry, decoded by `get` from the
/// bytes stored in the cache.
#[derive(Deserialize)]
struct CheckResult {
/// Size and mtime recorded when the entry was written.
metadata: CacheMetadata,
/// Messages that were produced for the file at that time.
messages: Vec<Message>,
}
/// Location of the on-disk cache, relative to the current working directory.
fn cache_dir() -> &'static str {
    const CACHE_DIR: &str = "./.cache";
    CACHE_DIR
}
/// Derives the cache key for a file: its path rendered as a (lossy) string.
fn cache_key(path: &Path) -> Cow<str> {
    path.as_os_str().to_string_lossy()
}
/// Looks up the cached messages for `path`.
///
/// Returns `Some(messages)` only when an entry exists, can be decoded, and the
/// size and mtime stored with it match the file's current metadata. Any
/// failure along the way (missing entry, unreadable metadata, undecodable
/// bytes) is treated as a cache miss and yields `None`.
pub fn get(path: &Path) -> Option<Vec<Message>> {
    let encoded = cacache::read_sync(cache_dir(), cache_key(path)).ok()?;
    let file_metadata = path.metadata().ok()?;
    let CheckResult { metadata, messages } =
        bincode::deserialize::<CheckResult>(&encoded[..]).ok()?;

    let unchanged =
        file_metadata.size() == metadata.size && file_metadata.mtime() == metadata.mtime;
    if unchanged {
        Some(messages)
    } else {
        None
    }
}
/// Stores `messages` in the cache for `path`, tagged with the file's current
/// size and mtime.
///
/// Caching is best-effort: if the file's metadata cannot be read, the entry
/// cannot be encoded, or the write fails, the function returns without
/// caching and without reporting an error. In particular, an encoding failure
/// (serde refuses to serialize a `PathBuf` that is not valid UTF-8) no longer
/// panics.
pub fn set(path: &Path, messages: &[Message]) {
    if let Ok(metadata) = path.metadata() {
        let check_result = CheckResultRef {
            metadata: &CacheMetadata {
                size: metadata.size(),
                mtime: metadata.mtime(),
            },
            messages,
        };
        // Skip the write instead of panicking when the entry cannot be encoded.
        if let Ok(encoded) = bincode::serialize(&check_result) {
            // A failed write only costs a future cache hit, so it is ignored.
            let _ = cacache::write_sync(cache_dir(), cache_key(path), encoded);
        }
    }
}

57
src/check.rs Normal file
View file

@ -0,0 +1,57 @@
use std::path::Path;
use rustpython_parser::ast::{Located, StmtKind, Suite};
use crate::message::Message;
/// Runs the lint checks over the statements of a parsed module.
///
/// Only the statements directly contained in `python_ast` are inspected. For
/// every `from ... import` statement, one `Message::ImportStarUsage` is
/// emitted per imported name equal to `*`, carrying `path` and the
/// statement's location. All other statement kinds produce no messages.
pub fn check_ast(path: &Path, python_ast: &Suite) -> Vec<Message> {
    let mut messages: Vec<Message> = Vec::new();

    for Located { location, node, .. } in python_ast {
        if let StmtKind::ImportFrom { names, .. } = node {
            messages.extend(
                names
                    .iter()
                    .filter(|alias| alias.name == "*")
                    .map(|_| Message::ImportStarUsage {
                        filename: path.to_path_buf(),
                        location: *location,
                    }),
            );
        }
    }

    messages
}

5
src/lib.rs Normal file
View file

@ -0,0 +1,5 @@
mod cache;
mod check;
pub mod linter;
pub mod message;
mod parser;

45
src/linter.rs Normal file
View file

@ -0,0 +1,45 @@
use std::path::Path;
use anyhow::Result;
use log::debug;
use serde::{Deserialize, Serialize};
use crate::check::check_ast;
use crate::message::Message;
use crate::{cache, parser};
/// Size and mtime pair describing a file.
// NOTE(review): nothing in the visible part of linter.rs constructs or reads
// this struct; it looks like a leftover copy of `CacheMetadata` from cache.rs.
// Confirm before removing.
#[derive(Serialize, Deserialize)]
struct CacheMetadata {
size: u64,
mtime: i64,
}
/// File metadata bundled with the messages produced for that file.
// NOTE(review): nothing in the visible part of linter.rs constructs or reads
// this struct; it looks like a leftover copy of `CheckResult` from cache.rs.
// Confirm before removing.
#[derive(Serialize, Deserialize)]
struct CheckResult {
metadata: CacheMetadata,
messages: Vec<Message>,
}
/// Lints a single Python file and returns the messages found in it.
///
/// Cached results are returned when `cache::get` has them; otherwise the file
/// is parsed, checked, and the result is handed to `cache::set` before being
/// returned. Two hard-coded paths are skipped outright and yield no messages.
///
/// # Errors
///
/// Returns an error if the file cannot be parsed by `parser::parse`.
pub fn check_path(path: &Path) -> Result<Vec<Message>> {
    // TODO(charlie): These specific files are causing a stack overflow.
    const SKIPPED_PATHS: [&str; 2] = [
        "../../spring-experiments/spr_experiments/spr_experiments/assayworks/experiments/order_20220204/pipeline_steps.py",
        "../../spring-experiments/spr_platform/spr_platform/data_index/bigquery_index.py",
    ];
    let path_text = path.to_string_lossy();
    if SKIPPED_PATHS
        .iter()
        .any(|skipped| path_text.eq_ignore_ascii_case(skipped))
    {
        return Ok(vec![]);
    }

    match cache::get(path) {
        // Check the cache.
        Some(cached) => {
            debug!("Cache hit for: {}", path_text);
            Ok(cached)
        }
        // Run the linter.
        None => {
            let python_ast = parser::parse(path)?;
            let messages = check_ast(path, &python_ast);
            cache::set(path, &messages);
            Ok(messages)
        }
    }
}

69
src/message.rs Normal file
View file

@ -0,0 +1,69 @@
use colored::Colorize;
use rustpython_parser::ast::Location;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::path::PathBuf;
/// Serde "remote" definition for `rustpython_parser::ast::Location`, used via
/// `#[serde(with = "LocationDef")]` so that `Location` fields can be
/// serialized and deserialized from this crate.
#[derive(Serialize, Deserialize)]
#[serde(remote = "Location")]
struct LocationDef {
/// Read through `Location::row` when serializing.
#[serde(getter = "Location::row")]
row: usize,
/// Read through `Location::column` when serializing.
#[serde(getter = "Location::column")]
column: usize,
}
impl From<LocationDef> for Location {
fn from(def: LocationDef) -> Location {
Location::new(def.row, def.column)
}
}
/// A single finding reported by the linter.
#[derive(Serialize, Deserialize)]
pub enum Message {
/// A `from ... import *` statement was found (reported as code `F403`).
ImportStarUsage {
/// Path of the file the statement was found in.
filename: PathBuf,
/// Location of the import statement within that file.
#[serde(with = "LocationDef")]
location: Location,
},
}
impl Message {
/// A four-letter shorthand code for the message.
pub fn code(&self) -> &'static str {
match self {
Message::ImportStarUsage {
filename: _,
location: _,
} => "F403",
}
}
/// The body text for the message.
pub fn body(&self) -> &'static str {
match self {
Message::ImportStarUsage {
filename: _,
location: _,
} => "Unable to detect undefined names",
}
}
}
impl fmt::Display for Message {
    /// Renders the message as `<filename>:<row>:<column>\t<code>\t<body>`,
    /// with the filename, separators and code colorized.
    ///
    /// The row is printed before the column, matching the conventional
    /// `file:line:column` layout (the two were previously swapped).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match self {
            Message::ImportStarUsage { filename, location } => write!(
                f,
                "{}{}{}{}{}\t{}\t{}",
                filename.to_string_lossy().white().bold(),
                ":".cyan(),
                location.row(),
                ":".cyan(),
                location.column(),
                self.code().red().bold(),
                self.body()
            ),
        }
    }
}

14
src/parser.rs Normal file
View file

@ -0,0 +1,14 @@
use anyhow::Result;
use rustpython_parser::ast::Suite;
use rustpython_parser::parser;
use std::fs::File;
use std::io::{BufReader, Read};
use std::path::Path;
/// Reads the file at `path` and parses its contents as a Python program.
///
/// # Errors
///
/// Returns an error if the file cannot be opened or read as UTF-8 text, or if
/// the parser rejects its contents.
pub fn parse(path: &Path) -> Result<Suite> {
    let mut source = String::new();
    BufReader::new(File::open(path)?).read_to_string(&mut source)?;
    let suite = parser::parse_program(&source)?;
    Ok(suite)
}

0
test_sources/__init__.py Normal file
View file

0
test_sources/bar.py Normal file
View file

1
test_sources/foo.py Normal file
View file

@ -0,0 +1 @@
from bar import *