Read black options in format_dev script (#5827)

## Summary

Comparing repos with black requires that we use the settings as black,
notably line length and magic trailing comma behaviour. Excludes and
preserving quotes (vs. a preference for either quote style) is not yet
implemented because they weren't needed for the test projects.

In the other two commits i fixed the output when the progress bar is
hidden (this way is recommonded in the indicatif docs), added a
`scratch.pyi` file to gitignore because black formats stub files
differently and also updated the ecosystem readme with the projects json
without forks.

## Test Plan

I added a `line-length` vs `line_length` test. Otherwise only my
personal usage atm, a PR to integrate the script into the CI to check
some projects will follow.
This commit is contained in:
konsti 2023-07-17 15:29:43 +02:00 committed by GitHub
parent 21063544f7
commit 7dd30f0270
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 187 additions and 27 deletions

2
.gitignore vendored
View file

@ -10,7 +10,7 @@ schemastore
# `maturin develop` and ecosystem_all_check.sh
.venv*
# Formatter debugging (crates/ruff_python_formatter/README.md)
scratch.py
scratch.*
# Created by `perf` (CONTRIBUTING.md)
perf.data
perf.data.old

21
Cargo.lock generated
View file

@ -963,6 +963,12 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "indoc"
version = "2.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c785eefb63ebd0e33416dfcb8d6da0bf27ce752843a45632a67bf10d4d4b5c4"
[[package]]
name = "inotify"
version = "0.9.6"
@ -1987,6 +1993,7 @@ dependencies = [
"clap",
"ignore",
"indicatif",
"indoc",
"itertools",
"libcst",
"log",
@ -2004,11 +2011,13 @@ dependencies = [
"rustpython-format",
"rustpython-parser",
"schemars",
"serde",
"serde_json",
"similar",
"strum",
"strum_macros",
"tempfile",
"toml",
]
[[package]]
@ -2783,9 +2792,9 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
[[package]]
name = "toml"
version = "0.7.5"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ebafdf5ad1220cb59e7d17cf4d2c72015297b75b19a10472f99b89225089240"
checksum = "c17e963a819c331dcacd7ab957d80bc2b9a9c1e71c804826d2f283dd65306542"
dependencies = [
"serde",
"serde_spanned",
@ -2804,9 +2813,9 @@ dependencies = [
[[package]]
name = "toml_edit"
version = "0.19.11"
version = "0.19.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "266f016b7f039eec8a1a80dfe6156b633d208b9fccca5e4db1d6775b0c4e34a7"
checksum = "5f8751d9c1b03c6500c387e96f81f815a4f8e72d142d2d4a9ffa6fedd51ddee7"
dependencies = [
"indexmap 2.0.0",
"serde",
@ -3339,9 +3348,9 @@ checksum = "1a515f5799fe4961cb532f983ce2b23082366b898e52ffbce459c86f67c8378a"
[[package]]
name = "winnow"
version = "0.4.7"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca0ace3845f0d96209f0375e6d367e3eb87eb65d27d445bdc9f1843a26f39448"
checksum = "81fac9742fd1ad1bd9643b991319f72dd031016d44b77039a26977eb667141e7"
dependencies = [
"memchr",
]

View file

@ -28,13 +28,18 @@ libcst = { workspace = true }
log = { workspace = true }
once_cell = { workspace = true }
pretty_assertions = { version = "1.3.0" }
regex = { workspace = true }
rayon = "1.7.0"
regex = { workspace = true }
rustpython-format = { workspace = true }
rustpython-parser = { workspace = true }
schemars = { workspace = true }
serde = { workspace = true, features = ["derive"] }
serde_json = { workspace = true }
similar = { workspace = true }
strum = { workspace = true }
strum_macros = { workspace = true }
tempfile = "3.6.0"
toml = { workspace = true, features = ["parse"] }
[dev-dependencies]
indoc = "2.0.3"

View file

@ -1,15 +1,18 @@
use anyhow::{bail, Context};
use anyhow::{bail, format_err, Context};
use clap::{CommandFactory, FromArgMatches};
use ignore::DirEntry;
use indicatif::ProgressBar;
use log::debug;
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use ruff::resolver::python_files_in_path;
use ruff::settings::types::{FilePattern, FilePatternSet};
use ruff_cli::args::CheckArgs;
use ruff_cli::resolve::resolve;
use ruff_formatter::{FormatError, PrintError};
use ruff_python_formatter::{format_module, FormatModuleError, PyFormatOptions};
use ruff_formatter::{FormatError, LineWidth, PrintError};
use ruff_python_formatter::{
format_module, FormatModuleError, MagicTrailingComma, PyFormatOptions,
};
use serde::Deserialize;
use similar::{ChangeTag, TextDiff};
use std::fmt::{Display, Formatter};
use std::fs::File;
@ -225,6 +228,7 @@ enum Message {
fn format_dev_multi_project(args: &Args) -> bool {
let mut total_errors = 0;
let mut total_files = 0;
let mut num_projects: usize = 0;
let start = Instant::now();
rayon::scope(|scope| {
@ -233,6 +237,7 @@ fn format_dev_multi_project(args: &Args) -> bool {
// Workers, to check is subdirectory in parallel
for base_dir in &args.files {
for dir in base_dir.read_dir().unwrap() {
num_projects += 1;
let path = dir.unwrap().path().clone();
let sender = sender.clone();
@ -255,36 +260,39 @@ fn format_dev_multi_project(args: &Args) -> bool {
}
// Main thread, writing to stdout
#[allow(clippy::print_stdout)]
scope.spawn(|_| {
let mut error_file = args.error_file.as_ref().map(|error_file| {
BufWriter::new(File::create(error_file).expect("Couldn't open error file"))
});
let bar = ProgressBar::new(args.files.len() as u64);
let bar = ProgressBar::new(num_projects as u64);
for message in receiver {
match message {
Message::Start { path } => {
bar.println(path.display().to_string());
bar.suspend(|| println!("Starting {}", path.display()));
}
Message::Finished { path, result } => {
total_errors += result.error_count();
total_files += result.file_count;
bar.println(format!(
"Finished {} with {} files (similarity index {:.3}) in {:.2}s",
path.display(),
result.file_count,
result.statistics.similarity_index(),
result.duration.as_secs_f32(),
));
bar.println(result.display(args.format).to_string().trim_end());
bar.suspend(|| {
println!(
"Finished {} with {} files (similarity index {:.3}) in {:.2}s",
path.display(),
result.file_count,
result.statistics.similarity_index(),
result.duration.as_secs_f32(),
);
});
bar.suspend(|| print!("{}", result.display(args.format)));
if let Some(error_file) = &mut error_file {
write!(error_file, "{}", result.display(args.format)).unwrap();
}
bar.inc(1);
}
Message::Failed { path, error } => {
bar.println(format!("Failed {}: {}", path.display(), error));
bar.suspend(|| println!("Failed {}: {}", path.display(), error));
bar.inc(1);
}
}
@ -314,6 +322,13 @@ fn format_dev_project(
) -> anyhow::Result<CheckRepoResult> {
let start = Instant::now();
// TODO(konstin): The assumptions between this script (one repo) and ruff (pass in a bunch of
// files) mismatch.
let options = BlackOptions::from_file(&files[0])?.to_py_format_options();
debug!("Options for {}: {options:?}", files[0].display());
// TODO(konstin): black excludes
// Find files to check (or in this case, format twice). Adapted from ruff_cli
// First argument is ignored
let paths = ruff_check_paths(files)?;
@ -335,7 +350,9 @@ fn format_dev_project(
let file = dir_entry.path().to_path_buf();
// Handle panics (mostly in `debug_assert!`)
let result = match catch_unwind(|| format_dev_file(&file, stability_check, write)) {
let result = match catch_unwind(|| {
format_dev_file(&file, stability_check, write, options.clone())
}) {
Ok(result) => result,
Err(panic) => {
if let Some(message) = panic.downcast_ref::<String>() {
@ -600,11 +617,12 @@ fn format_dev_file(
input_path: &Path,
stability_check: bool,
write: bool,
options: PyFormatOptions,
) -> Result<Statistics, CheckFileError> {
let content = fs::read_to_string(input_path)?;
#[cfg(not(debug_assertions))]
let start = Instant::now();
let printed = match format_module(&content, PyFormatOptions::default()) {
let printed = match format_module(&content, options.clone()) {
Ok(printed) => printed,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInInput(err));
@ -631,7 +649,7 @@ fn format_dev_file(
}
if stability_check {
let reformatted = match format_module(formatted, PyFormatOptions::default()) {
let reformatted = match format_module(formatted, options) {
Ok(reformatted) => reformatted,
Err(err @ (FormatModuleError::LexError(_) | FormatModuleError::ParseError(_))) => {
return Err(CheckFileError::SyntaxErrorInOutput {
@ -662,3 +680,131 @@ fn format_dev_file(
Ok(Statistics::from_versions(&content, formatted))
}
#[derive(Deserialize, Default)]
struct PyprojectToml {
tool: Option<PyprojectTomlTool>,
}
#[derive(Deserialize, Default)]
struct PyprojectTomlTool {
black: Option<BlackOptions>,
}
#[derive(Deserialize, Debug)]
#[serde(default)]
struct BlackOptions {
// Black actually allows both snake case and kebab case
#[serde(alias = "line-length")]
line_length: u16,
#[serde(alias = "skip-magic-trailing-comma")]
skip_magic_trailing_comma: bool,
#[allow(unused)]
#[serde(alias = "force-exclude")]
force_exclude: Option<String>,
}
impl Default for BlackOptions {
fn default() -> Self {
Self {
line_length: 88,
skip_magic_trailing_comma: false,
force_exclude: None,
}
}
}
impl BlackOptions {
/// TODO(konstin): For the real version, fix printing of error chains and remove the path
/// argument
fn from_toml(toml: &str, path: &Path) -> anyhow::Result<Self> {
let pyproject_toml: PyprojectToml = toml::from_str(toml).map_err(|e| {
format_err!(
"Not a valid pyproject.toml toml file at {}: {e}",
path.display()
)
})?;
let black_options = pyproject_toml
.tool
.unwrap_or_default()
.black
.unwrap_or_default();
debug!(
"Found {}, setting black options: {:?}",
path.display(),
&black_options
);
Ok(black_options)
}
fn from_file(repo: &Path) -> anyhow::Result<Self> {
let path = repo.join("pyproject.toml");
if !path.is_file() {
debug!(
"No pyproject.toml at {}, using black option defaults",
path.display()
);
return Ok(Self::default());
}
Self::from_toml(&fs::read_to_string(&path)?, repo)
}
fn to_py_format_options(&self) -> PyFormatOptions {
let mut options = PyFormatOptions::default();
options
.with_line_width(
LineWidth::try_from(self.line_length).expect("Invalid line length limit"),
)
.with_magic_trailing_comma(if self.skip_magic_trailing_comma {
MagicTrailingComma::Ignore
} else {
MagicTrailingComma::Respect
});
options
}
}
#[cfg(test)]
mod tests {
use crate::format_dev::BlackOptions;
use indoc::indoc;
use ruff_formatter::{FormatOptions, LineWidth};
use ruff_python_formatter::MagicTrailingComma;
use std::path::Path;
#[test]
fn test_transformers() {
let toml = indoc! {"
[tool.black]
line-length = 119
target-version = ['py37']
"};
let options = BlackOptions::from_toml(toml, Path::new("pyproject.toml"))
.unwrap()
.to_py_format_options();
assert_eq!(options.line_width(), LineWidth::try_from(119).unwrap());
assert!(matches!(
options.magic_trailing_comma(),
MagicTrailingComma::Respect
));
}
#[test]
fn test_typeshed() {
let toml = indoc! {r#"
[tool.black]
line_length = 130
target_version = ["py310"]
skip_magic_trailing_comma = true
force-exclude = ".*_pb2.pyi"
"#};
let options = BlackOptions::from_toml(toml, Path::new("pyproject.toml"))
.unwrap()
.to_py_format_options();
assert_eq!(options.line_width(), LineWidth::try_from(130).unwrap());
assert!(matches!(
options.magic_trailing_comma(),
MagicTrailingComma::Ignore
));
}
}

View file

@ -271,7 +271,7 @@ It is also possible large number of repositories using ruff. This dataset is lar
only do this occasionally:
```shell
curl https://raw.githubusercontent.com/akx/ruff-usage-aggregate/master/data/known-github-tomls.jsonl > github_search.jsonl
curl https://raw.githubusercontent.com/akx/ruff-usage-aggregate/master/data/known-github-tomls-clean.jsonl> github_search.jsonl
python scripts/check_ecosystem.py --checkouts target/checkouts --projects github_search.jsonl -v $(which true) $(which true)
cargo run --bin ruff_dev -- format-dev --stability-check --multi-project target/checkouts
```