Add --diff option ruff format (#7937)

**Summary** `ruff format --diff` is similar to `ruff format --check`,
but we don't only error with the list of file that would be formatted,
but also show a diff between the unformatted input and the formatted
output.

```console
$ ruff format --diff scratch.py scratch.pyi scratch.ipynb
warning: `ruff format` is not yet stable, and subject to change in future versions.
--- scratch.ipynb
+++ scratch.ipynb
@@ -1,3 +1,4 @@
 import numpy
-maths = (numpy.arange(100)**2).sum()
-stats= numpy.asarray([1,2,3,4]).median()
+
+maths = (numpy.arange(100) ** 2).sum()
+stats = numpy.asarray([1, 2, 3, 4]).median()
--- scratch.py
+++ scratch.py
@@ -1,3 +1,3 @@
 x = 1
-y=2
+y = 2
 z = 3
2 files would be reformatted, 1 file left unchanged
```

With `--diff`, the summary message gets printed to stderr to allow e.g.
`ruff format --diff . > format.patch`.

At the moment, jupyter notebooks are formatted as code diffs, while
everything else is a real diff that could be applied. This means that
the diffs containing jupyter notebooks are not real diffs and can't be
applied. We could change this to json diffs, but they are hard to read.
We could also split the diff option into a human diff option, where we
deviate from the machine readable diff constraints, and a proper machine
readable, appliable diff output that you can pipe into other tools.

To make the tests work, the results (and errors, if any) are sorted
before printing them. Previously, the print order was random, i.e. two
identical runs could have different output.

Open question: Should this go into the markdown docs? Or will this be
subsumed by the integration of the formatter into `ruff check`?

**Test plan** Fixtures for the change and no change cases, including a
jupyter notebook and for file input and stdin.

Fixes #7231

---------

Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
konsti 2023-10-18 13:55:05 +02:00 committed by GitHub
parent 0c3123e07e
commit 51aa73f405
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 347 additions and 93 deletions

View file

@ -1,5 +1,7 @@
use std::fmt::{Display, Formatter};
use std::fs::File;
use std::io;
use std::io::{stderr, stdout, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
@ -9,6 +11,7 @@ use itertools::Itertools;
use log::error;
use rayon::iter::Either::{Left, Right};
use rayon::iter::{IntoParallelIterator, ParallelIterator};
use similar::TextDiff;
use thiserror::Error;
use tracing::debug;
@ -34,6 +37,20 @@ pub(crate) enum FormatMode {
Write,
/// Check if the file is formatted, but do not write the formatted contents back.
Check,
/// Check if the file is formatted, show a diff if not.
Diff,
}
impl FormatMode {
pub(crate) fn from_cli(cli: &FormatArguments) -> Self {
if cli.diff {
FormatMode::Diff
} else if cli.check {
FormatMode::Check
} else {
FormatMode::Write
}
}
}
/// Format a set of files, and return the exit status.
@ -48,11 +65,7 @@ pub(crate) fn format(
overrides,
cli.stdin_filename.as_deref(),
)?;
let mode = if cli.check {
FormatMode::Check
} else {
FormatMode::Write
};
let mode = FormatMode::from_cli(cli);
let (paths, resolver) = python_files_in_path(&cli.files, &pyproject_config, overrides)?;
if paths.is_empty() {
@ -61,7 +74,7 @@ pub(crate) fn format(
}
let start = Instant::now();
let (mut results, errors): (Vec<_>, Vec<_>) = paths
let (mut results, mut errors): (Vec<_>, Vec<_>) = paths
.into_par_iter()
.filter_map(|entry| {
match entry {
@ -85,12 +98,31 @@ pub(crate) fn format(
return None;
}
// Extract the sources from the file.
let unformatted = match SourceKind::from_path(path, source_type) {
Ok(Some(source_kind)) => source_kind,
Ok(None) => return None,
Err(err) => {
return Some(Err(FormatCommandError::Read(
Some(path.to_path_buf()),
err,
)));
}
};
Some(
match catch_unwind(|| {
format_path(path, &resolved_settings.formatter, source_type, mode)
format_path(
path,
&resolved_settings.formatter,
&unformatted,
source_type,
mode,
)
}) {
Ok(inner) => inner.map(|result| FormatPathResult {
path: resolved_file.into_path(),
unformatted,
result,
}),
Err(error) => Err(FormatCommandError::Panic(
@ -109,6 +141,27 @@ pub(crate) fn format(
});
let duration = start.elapsed();
// Make output deterministic, at least as long as we have a path
results.sort_unstable_by(|x, y| x.path.cmp(&y.path));
errors.sort_by(|x, y| {
fn get_key(error: &FormatCommandError) -> Option<&PathBuf> {
match &error {
FormatCommandError::Ignore(ignore) => {
if let ignore::Error::WithPath { path, .. } = ignore {
Some(path)
} else {
None
}
}
FormatCommandError::Panic(path, _)
| FormatCommandError::Read(path, _)
| FormatCommandError::Format(path, _)
| FormatCommandError::Write(path, _) => path.as_ref(),
}
}
get_key(x).cmp(&get_key(y))
});
debug!(
"Formatted {} files in {:.2?}",
results.len() + errors.len(),
@ -121,14 +174,19 @@ pub(crate) fn format(
}
results.sort_unstable_by(|a, b| a.path.cmp(&b.path));
let results = FormatResults::new(results.as_slice(), mode);
let summary = FormatSummary::new(results.as_slice(), mode);
if mode.is_diff() {
results.write_diff(&mut stdout().lock())?;
}
// Report on the formatting changes.
if log_level >= LogLevel::Default {
#[allow(clippy::print_stdout)]
{
println!("{summary}");
if mode.is_diff() {
// Allow piping the diff to e.g. a file by writing the summary to stderr
results.write_summary(&mut stderr().lock())?;
} else {
results.write_summary(&mut stdout().lock())?;
}
}
@ -140,9 +198,9 @@ pub(crate) fn format(
Ok(ExitStatus::Error)
}
}
FormatMode::Check => {
FormatMode::Check | FormatMode::Diff => {
if errors.is_empty() {
if summary.any_formatted() {
if results.any_formatted() {
Ok(ExitStatus::Failure)
} else {
Ok(ExitStatus::Success)
@ -159,57 +217,43 @@ pub(crate) fn format(
fn format_path(
path: &Path,
settings: &FormatterSettings,
unformatted: &SourceKind,
source_type: PySourceType,
mode: FormatMode,
) -> Result<FormatResult, FormatCommandError> {
// Extract the sources from the file.
let source_kind = match SourceKind::from_path(path, source_type) {
Ok(Some(source_kind)) => source_kind,
Ok(None) => return Ok(FormatResult::Unchanged),
Err(err) => {
return Err(FormatCommandError::Read(Some(path.to_path_buf()), err));
}
};
// Format the source.
match format_source(source_kind, source_type, Some(path), settings)? {
FormattedSource::Formatted(formatted) => {
if mode.is_write() {
let format_result = match format_source(unformatted, source_type, Some(path), settings)? {
FormattedSource::Formatted(formatted) => match mode {
FormatMode::Write => {
let mut writer = File::create(path).map_err(|err| {
FormatCommandError::Write(Some(path.to_path_buf()), err.into())
})?;
formatted
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(Some(path.to_path_buf()), err))?;
FormatResult::Formatted
}
Ok(FormatResult::Formatted)
}
FormattedSource::Unchanged(_) => Ok(FormatResult::Unchanged),
}
FormatMode::Check => FormatResult::Formatted,
FormatMode::Diff => FormatResult::Diff(formatted),
},
FormattedSource::Unchanged => FormatResult::Unchanged,
};
Ok(format_result)
}
#[derive(Debug)]
pub(crate) enum FormattedSource {
/// The source was formatted, and the [`SourceKind`] contains the transformed source code.
Formatted(SourceKind),
/// The source was unchanged, and the [`SourceKind`] contains the original source code.
Unchanged(SourceKind),
/// The source was unchanged.
Unchanged,
}
impl From<FormattedSource> for FormatResult {
fn from(value: FormattedSource) -> Self {
match value {
FormattedSource::Formatted(_) => FormatResult::Formatted,
FormattedSource::Unchanged(_) => FormatResult::Unchanged,
}
}
}
impl FormattedSource {
pub(crate) fn source_kind(&self) -> &SourceKind {
match self {
FormattedSource::Formatted(source_kind) => source_kind,
FormattedSource::Unchanged(source_kind) => source_kind,
FormattedSource::Unchanged => FormatResult::Unchanged,
}
}
}
@ -217,30 +261,28 @@ impl FormattedSource {
/// Format a [`SourceKind`], returning the transformed [`SourceKind`], or `None` if the source was
/// unchanged.
pub(crate) fn format_source(
source_kind: SourceKind,
source_kind: &SourceKind,
source_type: PySourceType,
path: Option<&Path>,
settings: &FormatterSettings,
) -> Result<FormattedSource, FormatCommandError> {
match source_kind {
SourceKind::Python(unformatted) => {
let options = settings.to_format_options(source_type, &unformatted);
let options = settings.to_format_options(source_type, unformatted);
let formatted = format_module_source(&unformatted, options)
let formatted = format_module_source(unformatted, options)
.map_err(|err| FormatCommandError::Format(path.map(Path::to_path_buf), err))?;
let formatted = formatted.into_code();
if formatted.len() == unformatted.len() && formatted == *unformatted {
Ok(FormattedSource::Unchanged(SourceKind::Python(unformatted)))
Ok(FormattedSource::Unchanged)
} else {
Ok(FormattedSource::Formatted(SourceKind::Python(formatted)))
}
}
SourceKind::IpyNotebook(notebook) => {
if !notebook.is_python_notebook() {
return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook(
notebook,
)));
return Ok(FormattedSource::Unchanged);
}
let options = settings.to_format_options(source_type, notebook.source_code());
@ -288,9 +330,7 @@ pub(crate) fn format_source(
// If the file was unchanged, return `None`.
let (Some(mut output), Some(last)) = (output, last) else {
return Ok(FormattedSource::Unchanged(SourceKind::IpyNotebook(
notebook,
)));
return Ok(FormattedSource::Unchanged);
};
// Add the remaining content.
@ -298,21 +338,23 @@ pub(crate) fn format_source(
output.push_str(slice);
// Update the notebook.
let mut notebook = notebook.clone();
notebook.update(&source_map, output);
let mut formatted = notebook.clone();
formatted.update(&source_map, output);
Ok(FormattedSource::Formatted(SourceKind::IpyNotebook(
notebook,
formatted,
)))
}
}
}
/// The result of an individual formatting operation.
#[derive(Debug, Clone, Copy, is_macro::Is)]
#[derive(Debug, Clone, is_macro::Is)]
pub(crate) enum FormatResult {
/// The file was formatted.
Formatted,
/// The file was formatted, [`SourceKind`] contains the formatted code
Diff(SourceKind),
/// The file was unchanged, as the formatted contents matched the existing contents.
Unchanged,
}
@ -321,38 +363,55 @@ pub(crate) enum FormatResult {
#[derive(Debug)]
struct FormatPathResult {
path: PathBuf,
unformatted: SourceKind,
result: FormatResult,
}
/// A summary of the formatting results.
/// The results of formatting a set of files
#[derive(Debug)]
struct FormatSummary<'a> {
struct FormatResults<'a> {
/// The individual formatting results.
results: &'a [FormatPathResult],
/// The format mode that was used.
mode: FormatMode,
}
impl<'a> FormatSummary<'a> {
impl<'a> FormatResults<'a> {
fn new(results: &'a [FormatPathResult], mode: FormatMode) -> Self {
Self { results, mode }
}
/// Returns `true` if any of the files require formatting.
fn any_formatted(&self) -> bool {
self.results
.iter()
.any(|result| result.result.is_formatted())
self.results.iter().any(|result| match result.result {
FormatResult::Formatted | FormatResult::Diff { .. } => true,
FormatResult::Unchanged => false,
})
}
}
impl Display for FormatSummary<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
fn write_diff(&self, f: &mut impl Write) -> io::Result<()> {
for result in self.results {
if let FormatResult::Diff(formatted) = &result.result {
let text_diff =
TextDiff::from_lines(result.unformatted.source_code(), formatted.source_code());
let mut unified_diff = text_diff.unified_diff();
unified_diff.header(
&fs::relativize_path(&result.path),
&fs::relativize_path(&result.path),
);
unified_diff.to_writer(&mut *f)?;
}
}
Ok(())
}
fn write_summary(&self, f: &mut impl Write) -> io::Result<()> {
// Compute the number of changed and unchanged files.
let mut formatted = 0u32;
let mut changed = 0u32;
let mut unchanged = 0u32;
for result in self.results {
match result.result {
match &result.result {
FormatResult::Formatted => {
// If we're running in check mode, report on any files that would be formatted.
if self.mode.is_check() {
@ -362,39 +421,42 @@ impl Display for FormatSummary<'_> {
fs::relativize_path(&result.path).bold()
)?;
}
formatted += 1;
changed += 1;
}
FormatResult::Unchanged => unchanged += 1,
FormatResult::Diff(_) => {
changed += 1;
}
}
}
// Write out a summary of the formatting results.
if formatted > 0 && unchanged > 0 {
write!(
if changed > 0 && unchanged > 0 {
writeln!(
f,
"{} file{} {}, {} file{} left unchanged",
formatted,
if formatted == 1 { "" } else { "s" },
changed,
if changed == 1 { "" } else { "s" },
match self.mode {
FormatMode::Write => "reformatted",
FormatMode::Check => "would be reformatted",
FormatMode::Check | FormatMode::Diff => "would be reformatted",
},
unchanged,
if unchanged == 1 { "" } else { "s" },
)
} else if formatted > 0 {
write!(
} else if changed > 0 {
writeln!(
f,
"{} file{} {}",
formatted,
if formatted == 1 { "" } else { "s" },
changed,
if changed == 1 { "" } else { "s" },
match self.mode {
FormatMode::Write => "reformatted",
FormatMode::Check => "would be reformatted",
FormatMode::Check | FormatMode::Diff => "would be reformatted",
}
)
} else if unchanged > 0 {
write!(
writeln!(
f,
"{} file{} left unchanged",
unchanged,

View file

@ -3,6 +3,8 @@ use std::path::Path;
use anyhow::Result;
use log::error;
use ruff_linter::fs;
use similar::TextDiff;
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::{PySourceType, SourceType};
@ -10,7 +12,9 @@ use ruff_workspace::resolver::{match_exclusion, python_file_at_path};
use ruff_workspace::FormatterSettings;
use crate::args::{CliOverrides, FormatArguments};
use crate::commands::format::{format_source, FormatCommandError, FormatMode, FormatResult};
use crate::commands::format::{
format_source, FormatCommandError, FormatMode, FormatResult, FormattedSource,
};
use crate::resolve::resolve;
use crate::stdin::read_from_stdin;
use crate::ExitStatus;
@ -23,11 +27,7 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
overrides,
cli.stdin_filename.as_deref(),
)?;
let mode = if cli.check {
FormatMode::Check
} else {
FormatMode::Write
};
let mode = FormatMode::from_cli(cli);
if let Some(filename) = cli.stdin_filename.as_deref() {
if !python_file_at_path(filename, &pyproject_config, overrides)? {
@ -58,7 +58,7 @@ pub(crate) fn format_stdin(cli: &FormatArguments, overrides: &CliOverrides) -> R
) {
Ok(result) => match mode {
FormatMode::Write => Ok(ExitStatus::Success),
FormatMode::Check => {
FormatMode::Check | FormatMode::Diff => {
if result.is_formatted() {
Ok(ExitStatus::Failure)
} else {
@ -93,15 +93,37 @@ fn format_source_code(
};
// Format the source.
let formatted = format_source(source_kind, source_type, path, settings)?;
let formatted = format_source(&source_kind, source_type, path, settings)?;
// Write to stdout regardless of whether the source was formatted.
if mode.is_write() {
let mut writer = stdout().lock();
formatted
.source_kind()
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(path.map(Path::to_path_buf), err))?;
match &formatted {
FormattedSource::Formatted(formatted) => match mode {
FormatMode::Write => {
let mut writer = stdout().lock();
formatted
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(path.map(Path::to_path_buf), err))?;
}
FormatMode::Check => {}
FormatMode::Diff => {
let mut writer = stdout().lock();
let text_diff =
TextDiff::from_lines(source_kind.source_code(), formatted.source_code());
let mut unified_diff = text_diff.unified_diff();
if let Some(path) = path {
unified_diff.header(&fs::relativize_path(path), &fs::relativize_path(path));
}
unified_diff.to_writer(&mut writer).unwrap();
}
},
FormattedSource::Unchanged => {
// Write to stdout regardless of whether the source was formatted
if mode.is_write() {
let mut writer = stdout().lock();
source_kind
.write(&mut writer)
.map_err(|err| FormatCommandError::Write(path.map(Path::to_path_buf), err))?;
}
}
}
Ok(FormatResult::from(formatted))