Avoid cloning source code multiple times (#6629)
## Summary

In working on https://github.com/astral-sh/ruff/pull/6628, I noticed that we clone the source code contents, potentially multiple times, prior to linting. The issue is that `SourceKind::Python` takes a `String`, so we first have to provide it with a `String`. In the stdin case, that means cloning. On top of that, we then have to clone `source_kind.content()`, because the `SourceKind` gets mutated. So for stdin, we end up cloning twice. For non-stdin, we end up cloning once, but unnecessarily (since the _contents_ don't get mutated, only the kind). This PR removes the `String` from `SourceKind::Python`, instead requiring that we track the contents separately. It reduces the number of clones to one for Jupyter Notebooks, and to zero otherwise.
parent 0cea4975fc
commit 2aeb27334d

4 changed files with 164 additions and 100 deletions
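The clone counts in the summary fall out of ownership: once `SourceKind::Python` carries no `String`, plain Python contents can be borrowed all the way through, and only the notebook path needs an owned copy of the concatenated source. A minimal sketch of that idea, using illustrative stand-in types rather than the crate's real API:

```rust
use std::borrow::Cow;

// Illustrative stand-ins for the PR's types (not the real ruff definitions).
struct Notebook {
    source_code: String, // the one owned copy a notebook keeps
}

enum SourceKind {
    Python, // contents now live alongside the kind, not inside it
    Jupyter(Notebook),
}

// Plain Python sources can be borrowed outright; only the notebook path
// pays for an owned `String` (the single remaining clone).
fn extract_contents<'a>(kind: &'a SourceKind, raw: &'a str) -> Cow<'a, str> {
    match kind {
        SourceKind::Python => Cow::Borrowed(raw),
        SourceKind::Jupyter(notebook) => Cow::Owned(notebook.source_code.clone()),
    }
}

fn main() {
    let raw = String::from("x = 1\n");

    // Zero clones for plain Python input, even when it arrives via stdin.
    assert!(matches!(
        extract_contents(&SourceKind::Python, &raw),
        Cow::Borrowed(_)
    ));

    // One clone for a notebook: the owned, concatenated cell source.
    let notebook = SourceKind::Jupyter(Notebook {
        source_code: raw.clone(),
    });
    assert!(matches!(extract_contents(&notebook, &raw), Cow::Owned(_)));
}
```

In the PR itself this shape appears as the `LintSources` struct with `contents: Cow<'a, str>`; see the diff to the diagnostics module below.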
Jupyter `Notebook` implementation:

```diff
@@ -33,7 +33,7 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
             err
         )
     })?;
-    let code = notebook.content().to_string();
+    let code = notebook.source_code().to_string();
     notebook.update_cell_content(&code);
     let mut writer = Vec::new();
     notebook.write_inner(&mut writer)?;
@@ -103,7 +103,7 @@ pub struct Notebook {
     /// separated by a newline and a trailing newline. The trailing newline
     /// is added to make sure that each cell ends with a newline which will
    /// be removed when updating the cell content.
-    content: String,
+    source_code: String,
     /// The index of the notebook. This is used to map between the concatenated
     /// source code and the original notebook.
     index: OnceCell<JupyterIndex>,
@@ -132,8 +132,8 @@ impl Notebook {
     }
 
     /// Read the Jupyter Notebook from its JSON string.
-    pub fn from_contents(contents: &str) -> Result<Self, Box<Diagnostic>> {
-        Self::from_reader(Cursor::new(contents))
+    pub fn from_source_code(source_code: &str) -> Result<Self, Box<Diagnostic>> {
+        Self::from_reader(Cursor::new(source_code))
     }
 
     /// Read a Jupyter Notebook from a [`Read`] implementor.
@@ -268,7 +268,7 @@ impl Notebook {
             // The additional newline at the end is to maintain consistency for
             // all cells. These newlines will be removed before updating the
             // source code with the transformed content. Refer `update_cell_content`.
-            content: contents.join("\n") + "\n",
+            source_code: contents.join("\n") + "\n",
             cell_offsets,
             valid_code_cells,
             trailing_newline,
@@ -404,8 +404,8 @@ impl Notebook {
     /// Return the notebook content.
     ///
     /// This is the concatenation of all Python code cells.
-    pub(crate) fn content(&self) -> &str {
-        &self.content
+    pub fn source_code(&self) -> &str {
+        &self.source_code
     }
 
     /// Return the Jupyter notebook index.
@@ -429,7 +429,7 @@ impl Notebook {
         // it depends on the offsets to extract the cell content.
         self.update_cell_offsets(source_map);
         self.update_cell_content(transformed);
-        self.content = transformed.to_string();
+        self.source_code = transformed.to_string();
     }
 
     /// Return a slice of [`Cell`] in the Jupyter notebook.
@@ -482,8 +482,8 @@ mod tests {
     /// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
     fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
         let path = test_resource_path("fixtures/jupyter/cell").join(path);
-        let contents = std::fs::read_to_string(path)?;
-        Ok(serde_json::from_str(&contents)?)
+        let source_code = std::fs::read_to_string(path)?;
+        Ok(serde_json::from_str(&source_code)?)
     }
 
     #[test]
@@ -536,7 +536,7 @@ mod tests {
     fn test_concat_notebook() -> Result<()> {
         let notebook = read_jupyter_notebook(Path::new("valid.ipynb"))?;
         assert_eq!(
-            notebook.content,
+            notebook.source_code,
             r#"def unused_variable():
     x = 1
     y = 2
```
`SourceKind`:

```diff
@@ -2,19 +2,11 @@ use crate::jupyter::Notebook;
 
 #[derive(Clone, Debug, PartialEq, is_macro::Is)]
 pub enum SourceKind {
-    Python(String),
+    Python,
     Jupyter(Notebook),
 }
 
 impl SourceKind {
-    /// Return the source content.
-    pub fn content(&self) -> &str {
-        match self {
-            SourceKind::Python(content) => content,
-            SourceKind::Jupyter(notebook) => notebook.content(),
-        }
-    }
-
     /// Return the [`Notebook`] if the source kind is [`SourceKind::Jupyter`].
     pub fn notebook(&self) -> Option<&Notebook> {
         if let Self::Jupyter(notebook) = self {
```
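With the `String` gone from the `Python` variant, contents now travel beside the kind instead of inside it, so the kind can be mutated without cloning the source. A hedged sketch of the resulting calling convention; the `lint` function here is hypothetical, standing in for entry points like `test_contents` in the diff below:

```rust
// Hypothetical miniature of the new calling convention.
#[derive(Debug)]
enum SourceKind {
    Python,
    // `Jupyter(Notebook)` elided for brevity.
}

// The source text travels as a borrowed `&str`; `SourceKind` only records
// what the source is, and is the only thing handed out mutably.
fn lint(contents: &str, source_kind: &mut SourceKind) {
    println!("linting {} bytes of {source_kind:?}", contents.len());
}

fn main() {
    let contents = String::from("x = 1\n");
    let mut source_kind = SourceKind::Python; // constructing this no longer clones `contents`
    lint(&contents, &mut source_kind);
}
```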
Test helpers (`test_path`, `test_notebook_path`, `test_snippet`, `test_contents`):

```diff
@@ -6,14 +6,13 @@ use std::path::Path;
 #[cfg(not(fuzzing))]
 use anyhow::Result;
 use itertools::Itertools;
-use ruff_python_parser::lexer::LexResult;
-
 use rustc_hash::FxHashMap;
 
 use ruff_diagnostics::{AutofixKind, Diagnostic};
 use ruff_python_ast::PySourceType;
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
+use ruff_python_parser::lexer::LexResult;
 use ruff_python_parser::AsMode;
 use ruff_python_trivia::textwrap::dedent;
 use ruff_source_file::{Locator, SourceFileBuilder};
@@ -53,7 +52,8 @@ pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<V
     let path = test_resource_path("fixtures").join(path);
     let contents = std::fs::read_to_string(&path)?;
     Ok(test_contents(
-        &mut SourceKind::Python(contents),
+        &contents,
+        &mut SourceKind::Python,
         &path,
         settings,
     ))
@@ -65,14 +65,16 @@ pub(crate) fn test_notebook_path(
     expected: impl AsRef<Path>,
     settings: &Settings,
 ) -> Result<(Vec<Message>, SourceKind, SourceKind)> {
-    let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(path.as_ref())?);
+    let notebook = read_jupyter_notebook(path.as_ref())?;
+    let contents = notebook.source_code().to_string();
+    let mut source_kind = SourceKind::Jupyter(notebook);
     let original_source_kind = source_kind.clone();
-    let messages = test_contents(&mut source_kind, path.as_ref(), settings);
+    let messages = test_contents(&contents, &mut source_kind, path.as_ref(), settings);
     let expected_notebook = read_jupyter_notebook(expected.as_ref())?;
     if let SourceKind::Jupyter(notebook) = &source_kind {
         assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets());
         assert_eq!(notebook.index(), expected_notebook.index());
-        assert_eq!(notebook.content(), expected_notebook.content());
+        assert_eq!(notebook.source_code(), expected_notebook.source_code());
     };
     Ok((messages, original_source_kind, source_kind))
 }
@@ -81,11 +83,7 @@ pub(crate) fn test_notebook_path(
 pub fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
     let path = Path::new("<filename>");
     let contents = dedent(contents);
-    test_contents(
-        &mut SourceKind::Python(contents.to_string()),
-        path,
-        settings,
-    )
+    test_contents(&contents, &mut SourceKind::Python, path, settings)
 }
 
 thread_local! {
@@ -102,11 +100,15 @@ pub(crate) fn max_iterations() -> usize {
 
 /// A convenient wrapper around [`check_path`], that additionally
 /// asserts that autofixes converge after a fixed number of iterations.
-fn test_contents(source_kind: &mut SourceKind, path: &Path, settings: &Settings) -> Vec<Message> {
-    let contents = source_kind.content().to_string();
+fn test_contents(
+    contents: &str,
+    source_kind: &mut SourceKind,
+    path: &Path,
+    settings: &Settings,
+) -> Vec<Message> {
     let source_type = PySourceType::from(path);
-    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(&contents, source_type.as_mode());
-    let locator = Locator::new(&contents);
+    let tokens: Vec<LexResult> = ruff_python_parser::tokenize(contents, source_type.as_mode());
+    let locator = Locator::new(contents);
     let stylist = Stylist::from_tokens(&tokens, &locator);
     let indexer = Indexer::from_tokens(&tokens, &locator);
     let directives = directives::extract_directives(
@@ -225,7 +227,7 @@ Source with applied fixes:
 
     let source_code = SourceFileBuilder::new(
         path.file_name().unwrap().to_string_lossy().as_ref(),
-        contents.as_str(),
+        contents,
     )
     .finish();
 
```
Diagnostics and CLI linting (`lint_path`, `lint_stdin`):

```diff
@@ -1,5 +1,6 @@
 #![cfg_attr(target_family = "wasm", allow(dead_code))]
 
+use std::borrow::Cow;
 use std::fs::write;
 use std::io;
 use std::io::Write;
@@ -75,6 +76,31 @@ impl Diagnostics {
             source_kind: FxHashMap::default(),
         }
     }
+
+    /// Generate [`Diagnostics`] based on an [`io::Error`].
+    pub(crate) fn from_io_error(err: &io::Error, path: &Path, settings: &Settings) -> Self {
+        if settings.rules.enabled(Rule::IOError) {
+            let io_err = Diagnostic::new(
+                IOError {
+                    message: err.to_string(),
+                },
+                TextRange::default(),
+            );
+            let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
+            Self::new(
+                vec![Message::from_diagnostic(io_err, dummy, TextSize::default())],
+                ImportMap::default(),
+            )
+        } else {
+            warn!(
+                "{}{}{} {err}",
+                "Failed to lint ".bold(),
+                fs::relativize_path(path).bold(),
+                ":".bold()
+            );
+            Self::default()
+        }
+    }
 }
 
 impl AddAssign for Diagnostics {
@@ -131,11 +157,11 @@ fn notebook_from_path(path: &Path) -> Result<Notebook, Box<Diagnostics>> {
 /// Parse a Jupyter Notebook from a JSON string.
 ///
 /// Returns either an indexed Python Jupyter Notebook or a diagnostic (which is empty if we skip).
-fn notebook_from_contents(
-    contents: &str,
+fn notebook_from_source_code(
+    source_code: &str,
     path: Option<&Path>,
 ) -> Result<Notebook, Box<Diagnostics>> {
-    let notebook = match Notebook::from_contents(contents) {
+    let notebook = match Notebook::from_source_code(source_code) {
         Ok(notebook) => {
             if !notebook.is_python_notebook() {
                 // Not a python notebook, this could e.g. be an R notebook which we want to just skip.
@@ -202,31 +228,6 @@ pub(crate) fn lint_path(
 
     debug!("Checking: {}", path.display());
 
-    // In case of an io error we want to exit early
-    let io_error_diagnostics = |err: io::Error, path: &Path| -> Diagnostics {
-        if settings.lib.rules.enabled(Rule::IOError) {
-            let io_err = Diagnostic::new(
-                IOError {
-                    message: err.to_string(),
-                },
-                TextRange::default(),
-            );
-            let dummy = SourceFileBuilder::new(path.to_string_lossy().as_ref(), "").finish();
-            Diagnostics::new(
-                vec![Message::from_diagnostic(io_err, dummy, TextSize::default())],
-                ImportMap::default(),
-            )
-        } else {
-            warn!(
-                "{}{}{} {err}",
-                "Failed to lint ".bold(),
-                fs::relativize_path(path).bold(),
-                ":".bold()
-            );
-            Diagnostics::default()
-        }
-    };
-
 // We have to special case this here since the Python tokenizer doesn't work with TOML.
     if is_project_toml(path) {
         let messages = if settings
@@ -238,7 +239,7 @@ pub(crate) fn lint_path(
         let contents = match std::fs::read_to_string(path) {
             Ok(contents) => contents,
             Err(err) => {
-                return Ok(io_error_diagnostics(err, path));
+                return Ok(Diagnostics::from_io_error(&err, path, &settings.lib));
             }
         };
         let source_file = SourceFileBuilder::new(path.to_string_lossy(), contents).finish();
@@ -252,27 +253,21 @@ pub(crate) fn lint_path(
         });
     }
 
-    let source_type = PySourceType::from(path);
-
-    // Read the file from disk
-    let mut source_kind = if source_type.is_jupyter() {
-        match notebook_from_path(path) {
-            Ok(notebook) => SourceKind::Jupyter(notebook),
-            Err(diagnostic) => return Ok(*diagnostic),
+    // Extract the sources from the file.
+    let LintSources {
+        source_type,
+        mut source_kind,
+        contents,
+    } = match LintSources::try_from_path(path) {
+        Ok(sources) => sources,
+        Err(SourceExtractionError::Io(err)) => {
+            return Ok(Diagnostics::from_io_error(&err, path, &settings.lib));
+        }
+        Err(SourceExtractionError::Diagnostics(diagnostics)) => {
+            return Ok(*diagnostics);
         }
-    } else {
-        // This is tested by ruff_cli integration test `unreadable_file`
-        let contents = match std::fs::read_to_string(path) {
-            Ok(contents) => contents,
-            Err(err) => {
-                return Ok(io_error_diagnostics(err, path));
-            }
-        };
-        SourceKind::Python(contents)
     };
 
-    let contents = source_kind.content().to_string();
-
     // Lint the file.
     let (
         LinterResult {
@@ -297,7 +292,7 @@ pub(crate) fn lint_path(
     if !fixed.is_empty() {
         match autofix {
             flags::FixMode::Apply => match &source_kind {
-                SourceKind::Python(_) => {
+                SourceKind::Python => {
                     write(path, transformed.as_bytes())?;
                 }
                 SourceKind::Jupyter(notebook) => {
@@ -306,9 +301,9 @@ pub(crate) fn lint_path(
             },
             flags::FixMode::Diff => {
                 match &source_kind {
-                    SourceKind::Python(_) => {
+                    SourceKind::Python => {
                         let mut stdout = io::stdout().lock();
-                        TextDiff::from_lines(contents.as_str(), &transformed)
+                        TextDiff::from_lines(&contents, &transformed)
                             .unified_diff()
                             .header(&fs::relativize_path(path), &fs::relativize_path(path))
                             .to_writer(&mut stdout)?;
@@ -441,20 +436,22 @@ pub(crate) fn lint_stdin(
     noqa: flags::Noqa,
     autofix: flags::FixMode,
 ) -> Result<Diagnostics> {
-    let source_type = path.map(PySourceType::from).unwrap_or_default();
-
-    let mut source_kind = if source_type.is_jupyter() {
-        // SAFETY: Jupyter isn't the default type, so we must have a path.
-        match notebook_from_contents(contents, path) {
-            Ok(notebook) => SourceKind::Jupyter(notebook),
-            Err(diagnostic) => return Ok(*diagnostic),
+    // Extract the sources from the file.
+    let LintSources {
+        source_type,
+        mut source_kind,
+        contents,
+    } = match LintSources::try_from_source_code(contents, path) {
+        Ok(sources) => sources,
+        Err(SourceExtractionError::Io(err)) => {
+            // SAFETY: An `io::Error` can only occur if we're reading from a path.
+            return Ok(Diagnostics::from_io_error(&err, path.unwrap(), settings));
+        }
+        Err(SourceExtractionError::Diagnostics(diagnostics)) => {
+            return Ok(*diagnostics);
         }
-    } else {
-        SourceKind::Python(contents.to_string())
     };
 
-    let contents = source_kind.content().to_string();
-
     // Lint the inputs.
     let (
         LinterResult {
@@ -484,7 +481,7 @@ pub(crate) fn lint_stdin(
         flags::FixMode::Diff => {
             // But only write a diff if it's non-empty.
             if !fixed.is_empty() {
-                let text_diff = TextDiff::from_lines(contents.as_str(), &transformed);
+                let text_diff = TextDiff::from_lines(&contents, &transformed);
                 let mut unified_diff = text_diff.unified_diff();
                 if let Some(path) = path {
                     unified_diff
@@ -555,11 +552,84 @@ pub(crate) fn lint_stdin(
     })
 }
 
+#[derive(Debug)]
+struct LintSources<'a> {
+    /// The "type" of source code, e.g. `.py`, `.pyi`, `.ipynb`, etc.
+    source_type: PySourceType,
+    /// The "kind" of source, e.g. Python file, Jupyter Notebook, etc.
+    source_kind: SourceKind,
+    /// The contents of the source code.
+    contents: Cow<'a, str>,
+}
+
+impl<'a> LintSources<'a> {
+    /// Extract the lint [`LintSources`] from the given file path.
+    fn try_from_path(path: &Path) -> Result<LintSources, SourceExtractionError> {
+        let source_type = PySourceType::from(path);
+
+        // Read the file from disk.
+        if source_type.is_jupyter() {
+            let notebook = notebook_from_path(path).map_err(SourceExtractionError::Diagnostics)?;
+            let contents = notebook.source_code().to_string();
+            let source_kind = SourceKind::Jupyter(notebook);
+            Ok(LintSources {
+                source_type,
+                source_kind,
+                contents: Cow::Owned(contents),
+            })
+        } else {
+            // This is tested by ruff_cli integration test `unreadable_file`
+            let contents = std::fs::read_to_string(path).map_err(SourceExtractionError::Io)?;
+            Ok(LintSources {
+                source_type,
+                source_kind: SourceKind::Python,
+                contents: Cow::Owned(contents),
+            })
+        }
+    }
+
+    /// Extract the lint [`LintSources`] from the raw string contents, optionally accompanied by a
+    /// file path indicating the path to the file from which the contents were read. If provided,
+    /// the file path should be used for diagnostics, but not for reading the file from disk.
+    fn try_from_source_code(
+        source_code: &'a str,
+        path: Option<&Path>,
+    ) -> Result<LintSources<'a>, SourceExtractionError> {
+        let source_type = path.map(PySourceType::from).unwrap_or_default();
+
+        if source_type.is_jupyter() {
+            let notebook = notebook_from_source_code(source_code, path)
+                .map_err(SourceExtractionError::Diagnostics)?;
+            let contents = notebook.source_code().to_string();
+            let source_kind = SourceKind::Jupyter(notebook);
+            Ok(LintSources {
+                source_type,
+                source_kind,
+                contents: Cow::Owned(contents),
+            })
+        } else {
+            Ok(LintSources {
+                source_type,
+                source_kind: SourceKind::Python,
+                contents: Cow::Borrowed(source_code),
+            })
+        }
+    }
+}
+
+#[derive(Debug)]
+enum SourceExtractionError {
+    /// The extraction failed due to an [`io::Error`].
+    Io(io::Error),
+    /// The extraction failed, and generated [`Diagnostics`] to report.
+    Diagnostics(Box<Diagnostics>),
+}
+
 #[cfg(test)]
 mod tests {
     use std::path::Path;
 
-    use crate::diagnostics::{notebook_from_contents, notebook_from_path, Diagnostics};
+    use crate::diagnostics::{notebook_from_path, notebook_from_source_code, Diagnostics};
 
     #[test]
     fn test_r() {
@@ -573,7 +643,7 @@ mod tests {
         let contents = std::fs::read_to_string(path).unwrap();
         // No diagnostics is used as skip signal.
         assert_eq!(
-            notebook_from_contents(&contents, Some(path)).unwrap_err(),
+            notebook_from_source_code(&contents, Some(path)).unwrap_err(),
             Box::<Diagnostics>::default()
        );
     }
```
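The `SourceExtractionError` enum added above replaces the old `io_error_diagnostics` closure in `lint_path`: extraction failures now flow through one typed channel, and each caller decides how to turn them into `Diagnostics`. A simplified sketch of the pattern, with a `String` payload standing in for `Box<Diagnostics>`:

```rust
use std::io;

// Simplified mirror of the enum added in this commit; the `String` payload
// stands in for `Box<Diagnostics>`.
#[derive(Debug)]
enum SourceExtractionError {
    Io(io::Error),
    Diagnostics(String),
}

// Funnel file reading through the typed error instead of an ad-hoc closure.
fn try_extract(path: &str) -> Result<String, SourceExtractionError> {
    std::fs::read_to_string(path).map_err(SourceExtractionError::Io)
}

fn main() {
    let results = [
        try_extract("definitely-missing.py"),
        // The notebook path can also fail with a ready-made report.
        Err(SourceExtractionError::Diagnostics(String::from(
            "not a Python notebook; skipping",
        ))),
    ];
    for result in results {
        // One `match` per call site replaces the old closure-based handling.
        match result {
            Ok(contents) => println!("read {} bytes", contents.len()),
            Err(SourceExtractionError::Io(err)) => eprintln!("failed to read: {err}"),
            Err(SourceExtractionError::Diagnostics(report)) => eprintln!("{report}"),
        }
    }
}
```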