Handle trailing newline in Jupyter notebook JSON string (#5202)

## Summary

Handle trailing newline in Jupyter Notebook JSON string similar to how
`black`
does it.

## Test Plan

Add test cases when the JSON string for notebook ends with and without a
newline.

resolves: #5190
This commit is contained in:
Dhruv Manilawala 2023-06-20 15:49:11 +05:30 committed by GitHub
parent 773e79b481
commit 062b6e5c2b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 94 additions and 26 deletions

View file

@ -34,4 +34,4 @@
},
"nbformat": 4,
"nbformat_minor": 5
}
}

View file

@ -0,0 +1,38 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "4cec6161-f594-446c-ab65-37395bbb3127",
"metadata": {},
"outputs": [],
"source": [
"import math\n",
"import os\n",
"\n",
"_ = math.pi"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python (ruff)",
"language": "python",
"name": "ruff"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.3"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View file

@ -1,6 +1,6 @@
use std::cmp::Ordering;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::io::{BufReader, BufWriter, Read, Seek, SeekFrom, Write};
use std::iter;
use std::path::Path;
@ -34,9 +34,9 @@ pub fn round_trip(path: &Path) -> anyhow::Result<String> {
})?;
let code = notebook.content().to_string();
notebook.update_cell_content(&code);
let mut buffer = BufWriter::new(Vec::new());
notebook.write_inner(&mut buffer)?;
Ok(String::from_utf8(buffer.into_inner()?)?)
let mut writer = Vec::new();
notebook.write_inner(&mut writer)?;
Ok(String::from_utf8(writer)?)
}
/// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`).
@ -113,13 +113,17 @@ pub struct Notebook {
cell_offsets: Vec<TextSize>,
/// The cell index of all valid code cells in the notebook.
valid_code_cells: Vec<u32>,
/// Flag to indicate if the JSON string of the notebook has a trailing newline.
trailing_newline: bool,
}
impl Notebook {
/// Read the Jupyter Notebook from the given [`Path`].
///
/// See also the black implementation
/// <https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#L1017-L1046>
pub fn read(path: &Path) -> Result<Self, Box<Diagnostic>> {
let reader = BufReader::new(File::open(path).map_err(|err| {
let mut reader = BufReader::new(File::open(path).map_err(|err| {
Diagnostic::new(
IOError {
message: format!("{err}"),
@ -127,6 +131,18 @@ impl Notebook {
TextRange::default(),
)
})?);
let trailing_newline = reader.seek(SeekFrom::End(-1)).is_ok_and(|_| {
let mut buf = [0; 1];
reader.read_exact(&mut buf).is_ok_and(|_| buf[0] == b'\n')
});
reader.rewind().map_err(|err| {
Diagnostic::new(
IOError {
message: format!("{err}"),
},
TextRange::default(),
)
})?;
let raw_notebook: RawNotebook = match serde_json::from_reader(reader) {
Ok(notebook) => notebook,
Err(err) => {
@ -240,6 +256,7 @@ impl Notebook {
content: contents.join("\n") + "\n",
cell_offsets,
valid_code_cells,
trailing_newline,
})
}
@ -411,8 +428,11 @@ impl Notebook {
fn write_inner(&self, writer: &mut impl Write) -> anyhow::Result<()> {
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut ser = serde_json::Serializer::with_formatter(writer, formatter);
SortAlphabetically(&self.raw).serialize(&mut ser)?;
let mut serializer = serde_json::Serializer::with_formatter(writer, formatter);
SortAlphabetically(&self.raw).serialize(&mut serializer)?;
if self.trailing_newline {
writeln!(serializer.into_inner())?;
}
Ok(())
}
@ -426,7 +446,6 @@ impl Notebook {
#[cfg(test)]
mod test {
use std::io::BufWriter;
use std::path::Path;
use anyhow::Result;
@ -438,7 +457,7 @@ mod test {
use crate::jupyter::schema::Cell;
use crate::jupyter::Notebook;
use crate::registry::Rule;
use crate::test::{test_notebook_path, test_resource_path};
use crate::test::{read_jupyter_notebook, test_notebook_path, test_resource_path};
use crate::{assert_messages, settings};
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
@ -450,15 +469,13 @@ mod test {
#[test]
fn test_valid() {
let path = Path::new("resources/test/fixtures/jupyter/valid.ipynb");
assert!(Notebook::read(path).is_ok());
assert!(read_jupyter_notebook(Path::new("valid.ipynb")).is_ok());
}
#[test]
fn test_r() {
// We can load this, it will be filtered out later
let path = Path::new("resources/test/fixtures/jupyter/R.ipynb");
assert!(Notebook::read(path).is_ok());
assert!(read_jupyter_notebook(Path::new("R.ipynb")).is_ok());
}
#[test]
@ -506,9 +523,8 @@ mod test {
}
#[test]
fn test_concat_notebook() {
let path = Path::new("resources/test/fixtures/jupyter/valid.ipynb");
let notebook = Notebook::read(path).unwrap();
fn test_concat_notebook() -> Result<()> {
let notebook = read_jupyter_notebook(Path::new("valid.ipynb"))?;
assert_eq!(
notebook.content,
r#"def unused_variable():
@ -546,6 +562,7 @@ print("after empty cells")
198.into()
]
);
Ok(())
}
#[test]
@ -568,12 +585,26 @@ print("after empty cells")
Path::new("after_fix.ipynb"),
&settings::Settings::for_rule(Rule::UnusedImport),
)?;
let mut writer = BufWriter::new(Vec::new());
let mut writer = Vec::new();
source_kind.expect_jupyter().write_inner(&mut writer)?;
let actual = String::from_utf8(writer.into_inner()?)?;
let actual = String::from_utf8(writer)?;
let expected =
std::fs::read_to_string(test_resource_path("fixtures/jupyter/after_fix.ipynb"))?;
assert_eq!(actual, expected);
Ok(())
}
#[test_case(Path::new("before_fix.ipynb"), true; "trailing_newline")]
#[test_case(Path::new("no_trailing_newline.ipynb"), false; "no_trailing_newline")]
fn test_trailing_newline(path: &Path, trailing_newline: bool) -> Result<()> {
let notebook = read_jupyter_notebook(path)?;
assert_eq!(notebook.trailing_newline, trailing_newline);
let mut writer = Vec::new();
notebook.write_inner(&mut writer)?;
let string = String::from_utf8(writer)?;
assert_eq!(string.ends_with('\n'), trailing_newline);
Ok(())
}
}

View file

@ -25,8 +25,9 @@ use crate::settings::{flags, Settings};
use crate::source_kind::SourceKind;
#[cfg(not(fuzzing))]
fn read_jupyter_notebook(path: &Path) -> Result<Notebook> {
Notebook::read(path).map_err(|err| {
pub(crate) fn read_jupyter_notebook(path: &Path) -> Result<Notebook> {
let path = test_resource_path("fixtures/jupyter").join(path);
Notebook::read(&path).map_err(|err| {
anyhow::anyhow!(
"Failed to read notebook file `{}`: {:?}",
path.display(),
@ -58,11 +59,9 @@ pub(crate) fn test_notebook_path(
expected: impl AsRef<Path>,
settings: &Settings,
) -> Result<(Vec<Message>, SourceKind)> {
let path = test_resource_path("fixtures/jupyter").join(path);
let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(&path)?);
let messages = test_contents(&mut source_kind, &path, settings);
let expected_notebook =
read_jupyter_notebook(&test_resource_path("fixtures/jupyter").join(expected))?;
let mut source_kind = SourceKind::Jupyter(read_jupyter_notebook(path.as_ref())?);
let messages = test_contents(&mut source_kind, path.as_ref(), settings);
let expected_notebook = read_jupyter_notebook(expected.as_ref())?;
if let SourceKind::Jupyter(notebook) = &source_kind {
assert_eq!(notebook.cell_offsets(), expected_notebook.cell_offsets());
assert_eq!(notebook.index(), expected_notebook.index());