Add roundtrip support for Jupyter notebook (#5028)

## Summary

Add round-trip support for Jupyter notebooks. The round trip performs the following steps:

1. Read the notebook
2. Extract the source code content
3. Use that content to update the notebook itself (the result should be exactly the same as the input [^1])
4. Serialize into JSON and print it to stdout

## Test Plan

`cargo run --all-features --bin ruff_dev --package ruff_dev --
round-trip <path/to/notebook.ipynb>`

<details><summary>Example output:</summary>
<p>

```
{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "f3c286e9-fa52-4440-816f-4449232f199a",
   "metadata": {},
   "source": [
    "# Ruff Test"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "a2b7bc6c-778a-4b07-86ae-dde5a2d9511e",
   "metadata": {},
   "source": [
    "Markdown block before the first import"
   ]
  },
  {
   "cell_type": "code",
   "id": "5e3ef98e-224c-450a-80e6-be442ad50907",
   "metadata": {
    "tags": []
   },
   "source": "",
   "execution_count": 1,
   "outputs": []
  },
  {
   "cell_type": "code",
   "id": "6bced3f8-e0a4-450c-ae7c-f60ad5671ee9",
   "metadata": {},
   "source": "import contextlib\n\nwith contextlib.suppress(ValueError):\n    print()\n",
   "outputs": []
  },
  {
   "cell_type": "code",
   "id": "d7102cfd-5bb5-4f5b-a3b8-07a7b8cca34c",
   "metadata": {},
   "source": "import random\n\nrandom.randint(10, 20)",
   "outputs": []
  },
  {
   "cell_type": "code",
   "id": "88471d1c-7429-4967-898f-b0088fcb4c53",
   "metadata": {},
   "source": "foo = 1\nif foo < 2:\n    msg = f\"Invalid foo: {foo}\"\n    raise ValueError(msg)",
   "outputs": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python (ruff-playground)",
   "name": "ruff-playground",
   "language": "python"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "pygments_lexer": "ipython3",
   "nbconvert_exporter": "python",
   "version": "3.11.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
```

</p>
</details> 

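[^1]: The JSON type of a value might be different, e.g. in the example output above the `source` of a code cell is a single string while the `source` of a markdown cell is a list of strings (https://github.com/astral-sh/ruff/pull/4665#discussion_r1212663495)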

Part of #1218
This commit is contained in:
Dhruv Manilawala 2023-06-12 23:27:45 +05:30 committed by GitHub
parent a77d2df934
commit cb4f086cbf
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 9 deletions

View file

@ -1,6 +1,6 @@
use std::cmp::Ordering; use std::cmp::Ordering;
use std::fs::File; use std::fs::File;
use std::io::{BufReader, BufWriter}; use std::io::{BufReader, BufWriter, Cursor, Write};
use std::iter; use std::iter;
use std::path::Path; use std::path::Path;
@ -23,6 +23,22 @@ pub const JUPYTER_NOTEBOOK_EXT: &str = "ipynb";
const MAGIC_PREFIX: [&str; 3] = ["%", "!", "?"]; const MAGIC_PREFIX: [&str; 3] = ["%", "!", "?"];
/// Round-trip a Jupyter notebook: read it from `path`, push its own extracted
/// source code back into it, and return the notebook serialized as JSON.
///
/// # Errors
///
/// Returns an error if the notebook cannot be read, if writing the serialized
/// notebook fails, or if the serialized bytes are not valid UTF-8.
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
    let mut notebook = match Notebook::read(path) {
        Ok(notebook) => notebook,
        Err(err) => {
            return Err(anyhow::anyhow!(
                "Failed to read notebook file `{}`: {:?}",
                path.display(),
                err
            ));
        }
    };

    // Take an owned copy of the content before handing it back to the notebook.
    let source = notebook.content().to_string();
    notebook.update_cell_content(&source);

    // Serialize into an in-memory buffer rather than a file.
    let mut sink = Cursor::new(Vec::new());
    notebook.write_inner(&mut sink)?;
    let json = String::from_utf8(sink.into_inner())?;
    Ok(json)
}
/// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`). /// Return `true` if the [`Path`] appears to be that of a jupyter notebook file (`.ipynb`).
pub fn is_jupyter_notebook(path: &Path) -> bool { pub fn is_jupyter_notebook(path: &Path) -> bool {
path.extension() path.extension()
@ -370,13 +386,18 @@ impl Notebook {
.map_or(true, |language| language.name == "python") .map_or(true, |language| language.name == "python")
} }
/// Serialize `self.raw` as pretty-printed JSON into `writer`, using an indent of 1, just like black.
fn write_inner(&self, writer: &mut impl Write) -> anyhow::Result<()> {
    // https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041
    let one_space_indent = serde_json::ser::PrettyFormatter::with_indent(b" ");
    let mut serializer = serde_json::Serializer::with_formatter(writer, one_space_indent);
    self.raw.serialize(&mut serializer)?;
    Ok(())
}
/// Write back with an indent of 1, just like black /// Write back with an indent of 1, just like black
pub fn write(&self, path: &Path) -> anyhow::Result<()> { pub fn write(&self, path: &Path) -> anyhow::Result<()> {
let mut writer = BufWriter::new(File::create(path)?); let mut writer = BufWriter::new(File::create(path)?);
// https://github.com/psf/black/blob/69ca0a4c7a365c5f5eea519a90980bab72cab764/src/black/__init__.py#LL1041 self.write_inner(&mut writer)?;
let formatter = serde_json::ser::PrettyFormatter::with_indent(b" ");
let mut ser = serde_json::Serializer::with_formatter(&mut writer, formatter);
self.raw.serialize(&mut ser)?;
Ok(()) Ok(())
} }
} }

View file

@ -1,4 +1,4 @@
//! Run round-trip source code generation on a given Python file. //! Run round-trip source code generation on a given Python or Jupyter notebook file.
#![allow(clippy::print_stdout, clippy::print_stderr)] #![allow(clippy::print_stdout, clippy::print_stderr)]
use std::fs; use std::fs;
@ -6,17 +6,23 @@ use std::path::PathBuf;
use anyhow::Result; use anyhow::Result;
use ruff::jupyter;
use ruff::round_trip; use ruff::round_trip;
#[derive(clap::Args)] #[derive(clap::Args)]
pub(crate) struct Args { pub(crate) struct Args {
/// Python file to round-trip. /// Python or Jupyter notebook file to round-trip.
#[arg(required = true)] #[arg(required = true)]
file: PathBuf, file: PathBuf,
} }
pub(crate) fn main(args: &Args) -> Result<()> { pub(crate) fn main(args: &Args) -> Result<()> {
let contents = fs::read_to_string(&args.file)?; let path = args.file.as_path();
println!("{}", round_trip(&contents, &args.file.to_string_lossy())?); if jupyter::is_jupyter_notebook(path) {
println!("{}", jupyter::round_trip(path)?);
} else {
let contents = fs::read_to_string(&args.file)?;
println!("{}", round_trip(&contents, &args.file.to_string_lossy())?);
}
Ok(()) Ok(())
} }