mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 18:58:04 +00:00
Detect and ignore Jupyter automagics (#8398)
## Summary LangChain is attempting to use Ruff over their Jupyter notebooks (https://github.com/langchain-ai/langchain/pull/12677/files), but running into a bunch of syntax errors, the majority of which come from our inability to recognize automagic. If you run this in a cell: ```jupyter pip install requests ``` Jupyter will automatically treat that as: ```jupyter %pip install requests ``` We need to ignore cells that use these automagics, since the parser doesn't understand them. (I guess we could support it in the parser, but that seems much harder?). The good news is that AFAICT Jupyter doesn't let you mix automagics with code, so by skipping these cells, we don't miss out on analyzing any Python code. ## Test Plan 1. `cargo test` 2. Ran over LangChain and verified that there are no more errors relating to `pip install` automagics.
This commit is contained in:
parent
2ff1afb15c
commit
f64c389654
5 changed files with 154 additions and 6 deletions
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic.json
vendored
Normal file
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic.json
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"execution_count": null,
|
||||
"cell_type": "code",
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["pip install requests"]
|
||||
}
|
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic_after_code.json
vendored
Normal file
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic_after_code.json
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"execution_count": null,
|
||||
"cell_type": "code",
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["x = 1\n", "pip install requests"]
|
||||
}
|
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic_before_code.json
vendored
Normal file
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagic_before_code.json
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"execution_count": null,
|
||||
"cell_type": "code",
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["pip install requests\n", "x = 1"]
|
||||
}
|
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagics.json
vendored
Normal file
8
crates/ruff_notebook/resources/test/fixtures/jupyter/cell/automagics.json
vendored
Normal file
|
@ -0,0 +1,8 @@
|
|||
{
|
||||
"execution_count": null,
|
||||
"cell_type": "code",
|
||||
"id": "1",
|
||||
"metadata": {},
|
||||
"outputs": [],
|
||||
"source": ["pip install requests\n", "pip install requests"]
|
||||
}
|
|
@ -80,14 +80,126 @@ impl Cell {
|
|||
// Ignore cells containing cell magic as they act on the entire cell
|
||||
// as compared to line magic which acts on a single line.
|
||||
!match source {
|
||||
SourceValue::String(string) => string
|
||||
.lines()
|
||||
.any(|line| line.trim_start().starts_with("%%")),
|
||||
SourceValue::StringArray(string_array) => string_array
|
||||
.iter()
|
||||
.any(|line| line.trim_start().starts_with("%%")),
|
||||
SourceValue::String(string) => Self::is_magic_cell(string.lines()),
|
||||
SourceValue::StringArray(string_array) => {
|
||||
Self::is_magic_cell(string_array.iter().map(String::as_str))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if a cell should be ignored due to the use of magics that the parser cannot
/// handle: automatic line magics ("automagics") on the first line, or cell magics (`%%`) on any
/// line.
///
/// `lines` yields the lines of the cell's source, in order. Lines may or may not carry a
/// trailing newline; only leading whitespace and whitespace-separated tokens are inspected.
fn is_magic_cell<'a>(lines: impl Iterator<Item = &'a str>) -> bool {
    let mut lines = lines.peekable();

    // Detect automatic line magics (automagic), which aren't supported by the parser. If a line
    // magic uses automagic, Jupyter doesn't allow following it with non-magic lines anyway, so
    // we aren't missing out on any valid Python code.
    //
    // For example, this is valid:
    // ```jupyter
    // cat /path/to/file
    // cat /path/to/file
    // ```
    //
    // But this is invalid:
    // ```jupyter
    // cat /path/to/file
    // x = 1
    // ```
    //
    // See: https://ipython.readthedocs.io/en/stable/interactive/magics.html
    //
    // Only the first line is inspected; `peek` leaves it in the iterator so that the cell-magic
    // scan below still sees every line.
    if let Some(&first_line) = lines.peek() {
        let mut tokens = first_line.split_whitespace();

        // The first token must be the name of a built-in line magic.
        let starts_with_automagic = tokens.next().is_some_and(|token| {
            matches!(
                token,
                "alias"
                    | "alias_magic"
                    | "autoawait"
                    | "autocall"
                    | "automagic"
                    | "bookmark"
                    | "cd"
                    | "code_wrap"
                    | "colors"
                    | "conda"
                    | "config"
                    | "debug"
                    | "dhist"
                    | "dirs"
                    | "doctest_mode"
                    | "edit"
                    | "env"
                    | "gui"
                    | "history"
                    | "killbgscripts"
                    | "load"
                    | "load_ext"
                    | "loadpy"
                    | "logoff"
                    | "logon"
                    | "logstart"
                    | "logstate"
                    | "logstop"
                    | "lsmagic"
                    | "macro"
                    | "magic"
                    | "mamba"
                    | "matplotlib"
                    | "micromamba"
                    | "notebook"
                    | "page"
                    | "pastebin"
                    | "pdb"
                    | "pdef"
                    | "pdoc"
                    | "pfile"
                    | "pinfo"
                    | "pinfo2"
                    | "pip"
                    | "popd"
                    | "pprint"
                    | "precision"
                    | "prun"
                    | "psearch"
                    | "psource"
                    | "pushd"
                    | "pwd"
                    | "pycat"
                    | "pylab"
                    | "quickref"
                    | "recall"
                    | "rehashx"
                    | "reload_ext"
                    | "rerun"
                    | "reset"
                    | "reset_selective"
                    | "run"
                    | "save"
                    | "sc"
                    | "set_env"
                    | "sx"
                    | "system"
                    | "tb"
                    | "time"
                    | "timeit"
                    | "unalias"
                    | "unload_ext"
                    | "who"
                    | "who_ls"
                    | "whos"
                    | "xdel"
                    | "xmode"
            )
        });

        // Many magic names are also perfectly good Python identifiers (`time = 1`,
        // `config = {}`, `env += [...]`). An assignment operator can never be the argument of
        // an automagic, so such a line is ordinary Python and must not hide the cell from
        // analysis. Other operators are intentionally not listed: `cd -`, for instance, is a
        // legitimate automagic invocation.
        let followed_by_assignment = tokens.next().is_some_and(|token| {
            matches!(
                token,
                "=" | "+="
                    | "-="
                    | "*="
                    | "/="
                    | "//="
                    | "%="
                    | "**="
                    | "@="
                    | "&="
                    | "|="
                    | "^="
                    | ">>="
                    | "<<="
            )
        });

        if starts_with_automagic && !followed_by_assignment {
            return true;
        }
    }

    // Detect cell magics (which operate on multiple lines).
    lines.any(|line| line.trim_start().starts_with("%%"))
}
|
||||
}
|
||||
|
||||
/// An error that can occur while deserializing a Jupyter Notebook.
|
||||
|
@ -481,6 +593,10 @@ mod tests {
|
|||
#[test_case(Path::new("code_and_magic.json"), true; "code_and_magic")]
|
||||
#[test_case(Path::new("only_code.json"), true; "only_code")]
|
||||
#[test_case(Path::new("cell_magic.json"), false; "cell_magic")]
|
||||
#[test_case(Path::new("automagic.json"), false; "automagic")]
|
||||
#[test_case(Path::new("automagics.json"), false; "automagics")]
|
||||
#[test_case(Path::new("automagic_before_code.json"), false; "automagic_before_code")]
|
||||
#[test_case(Path::new("automagic_after_code.json"), true; "automagic_after_code")]
|
||||
fn test_is_valid_code_cell(path: &Path, expected: bool) -> Result<()> {
|
||||
/// Read a Jupyter cell from the `resources/test/fixtures/jupyter/cell` directory.
|
||||
fn read_jupyter_cell(path: impl AsRef<Path>) -> Result<Cell> {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue