ruff/crates/ruff_notebook/src/cell.rs
Dhruv Manilawala 5caabe54b6
Allow ipytest cell magic (#13745)
## Summary

fixes: #13718 

## Test Plan

Using the notebook as mentioned in
https://github.com/astral-sh/ruff/issues/13718#issuecomment-2410631674,
this PR does not give the "F821 Undefined name `test_sorted`"
diagnostic.
2024-10-14 15:48:33 +05:30

325 lines
10 KiB
Rust

use std::fmt;
use std::ops::{Deref, DerefMut};
use itertools::Itertools;
use ruff_text_size::{TextRange, TextSize};
use crate::schema::{Cell, SourceValue};
use crate::CellMetadata;
impl fmt::Display for SourceValue {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
SourceValue::String(string) => f.write_str(string),
SourceValue::StringArray(string_array) => {
for string in string_array {
f.write_str(string)?;
}
Ok(())
}
}
}
}
impl Cell {
/// Return the [`SourceValue`] of the cell.
pub fn source(&self) -> &SourceValue {
match self {
Cell::Code(cell) => &cell.source,
Cell::Markdown(cell) => &cell.source,
Cell::Raw(cell) => &cell.source,
}
}
pub fn is_code_cell(&self) -> bool {
matches!(self, Cell::Code(_))
}
pub fn metadata(&self) -> &CellMetadata {
match self {
Cell::Code(cell) => &cell.metadata,
Cell::Markdown(cell) => &cell.metadata,
Cell::Raw(cell) => &cell.metadata,
}
}
/// Update the [`SourceValue`] of the cell.
pub(crate) fn set_source(&mut self, source: SourceValue) {
match self {
Cell::Code(cell) => cell.source = source,
Cell::Markdown(cell) => cell.source = source,
Cell::Raw(cell) => cell.source = source,
}
}
/// Return `true` if it's a valid code cell.
///
/// A valid code cell is a cell where:
/// 1. The cell type is [`Cell::Code`]
/// 2. The source doesn't contain a cell magic
/// 3. If the language id is set, it should be `python`
pub(crate) fn is_valid_python_code_cell(&self) -> bool {
let source = match self {
Cell::Code(cell)
if cell
.metadata
.vscode
.as_ref()
.map_or(true, |vscode| vscode.language_id == "python") =>
{
&cell.source
}
_ => return false,
};
// Ignore cells containing cell magic as they act on the entire cell
// as compared to line magic which acts on a single line.
!match source {
SourceValue::String(string) => Self::is_magic_cell(string.lines()),
SourceValue::StringArray(string_array) => {
Self::is_magic_cell(string_array.iter().map(String::as_str))
}
}
}
/// Returns `true` if a cell should be ignored due to the use of cell magics.
fn is_magic_cell<'a>(lines: impl Iterator<Item = &'a str>) -> bool {
let mut lines = lines.peekable();
// Detect automatic line magics (automagic), which aren't supported by the parser. If a line
// magic uses automagic, Jupyter doesn't allow following it with non-magic lines anyway, so
// we aren't missing out on any valid Python code.
//
// For example, this is valid:
// ```jupyter
// cat /path/to/file
// cat /path/to/file
// ```
//
// But this is invalid:
// ```jupyter
// cat /path/to/file
// x = 1
// ```
//
// See: https://ipython.readthedocs.io/en/stable/interactive/magics.html
if let Some(line) = lines.peek() {
let mut tokens = line.split_whitespace();
// The first token must be an automagic, like `load_exit`.
if tokens.next().is_some_and(|token| {
matches!(
token,
"alias"
| "alias_magic"
| "autoawait"
| "autocall"
| "automagic"
| "bookmark"
| "cd"
| "code_wrap"
| "colors"
| "conda"
| "config"
| "debug"
| "dhist"
| "dirs"
| "doctest_mode"
| "edit"
| "env"
| "gui"
| "history"
| "killbgscripts"
| "load"
| "load_ext"
| "loadpy"
| "logoff"
| "logon"
| "logstart"
| "logstate"
| "logstop"
| "lsmagic"
| "macro"
| "magic"
| "mamba"
| "matplotlib"
| "micromamba"
| "notebook"
| "page"
| "pastebin"
| "pdb"
| "pdef"
| "pdoc"
| "pfile"
| "pinfo"
| "pinfo2"
| "pip"
| "popd"
| "pprint"
| "precision"
| "prun"
| "psearch"
| "psource"
| "pushd"
| "pwd"
| "pycat"
| "pylab"
| "quickref"
| "recall"
| "rehashx"
| "reload_ext"
| "rerun"
| "reset"
| "reset_selective"
| "run"
| "save"
| "sc"
| "set_env"
| "sx"
| "system"
| "tb"
| "time"
| "timeit"
| "unalias"
| "unload_ext"
| "who"
| "who_ls"
| "whos"
| "xdel"
| "xmode"
)
}) {
// The second token must _not_ be an operator, like `=` (to avoid false positives).
// The assignment operators can never follow an automagic. Some binary operators
// _can_, though (e.g., `cd -` is valid), so we omit them.
if !tokens.next().is_some_and(|token| {
matches!(
token,
"=" | "+=" | "-=" | "*=" | "/=" | "//=" | "%=" | "**=" | "&=" | "|=" | "^="
)
}) {
return true;
}
}
}
// Detect cell magics (which operate on multiple lines).
lines.any(|line| {
let Some(first) = line.split_whitespace().next() else {
return false;
};
if first.len() < 2 {
return false;
}
let Some(command) = first.strip_prefix("%%") else {
return false;
};
// These cell magics are special in that the lines following them are valid
// Python code and the variables defined in that scope are available to the
// rest of the notebook.
//
// For example:
//
// Cell 1:
// ```python
// x = 1
// ```
//
// Cell 2:
// ```python
// %%time
// y = x
// ```
//
// Cell 3:
// ```python
// print(y) # Here, `y` is available.
// ```
//
// This is to avoid false positives when these variables are referenced
// elsewhere in the notebook.
//
// Refer https://github.com/astral-sh/ruff/issues/13718 for `ipytest`.
!matches!(
command,
"capture"
| "debug"
| "ipytest"
| "prun"
| "pypy"
| "python"
| "python3"
| "time"
| "timeit"
)
})
}
}
/// Cell offsets are used to keep track of the start and end offsets of each
/// cell in the concatenated source code. These offsets are in sorted order.
#[derive(Clone, Debug, Default, PartialEq)]
pub struct CellOffsets(Vec<TextSize>);
impl CellOffsets {
/// Create a new [`CellOffsets`] with the given capacity.
pub(crate) fn with_capacity(capacity: usize) -> Self {
Self(Vec::with_capacity(capacity))
}
/// Push a new offset to the end of the [`CellOffsets`].
///
/// # Panics
///
/// Panics if the offset is less than the last offset pushed.
pub(crate) fn push(&mut self, offset: TextSize) {
if let Some(last_offset) = self.0.last() {
assert!(
*last_offset <= offset,
"Offsets must be pushed in sorted order"
);
}
self.0.push(offset);
}
/// Returns the range of the cell containing the given offset, if any.
pub fn containing_range(&self, offset: TextSize) -> Option<TextRange> {
self.iter().tuple_windows().find_map(|(start, end)| {
if *start <= offset && offset < *end {
Some(TextRange::new(*start, *end))
} else {
None
}
})
}
/// Returns `true` if the given range contains a cell boundary.
pub fn has_cell_boundary(&self, range: TextRange) -> bool {
self.binary_search_by(|offset| {
if range.start() <= *offset {
if range.end() < *offset {
std::cmp::Ordering::Greater
} else {
std::cmp::Ordering::Equal
}
} else {
std::cmp::Ordering::Less
}
})
.is_ok()
}
}
impl Deref for CellOffsets {
type Target = [TextSize];
fn deref(&self) -> &Self::Target {
&self.0
}
}
impl DerefMut for CellOffsets {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.0
}
}