mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-20 04:29:47 +00:00
Reduce notebook memory footprint (#21319)
This commit is contained in:
parent
33b942c7ad
commit
36cce347fd
3 changed files with 84 additions and 71 deletions
|
|
@ -112,16 +112,16 @@ impl std::fmt::Display for Diff<'_> {
|
|||
// `None`, indicating a regular script file, all the lines will be in one "cell" under the
|
||||
// `None` key.
|
||||
let cells = if let Some(notebook_index) = &self.notebook_index {
|
||||
let mut last_cell = OneIndexed::MIN;
|
||||
let mut last_cell_index = OneIndexed::MIN;
|
||||
let mut cells: Vec<(Option<OneIndexed>, TextSize)> = Vec::new();
|
||||
for (row, cell) in notebook_index.iter() {
|
||||
if cell != last_cell {
|
||||
let offset = source_code.line_start(row);
|
||||
cells.push((Some(last_cell), offset));
|
||||
last_cell = cell;
|
||||
for cell in notebook_index.iter() {
|
||||
if cell.cell_index() != last_cell_index {
|
||||
let offset = source_code.line_start(cell.start_row());
|
||||
cells.push((Some(last_cell_index), offset));
|
||||
last_cell_index = cell.cell_index();
|
||||
}
|
||||
}
|
||||
cells.push((Some(last_cell), source_text.text_len()));
|
||||
cells.push((Some(last_cell_index), source_text.text_len()));
|
||||
cells
|
||||
} else {
|
||||
vec![(None, source_text.text_len())]
|
||||
|
|
|
|||
|
|
@ -8,37 +8,40 @@ use ruff_source_file::{LineColumn, OneIndexed, SourceLocation};
|
|||
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
|
||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct NotebookIndex {
|
||||
/// Enter a row (1-based), get back the cell (1-based)
|
||||
pub(super) row_to_cell: Vec<OneIndexed>,
|
||||
/// Enter a row (1-based), get back the row in cell (1-based)
|
||||
pub(super) row_to_row_in_cell: Vec<OneIndexed>,
|
||||
/// Stores the starting row and the absolute cell index for every Python (valid) cell.
|
||||
///
|
||||
/// The index in this vector corresponds to the Python cell index (valid cell index).
|
||||
pub(super) cell_starts: Vec<CellStart>,
|
||||
}
|
||||
|
||||
impl NotebookIndex {
|
||||
pub fn new(row_to_cell: Vec<OneIndexed>, row_to_row_in_cell: Vec<OneIndexed>) -> Self {
|
||||
Self {
|
||||
row_to_cell,
|
||||
row_to_row_in_cell,
|
||||
fn find_cell(&self, row: OneIndexed) -> Option<CellStart> {
|
||||
match self
|
||||
.cell_starts
|
||||
.binary_search_by_key(&row, |start| start.start_row)
|
||||
{
|
||||
Ok(cell_index) => Some(self.cell_starts[cell_index]),
|
||||
Err(insertion_point) => Some(self.cell_starts[insertion_point.checked_sub(1)?]),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the cell number (1-based) for the given row (1-based).
|
||||
/// Returns the (raw) cell number (1-based) for the given row (1-based).
|
||||
pub fn cell(&self, row: OneIndexed) -> Option<OneIndexed> {
|
||||
self.row_to_cell.get(row.to_zero_indexed()).copied()
|
||||
self.find_cell(row).map(|start| start.raw_cell_index)
|
||||
}
|
||||
|
||||
/// Returns the row number (1-based) in the cell (1-based) for the
|
||||
/// given row (1-based).
|
||||
pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> {
|
||||
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
|
||||
self.find_cell(row)
|
||||
.map(|start| OneIndexed::from_zero_indexed(row.get() - start.start_row.get()))
|
||||
}
|
||||
|
||||
/// Returns an iterator over the row:cell-number pairs (both 1-based).
|
||||
pub fn iter(&self) -> impl Iterator<Item = (OneIndexed, OneIndexed)> {
|
||||
self.row_to_cell
|
||||
.iter()
|
||||
.enumerate()
|
||||
.map(|(row, cell)| (OneIndexed::from_zero_indexed(row), *cell))
|
||||
/// Returns an iterator over the starting rows of each cell (1-based).
|
||||
///
|
||||
/// This yields one entry per Python cell (skipping over Makrdown cell).
|
||||
pub fn iter(&self) -> impl Iterator<Item = CellStart> + '_ {
|
||||
self.cell_starts.iter().copied()
|
||||
}
|
||||
|
||||
/// Translates the given [`LineColumn`] based on the indexing table.
|
||||
|
|
@ -67,3 +70,23 @@ impl NotebookIndex {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||
pub struct CellStart {
|
||||
/// The row in the concatenated notebook source code at which
|
||||
/// this cell starts.
|
||||
pub(super) start_row: OneIndexed,
|
||||
|
||||
/// The absolute index of this cell in the notebook.
|
||||
pub(super) raw_cell_index: OneIndexed,
|
||||
}
|
||||
|
||||
impl CellStart {
|
||||
pub fn start_row(&self) -> OneIndexed {
|
||||
self.start_row
|
||||
}
|
||||
|
||||
pub fn cell_index(&self) -> OneIndexed {
|
||||
self.raw_cell_index
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ use ruff_text_size::TextSize;
|
|||
use crate::cell::CellOffsets;
|
||||
use crate::index::NotebookIndex;
|
||||
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
|
||||
use crate::{CellMetadata, RawNotebookMetadata, schema};
|
||||
use crate::{CellMetadata, CellStart, RawNotebookMetadata, schema};
|
||||
|
||||
/// Run round-trip source code generation on a given Jupyter notebook file path.
|
||||
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
|
||||
|
|
@ -320,11 +320,19 @@ impl Notebook {
|
|||
/// The index building is expensive as it needs to go through the content of
|
||||
/// every valid code cell.
|
||||
fn build_index(&self) -> NotebookIndex {
|
||||
let mut row_to_cell = Vec::new();
|
||||
let mut row_to_row_in_cell = Vec::new();
|
||||
let mut cell_starts = Vec::with_capacity(self.valid_code_cells.len());
|
||||
|
||||
let mut current_row = OneIndexed::MIN;
|
||||
|
||||
for &cell_index in &self.valid_code_cells {
|
||||
let line_count = match &self.raw.cells[cell_index as usize].source() {
|
||||
let raw_cell_index = cell_index as usize;
|
||||
// Record the starting row of this cell
|
||||
cell_starts.push(CellStart {
|
||||
start_row: current_row,
|
||||
raw_cell_index: OneIndexed::from_zero_indexed(raw_cell_index),
|
||||
});
|
||||
|
||||
let line_count = match &self.raw.cells[raw_cell_index].source() {
|
||||
SourceValue::String(string) => {
|
||||
if string.is_empty() {
|
||||
1
|
||||
|
|
@ -342,17 +350,11 @@ impl Notebook {
|
|||
}
|
||||
}
|
||||
};
|
||||
row_to_cell.extend(std::iter::repeat_n(
|
||||
OneIndexed::from_zero_indexed(cell_index as usize),
|
||||
line_count,
|
||||
));
|
||||
row_to_row_in_cell.extend((0..line_count).map(OneIndexed::from_zero_indexed));
|
||||
|
||||
current_row = current_row.saturating_add(line_count);
|
||||
}
|
||||
|
||||
NotebookIndex {
|
||||
row_to_cell,
|
||||
row_to_row_in_cell,
|
||||
}
|
||||
NotebookIndex { cell_starts }
|
||||
}
|
||||
|
||||
/// Return the notebook content.
|
||||
|
|
@ -456,7 +458,7 @@ mod tests {
|
|||
|
||||
use ruff_source_file::OneIndexed;
|
||||
|
||||
use crate::{Cell, Notebook, NotebookError, NotebookIndex};
|
||||
use crate::{Cell, CellStart, Notebook, NotebookError, NotebookIndex};
|
||||
|
||||
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
|
||||
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
|
||||
|
|
@ -548,39 +550,27 @@ print("after empty cells")
|
|||
assert_eq!(
|
||||
notebook.index(),
|
||||
&NotebookIndex {
|
||||
row_to_cell: vec![
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(4),
|
||||
OneIndexed::from_zero_indexed(6),
|
||||
OneIndexed::from_zero_indexed(6),
|
||||
OneIndexed::from_zero_indexed(7)
|
||||
],
|
||||
row_to_row_in_cell: vec![
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(1),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(3),
|
||||
OneIndexed::from_zero_indexed(4),
|
||||
OneIndexed::from_zero_indexed(5),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(1),
|
||||
OneIndexed::from_zero_indexed(2),
|
||||
OneIndexed::from_zero_indexed(3),
|
||||
OneIndexed::from_zero_indexed(4),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(0),
|
||||
OneIndexed::from_zero_indexed(1),
|
||||
OneIndexed::from_zero_indexed(0)
|
||||
cell_starts: vec![
|
||||
CellStart {
|
||||
start_row: OneIndexed::MIN,
|
||||
raw_cell_index: OneIndexed::MIN
|
||||
},
|
||||
CellStart {
|
||||
start_row: OneIndexed::from_zero_indexed(6),
|
||||
raw_cell_index: OneIndexed::from_zero_indexed(2)
|
||||
},
|
||||
CellStart {
|
||||
start_row: OneIndexed::from_zero_indexed(11),
|
||||
raw_cell_index: OneIndexed::from_zero_indexed(4)
|
||||
},
|
||||
CellStart {
|
||||
start_row: OneIndexed::from_zero_indexed(12),
|
||||
raw_cell_index: OneIndexed::from_zero_indexed(6)
|
||||
},
|
||||
CellStart {
|
||||
start_row: OneIndexed::from_zero_indexed(14),
|
||||
raw_cell_index: OneIndexed::from_zero_indexed(7)
|
||||
}
|
||||
],
|
||||
}
|
||||
);
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue