mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-22 13:23:18 +00:00
Reduce notebook memory footprint (#21319)
This commit is contained in:
parent
33b942c7ad
commit
36cce347fd
3 changed files with 84 additions and 71 deletions
|
|
@ -112,16 +112,16 @@ impl std::fmt::Display for Diff<'_> {
|
||||||
// `None`, indicating a regular script file, all the lines will be in one "cell" under the
|
// `None`, indicating a regular script file, all the lines will be in one "cell" under the
|
||||||
// `None` key.
|
// `None` key.
|
||||||
let cells = if let Some(notebook_index) = &self.notebook_index {
|
let cells = if let Some(notebook_index) = &self.notebook_index {
|
||||||
let mut last_cell = OneIndexed::MIN;
|
let mut last_cell_index = OneIndexed::MIN;
|
||||||
let mut cells: Vec<(Option<OneIndexed>, TextSize)> = Vec::new();
|
let mut cells: Vec<(Option<OneIndexed>, TextSize)> = Vec::new();
|
||||||
for (row, cell) in notebook_index.iter() {
|
for cell in notebook_index.iter() {
|
||||||
if cell != last_cell {
|
if cell.cell_index() != last_cell_index {
|
||||||
let offset = source_code.line_start(row);
|
let offset = source_code.line_start(cell.start_row());
|
||||||
cells.push((Some(last_cell), offset));
|
cells.push((Some(last_cell_index), offset));
|
||||||
last_cell = cell;
|
last_cell_index = cell.cell_index();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
cells.push((Some(last_cell), source_text.text_len()));
|
cells.push((Some(last_cell_index), source_text.text_len()));
|
||||||
cells
|
cells
|
||||||
} else {
|
} else {
|
||||||
vec![(None, source_text.text_len())]
|
vec![(None, source_text.text_len())]
|
||||||
|
|
|
||||||
|
|
@ -8,37 +8,40 @@ use ruff_source_file::{LineColumn, OneIndexed, SourceLocation};
|
||||||
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
|
/// [`ruff_text_size::TextSize`] to jupyter notebook cell/row/column.
|
||||||
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
pub struct NotebookIndex {
|
pub struct NotebookIndex {
|
||||||
/// Enter a row (1-based), get back the cell (1-based)
|
/// Stores the starting row and the absolute cell index for every Python (valid) cell.
|
||||||
pub(super) row_to_cell: Vec<OneIndexed>,
|
///
|
||||||
/// Enter a row (1-based), get back the row in cell (1-based)
|
/// The index in this vector corresponds to the Python cell index (valid cell index).
|
||||||
pub(super) row_to_row_in_cell: Vec<OneIndexed>,
|
pub(super) cell_starts: Vec<CellStart>,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl NotebookIndex {
|
impl NotebookIndex {
|
||||||
pub fn new(row_to_cell: Vec<OneIndexed>, row_to_row_in_cell: Vec<OneIndexed>) -> Self {
|
fn find_cell(&self, row: OneIndexed) -> Option<CellStart> {
|
||||||
Self {
|
match self
|
||||||
row_to_cell,
|
.cell_starts
|
||||||
row_to_row_in_cell,
|
.binary_search_by_key(&row, |start| start.start_row)
|
||||||
|
{
|
||||||
|
Ok(cell_index) => Some(self.cell_starts[cell_index]),
|
||||||
|
Err(insertion_point) => Some(self.cell_starts[insertion_point.checked_sub(1)?]),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the cell number (1-based) for the given row (1-based).
|
/// Returns the (raw) cell number (1-based) for the given row (1-based).
|
||||||
pub fn cell(&self, row: OneIndexed) -> Option<OneIndexed> {
|
pub fn cell(&self, row: OneIndexed) -> Option<OneIndexed> {
|
||||||
self.row_to_cell.get(row.to_zero_indexed()).copied()
|
self.find_cell(row).map(|start| start.raw_cell_index)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns the row number (1-based) in the cell (1-based) for the
|
/// Returns the row number (1-based) in the cell (1-based) for the
|
||||||
/// given row (1-based).
|
/// given row (1-based).
|
||||||
pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> {
|
pub fn cell_row(&self, row: OneIndexed) -> Option<OneIndexed> {
|
||||||
self.row_to_row_in_cell.get(row.to_zero_indexed()).copied()
|
self.find_cell(row)
|
||||||
|
.map(|start| OneIndexed::from_zero_indexed(row.get() - start.start_row.get()))
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns an iterator over the row:cell-number pairs (both 1-based).
|
/// Returns an iterator over the starting rows of each cell (1-based).
|
||||||
pub fn iter(&self) -> impl Iterator<Item = (OneIndexed, OneIndexed)> {
|
///
|
||||||
self.row_to_cell
|
/// This yields one entry per Python cell (skipping over Markdown cells).
|
||||||
.iter()
|
pub fn iter(&self) -> impl Iterator<Item = CellStart> + '_ {
|
||||||
.enumerate()
|
self.cell_starts.iter().copied()
|
||||||
.map(|(row, cell)| (OneIndexed::from_zero_indexed(row), *cell))
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Translates the given [`LineColumn`] based on the indexing table.
|
/// Translates the given [`LineColumn`] based on the indexing table.
|
||||||
|
|
@ -67,3 +70,23 @@ impl NotebookIndex {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[derive(Debug, Copy, Clone, Eq, PartialEq, Serialize, Deserialize)]
|
||||||
|
pub struct CellStart {
|
||||||
|
/// The row in the concatenated notebook source code at which
|
||||||
|
/// this cell starts.
|
||||||
|
pub(super) start_row: OneIndexed,
|
||||||
|
|
||||||
|
/// The absolute index of this cell in the notebook.
|
||||||
|
pub(super) raw_cell_index: OneIndexed,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl CellStart {
|
||||||
|
pub fn start_row(&self) -> OneIndexed {
|
||||||
|
self.start_row
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn cell_index(&self) -> OneIndexed {
|
||||||
|
self.raw_cell_index
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,7 @@ use ruff_text_size::TextSize;
|
||||||
use crate::cell::CellOffsets;
|
use crate::cell::CellOffsets;
|
||||||
use crate::index::NotebookIndex;
|
use crate::index::NotebookIndex;
|
||||||
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
|
use crate::schema::{Cell, RawNotebook, SortAlphabetically, SourceValue};
|
||||||
use crate::{CellMetadata, RawNotebookMetadata, schema};
|
use crate::{CellMetadata, CellStart, RawNotebookMetadata, schema};
|
||||||
|
|
||||||
/// Run round-trip source code generation on a given Jupyter notebook file path.
|
/// Run round-trip source code generation on a given Jupyter notebook file path.
|
||||||
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
|
pub fn round_trip(path: &Path) -> anyhow::Result<String> {
|
||||||
|
|
@ -320,11 +320,19 @@ impl Notebook {
|
||||||
/// The index building is expensive as it needs to go through the content of
|
/// The index building is expensive as it needs to go through the content of
|
||||||
/// every valid code cell.
|
/// every valid code cell.
|
||||||
fn build_index(&self) -> NotebookIndex {
|
fn build_index(&self) -> NotebookIndex {
|
||||||
let mut row_to_cell = Vec::new();
|
let mut cell_starts = Vec::with_capacity(self.valid_code_cells.len());
|
||||||
let mut row_to_row_in_cell = Vec::new();
|
|
||||||
|
let mut current_row = OneIndexed::MIN;
|
||||||
|
|
||||||
for &cell_index in &self.valid_code_cells {
|
for &cell_index in &self.valid_code_cells {
|
||||||
let line_count = match &self.raw.cells[cell_index as usize].source() {
|
let raw_cell_index = cell_index as usize;
|
||||||
|
// Record the starting row of this cell
|
||||||
|
cell_starts.push(CellStart {
|
||||||
|
start_row: current_row,
|
||||||
|
raw_cell_index: OneIndexed::from_zero_indexed(raw_cell_index),
|
||||||
|
});
|
||||||
|
|
||||||
|
let line_count = match &self.raw.cells[raw_cell_index].source() {
|
||||||
SourceValue::String(string) => {
|
SourceValue::String(string) => {
|
||||||
if string.is_empty() {
|
if string.is_empty() {
|
||||||
1
|
1
|
||||||
|
|
@ -342,17 +350,11 @@ impl Notebook {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
row_to_cell.extend(std::iter::repeat_n(
|
|
||||||
OneIndexed::from_zero_indexed(cell_index as usize),
|
current_row = current_row.saturating_add(line_count);
|
||||||
line_count,
|
|
||||||
));
|
|
||||||
row_to_row_in_cell.extend((0..line_count).map(OneIndexed::from_zero_indexed));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
NotebookIndex {
|
NotebookIndex { cell_starts }
|
||||||
row_to_cell,
|
|
||||||
row_to_row_in_cell,
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Return the notebook content.
|
/// Return the notebook content.
|
||||||
|
|
@ -456,7 +458,7 @@ mod tests {
|
||||||
|
|
||||||
use ruff_source_file::OneIndexed;
|
use ruff_source_file::OneIndexed;
|
||||||
|
|
||||||
use crate::{Cell, Notebook, NotebookError, NotebookIndex};
|
use crate::{Cell, CellStart, Notebook, NotebookError, NotebookIndex};
|
||||||
|
|
||||||
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
|
/// Construct a path to a Jupyter notebook in the `resources/test/fixtures/jupyter` directory.
|
||||||
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
|
fn notebook_path(path: impl AsRef<Path>) -> std::path::PathBuf {
|
||||||
|
|
@ -548,39 +550,27 @@ print("after empty cells")
|
||||||
assert_eq!(
|
assert_eq!(
|
||||||
notebook.index(),
|
notebook.index(),
|
||||||
&NotebookIndex {
|
&NotebookIndex {
|
||||||
row_to_cell: vec![
|
cell_starts: vec![
|
||||||
OneIndexed::from_zero_indexed(0),
|
CellStart {
|
||||||
OneIndexed::from_zero_indexed(0),
|
start_row: OneIndexed::MIN,
|
||||||
OneIndexed::from_zero_indexed(0),
|
raw_cell_index: OneIndexed::MIN
|
||||||
OneIndexed::from_zero_indexed(0),
|
},
|
||||||
OneIndexed::from_zero_indexed(0),
|
CellStart {
|
||||||
OneIndexed::from_zero_indexed(0),
|
start_row: OneIndexed::from_zero_indexed(6),
|
||||||
OneIndexed::from_zero_indexed(2),
|
raw_cell_index: OneIndexed::from_zero_indexed(2)
|
||||||
OneIndexed::from_zero_indexed(2),
|
},
|
||||||
OneIndexed::from_zero_indexed(2),
|
CellStart {
|
||||||
OneIndexed::from_zero_indexed(2),
|
start_row: OneIndexed::from_zero_indexed(11),
|
||||||
OneIndexed::from_zero_indexed(2),
|
raw_cell_index: OneIndexed::from_zero_indexed(4)
|
||||||
OneIndexed::from_zero_indexed(4),
|
},
|
||||||
OneIndexed::from_zero_indexed(6),
|
CellStart {
|
||||||
OneIndexed::from_zero_indexed(6),
|
start_row: OneIndexed::from_zero_indexed(12),
|
||||||
OneIndexed::from_zero_indexed(7)
|
raw_cell_index: OneIndexed::from_zero_indexed(6)
|
||||||
],
|
},
|
||||||
row_to_row_in_cell: vec![
|
CellStart {
|
||||||
OneIndexed::from_zero_indexed(0),
|
start_row: OneIndexed::from_zero_indexed(14),
|
||||||
OneIndexed::from_zero_indexed(1),
|
raw_cell_index: OneIndexed::from_zero_indexed(7)
|
||||||
OneIndexed::from_zero_indexed(2),
|
}
|
||||||
OneIndexed::from_zero_indexed(3),
|
|
||||||
OneIndexed::from_zero_indexed(4),
|
|
||||||
OneIndexed::from_zero_indexed(5),
|
|
||||||
OneIndexed::from_zero_indexed(0),
|
|
||||||
OneIndexed::from_zero_indexed(1),
|
|
||||||
OneIndexed::from_zero_indexed(2),
|
|
||||||
OneIndexed::from_zero_indexed(3),
|
|
||||||
OneIndexed::from_zero_indexed(4),
|
|
||||||
OneIndexed::from_zero_indexed(0),
|
|
||||||
OneIndexed::from_zero_indexed(0),
|
|
||||||
OneIndexed::from_zero_indexed(1),
|
|
||||||
OneIndexed::from_zero_indexed(0)
|
|
||||||
],
|
],
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue