mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-26 11:59:35 +00:00
Generate deterministic ids when formatting notebooks (#9359)
When formatting notebooks, we populate the `id` field for cells that do not have one. Previously, we generated a UUID v4 which resulted in non-deterministic formatting. Here, we generate the UUID from a seeded random number generator instead of using true randomness. For example, here are the first five ids it would generate: ``` 7fb27b94-1602-401d-9154-2211134fc71a acae54e3-7e7d-407b-bb7b-55eff062a284 9a63283c-baf0-4dbc-ab1f-6479b197f3a8 8dd0d809-2fe7-4a7c-9628-1538738b07e2 72eea511-9410-473a-a328-ad9291626812 ``` We also add a check that an id is not present in another cell to prevent accidental introduction of duplicate ids. The specification is lax, and we could just use incrementing integers e.g. `0`, `1`, ... but I have a minor preference for retaining the UUID format. Some discussion [here](https://github.com/astral-sh/ruff/pull/9359#discussion_r1439607121) — I'm happy to go either way though. Discovered via #9293
This commit is contained in:
parent
328262bfac
commit
aaa00976ae
3 changed files with 31 additions and 3 deletions
1
Cargo.lock
generated
1
Cargo.lock
generated
|
@ -2259,6 +2259,7 @@ dependencies = [
|
||||||
"insta",
|
"insta",
|
||||||
"itertools 0.12.0",
|
"itertools 0.12.0",
|
||||||
"once_cell",
|
"once_cell",
|
||||||
|
"rand",
|
||||||
"ruff_diagnostics",
|
"ruff_diagnostics",
|
||||||
"ruff_source_file",
|
"ruff_source_file",
|
||||||
"ruff_text_size",
|
"ruff_text_size",
|
||||||
|
|
|
@ -25,6 +25,7 @@ serde_json = { workspace = true }
|
||||||
serde_with = { workspace = true, default-features = false, features = ["macros"] }
|
serde_with = { workspace = true, default-features = false, features = ["macros"] }
|
||||||
thiserror = { workspace = true }
|
thiserror = { workspace = true }
|
||||||
uuid = { workspace = true }
|
uuid = { workspace = true }
|
||||||
|
rand = { workspace = true }
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
insta = { workspace = true }
|
insta = { workspace = true }
|
||||||
|
|
|
@ -1,4 +1,5 @@
|
||||||
use std::cmp::Ordering;
|
use std::cmp::Ordering;
|
||||||
|
use std::collections::HashSet;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
|
@ -6,10 +7,10 @@ use std::{io, iter};
|
||||||
|
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
use once_cell::sync::OnceCell;
|
use once_cell::sync::OnceCell;
|
||||||
|
use rand::{Rng, SeedableRng};
|
||||||
use serde::Serialize;
|
use serde::Serialize;
|
||||||
use serde_json::error::Category;
|
use serde_json::error::Category;
|
||||||
use thiserror::Error;
|
use thiserror::Error;
|
||||||
use uuid::Uuid;
|
|
||||||
|
|
||||||
use ruff_diagnostics::{SourceMap, SourceMarker};
|
use ruff_diagnostics::{SourceMap, SourceMarker};
|
||||||
use ruff_source_file::{NewlineWithTrailingNewline, OneIndexed, UniversalNewlineIterator};
|
use ruff_source_file::{NewlineWithTrailingNewline, OneIndexed, UniversalNewlineIterator};
|
||||||
|
@ -145,7 +146,23 @@ impl Notebook {
|
||||||
// Add cell ids to 4.5+ notebooks if they are missing
|
// Add cell ids to 4.5+ notebooks if they are missing
|
||||||
// https://github.com/astral-sh/ruff/issues/6834
|
// https://github.com/astral-sh/ruff/issues/6834
|
||||||
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
|
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
|
||||||
|
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
|
||||||
if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 {
|
if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 {
|
||||||
|
// We use a insecure random number generator to generate deterministic uuids
|
||||||
|
let mut rng = rand::rngs::StdRng::seed_from_u64(0);
|
||||||
|
let mut existing_ids = HashSet::new();
|
||||||
|
|
||||||
|
for cell in &raw_notebook.cells {
|
||||||
|
let id = match cell {
|
||||||
|
Cell::Code(cell) => &cell.id,
|
||||||
|
Cell::Markdown(cell) => &cell.id,
|
||||||
|
Cell::Raw(cell) => &cell.id,
|
||||||
|
};
|
||||||
|
if let Some(id) = id {
|
||||||
|
existing_ids.insert(id.clone());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
for cell in &mut raw_notebook.cells {
|
for cell in &mut raw_notebook.cells {
|
||||||
let id = match cell {
|
let id = match cell {
|
||||||
Cell::Code(cell) => &mut cell.id,
|
Cell::Code(cell) => &mut cell.id,
|
||||||
|
@ -153,8 +170,17 @@ impl Notebook {
|
||||||
Cell::Raw(cell) => &mut cell.id,
|
Cell::Raw(cell) => &mut cell.id,
|
||||||
};
|
};
|
||||||
if id.is_none() {
|
if id.is_none() {
|
||||||
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
|
loop {
|
||||||
*id = Some(Uuid::new_v4().to_string());
|
let new_id = uuid::Builder::from_random_bytes(rng.gen())
|
||||||
|
.into_uuid()
|
||||||
|
.as_simple()
|
||||||
|
.to_string();
|
||||||
|
|
||||||
|
if existing_ids.insert(new_id.clone()) {
|
||||||
|
*id = Some(new_id);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue