mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-28 23:43:53 +00:00
Generate deterministic ids when formatting notebooks (#9359)
When formatting notebooks, we populate the `id` field for cells that do not have one. Previously, we generated a UUID v4, which resulted in non-deterministic formatting. Here, we generate the UUID from a seeded random number generator instead of using true randomness.

For example, here are the first five ids it would generate:

```
7fb27b94-1602-401d-9154-2211134fc71a
acae54e3-7e7d-407b-bb7b-55eff062a284
9a63283c-baf0-4dbc-ab1f-6479b197f3a8
8dd0d809-2fe7-4a7c-9628-1538738b07e2
72eea511-9410-473a-a328-ad9291626812
```

We also add a check that a generated id is not already present in another cell, to prevent accidentally introducing duplicate ids.

The specification is lax, and we could just use incrementing integers (e.g., `0`, `1`, ...), but I have a minor preference for retaining the UUID format. Some discussion [here](https://github.com/astral-sh/ruff/pull/9359#discussion_r1439607121) — I'm happy to go either way though.

Discovered via #9293
This commit is contained in:
parent
328262bfac
commit
aaa00976ae
3 changed files with 31 additions and 3 deletions
|
@ -1,4 +1,5 @@
|
|||
use std::cmp::Ordering;
|
||||
use std::collections::HashSet;
|
||||
use std::fs::File;
|
||||
use std::io::{BufReader, Cursor, Read, Seek, SeekFrom, Write};
|
||||
use std::path::Path;
|
||||
|
@ -6,10 +7,10 @@ use std::{io, iter};
|
|||
|
||||
use itertools::Itertools;
|
||||
use once_cell::sync::OnceCell;
|
||||
use rand::{Rng, SeedableRng};
|
||||
use serde::Serialize;
|
||||
use serde_json::error::Category;
|
||||
use thiserror::Error;
|
||||
use uuid::Uuid;
|
||||
|
||||
use ruff_diagnostics::{SourceMap, SourceMarker};
|
||||
use ruff_source_file::{NewlineWithTrailingNewline, OneIndexed, UniversalNewlineIterator};
|
||||
|
@ -145,7 +146,23 @@ impl Notebook {
|
|||
// Add cell ids to 4.5+ notebooks if they are missing
|
||||
// https://github.com/astral-sh/ruff/issues/6834
|
||||
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#required-field
|
||||
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
|
||||
if raw_notebook.nbformat == 4 && raw_notebook.nbformat_minor >= 5 {
|
||||
// We use a fixed-seed random number generator so that the generated uuids are deterministic (and therefore predictable, i.e., insecure)
|
||||
let mut rng = rand::rngs::StdRng::seed_from_u64(0);
|
||||
let mut existing_ids = HashSet::new();
|
||||
|
||||
for cell in &raw_notebook.cells {
|
||||
let id = match cell {
|
||||
Cell::Code(cell) => &cell.id,
|
||||
Cell::Markdown(cell) => &cell.id,
|
||||
Cell::Raw(cell) => &cell.id,
|
||||
};
|
||||
if let Some(id) = id {
|
||||
existing_ids.insert(id.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for cell in &mut raw_notebook.cells {
|
||||
let id = match cell {
|
||||
Cell::Code(cell) => &mut cell.id,
|
||||
|
@ -153,8 +170,17 @@ impl Notebook {
|
|||
Cell::Raw(cell) => &mut cell.id,
|
||||
};
|
||||
if id.is_none() {
|
||||
// https://github.com/jupyter/enhancement-proposals/blob/master/62-cell-id/cell-id.md#questions
|
||||
*id = Some(Uuid::new_v4().to_string());
|
||||
loop {
|
||||
let new_id = uuid::Builder::from_random_bytes(rng.gen())
|
||||
.into_uuid()
|
||||
.as_simple()
|
||||
.to_string();
|
||||
|
||||
if existing_ids.insert(new_id.clone()) {
|
||||
*id = Some(new_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue