gh-89083: add support for UUID version 7 (RFC 9562) (#121119)

Add support for generating UUIDv7 objects according to RFC 9562, §5.7 [1].

The functionality is provided by the `uuid.uuid7()` function. The implementation
is based on a 42-bit counter as described by Method 1, §6.2 [2] and guarantees
monotonicity within the same millisecond.

[1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.7
[2]: https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2

---------

Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com>
Co-authored-by: Victor Stinner <vstinner@python.org>
Co-authored-by: Éric <merwok@netwok.org>
This commit is contained in:
Bénédikt Tran 2025-03-04 10:47:19 +01:00 committed by GitHub
parent 0b6e98c841
commit 3929af5e3a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 315 additions and 12 deletions

View file

@ -11,9 +11,10 @@
--------------
This module provides immutable :class:`UUID` objects (the :class:`UUID` class)
and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`,
:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6,
and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`).
and :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding
to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`),
for example, :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on.
Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC.
If all you want is a unique ID, you should probably call :func:`uuid1` or
:func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates
@ -154,7 +155,7 @@ which relays any information about the UUID's safety, using this enumeration:
:const:`RFC_4122`).
.. versionchanged:: next
Added UUID versions 6 and 8.
Added UUID versions 6, 7 and 8.
.. attribute:: UUID.is_safe
@ -185,6 +186,8 @@ The :mod:`uuid` module defines the following functions:
globally unique, while the latter are not.
.. _uuid-factory-functions:
.. function:: uuid1(node=None, clock_seq=None)
Generate a UUID from a host ID, sequence number, and the current time. If *node*
@ -228,6 +231,18 @@ The :mod:`uuid` module defines the following functions:
.. versionadded:: next
.. function:: uuid7()
Generate a time-based UUID according to
:rfc:`RFC 9562, §5.7 <9562#section-5.7>`.
For portability across platforms lacking sub-millisecond precision, UUIDs
produced by this function embed a 48-bit timestamp and use a 42-bit counter
to guarantee monotonicity within a millisecond.
.. versionadded:: next
.. function:: uuid8(a=None, b=None, c=None)
Generate a pseudo-random UUID according to
@ -330,7 +345,7 @@ The :mod:`uuid` module can be executed as a script from the command line.
.. code-block:: sh
python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n NAMESPACE] [-N NAME]
python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid7,uuid8}] [-n NAMESPACE] [-N NAME]
The following options are accepted:
@ -347,7 +362,7 @@ The following options are accepted:
is used.
.. versionchanged:: next
Allow generating UUID versions 6 and 8.
Allow generating UUID versions 6, 7 and 8.
.. option:: -n <namespace>
--namespace <namespace>

View file

@ -924,8 +924,9 @@ urllib
uuid
----
* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and
:func:`uuid.uuid8` respectively, as specified in :rfc:`9562`.
* Add support for UUID versions 6, 7, and 8 via :func:`uuid.uuid6`,
:func:`uuid.uuid7`, and :func:`uuid.uuid8` respectively, as specified
in :rfc:`9562`.
(Contributed by Bénédikt Tran in :gh:`89083`.)
* :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the

View file

@ -871,6 +871,206 @@ class BaseTestUUID:
equal((u.int >> 80) & 0xffff, 0x232a)
equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)
def test_uuid7(self):
equal = self.assertEqual
u = self.uuid.uuid7()
equal(u.variant, self.uuid.RFC_4122)
equal(u.version, 7)
# 1 Jan 2023 12:34:56.123_456_789
timestamp_ns = 1672533296_123_456_789 # ns precision
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
for _ in range(100):
counter_hi = random.getrandbits(11)
counter_lo = random.getrandbits(30)
counter = (counter_hi << 30) | counter_lo
tail = random.getrandbits(32)
# effective number of bits is 32 + 30 + 11 = 73
random_bits = counter << 32 | tail
# set all remaining MSB of fake random bits to 1 to ensure that
# the implementation correctly removes them
random_bits = (((1 << 7) - 1) << 73) | random_bits
random_data = random_bits.to_bytes(10)
with (
mock.patch.multiple(
self.uuid,
_last_timestamp_v7=None,
_last_counter_v7=0,
),
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=random_data) as urand
):
u = self.uuid.uuid7()
urand.assert_called_once_with(10)
equal(u.variant, self.uuid.RFC_4122)
equal(u.version, 7)
equal(self.uuid._last_timestamp_v7, timestamp_ms)
equal(self.uuid._last_counter_v7, counter)
unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
equal((u.int >> 75) & 1, 0) # check that the MSB is 0
equal((u.int >> 64) & 0xfff, counter_hi)
equal((u.int >> 32) & 0x3fff_ffff, counter_lo)
equal(u.int & 0xffff_ffff, tail)
def test_uuid7_uniqueness(self):
# Test that UUIDv7-generated values are unique.
#
# While UUIDv8 has an entropy of 122 bits, those 122 bits may not
# necessarily be sampled from a PRNG. On the other hand, UUIDv7
# uses os.urandom() as a PRNG which features better randomness.
N = 1000
uuids = {self.uuid.uuid7() for _ in range(N)}
self.assertEqual(len(uuids), N)
versions = {u.version for u in uuids}
self.assertSetEqual(versions, {7})
def test_uuid7_monotonicity(self):
equal = self.assertEqual
us = [self.uuid.uuid7() for _ in range(10_000)]
equal(us, sorted(us))
with mock.patch.multiple(
self.uuid,
_last_timestamp_v7=0,
_last_counter_v7=0,
):
# 1 Jan 2023 12:34:56.123_456_789
timestamp_ns = 1672533296_123_456_789 # ns precision
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
# counter_{hi,lo} are chosen so that "counter + 1" does not overflow
counter_hi = random.getrandbits(11)
counter_lo = random.getrandbits(29)
counter = (counter_hi << 30) | counter_lo
self.assertLess(counter + 1, 0x3ff_ffff_ffff)
tail = random.getrandbits(32)
random_bits = counter << 32 | tail
random_data = random_bits.to_bytes(10)
with (
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=random_data) as urand
):
u1 = self.uuid.uuid7()
urand.assert_called_once_with(10)
equal(self.uuid._last_timestamp_v7, timestamp_ms)
equal(self.uuid._last_counter_v7, counter)
equal((u1.int >> 64) & 0xfff, counter_hi)
equal((u1.int >> 32) & 0x3fff_ffff, counter_lo)
equal(u1.int & 0xffff_ffff, tail)
# 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns)
next_timestamp_ns = 1672533296_123_457_032
next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
equal(timestamp_ms, next_timestamp_ms)
next_tail_bytes = os.urandom(4)
next_fail = int.from_bytes(next_tail_bytes)
with (
mock.patch('time.time_ns', return_value=next_timestamp_ns),
mock.patch('os.urandom', return_value=next_tail_bytes) as urand
):
u2 = self.uuid.uuid7()
urand.assert_called_once_with(4)
# same milli-second
equal(self.uuid._last_timestamp_v7, timestamp_ms)
# 42-bit counter advanced by 1
equal(self.uuid._last_counter_v7, counter + 1)
equal((u2.int >> 64) & 0xfff, counter_hi)
equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1)
equal(u2.int & 0xffff_ffff, next_fail)
self.assertLess(u1, u2)
def test_uuid7_timestamp_backwards(self):
equal = self.assertEqual
# 1 Jan 2023 12:34:56.123_456_789
timestamp_ns = 1672533296_123_456_789 # ns precision
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
fake_last_timestamp_v7 = timestamp_ms + 1
# counter_{hi,lo} are chosen so that "counter + 1" does not overflow
counter_hi = random.getrandbits(11)
counter_lo = random.getrandbits(29)
counter = (counter_hi << 30) | counter_lo
self.assertLess(counter + 1, 0x3ff_ffff_ffff)
tail_bytes = os.urandom(4)
tail = int.from_bytes(tail_bytes)
with (
mock.patch.multiple(
self.uuid,
_last_timestamp_v7=fake_last_timestamp_v7,
_last_counter_v7=counter,
),
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=tail_bytes) as urand
):
u = self.uuid.uuid7()
urand.assert_called_once_with(4)
equal(u.variant, self.uuid.RFC_4122)
equal(u.version, 7)
equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1)
unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
# 42-bit counter advanced by 1
equal(self.uuid._last_counter_v7, counter + 1)
equal((u.int >> 64) & 0xfff, counter_hi)
# 42-bit counter advanced by 1 (counter_hi is untouched)
equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1)
equal(u.int & 0xffff_ffff, tail)
def test_uuid7_overflow_counter(self):
equal = self.assertEqual
# 1 Jan 2023 12:34:56.123_456_789
timestamp_ns = 1672533296_123_456_789 # ns precision
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
new_counter_hi = random.getrandbits(11)
new_counter_lo = random.getrandbits(30)
new_counter = (new_counter_hi << 30) | new_counter_lo
tail = random.getrandbits(32)
random_bits = (new_counter << 32) | tail
random_data = random_bits.to_bytes(10)
with (
mock.patch.multiple(
self.uuid,
_last_timestamp_v7=timestamp_ms,
# same timestamp, but force an overflow on the counter
_last_counter_v7=0x3ff_ffff_ffff,
),
mock.patch('time.time_ns', return_value=timestamp_ns),
mock.patch('os.urandom', return_value=random_data) as urand
):
u = self.uuid.uuid7()
urand.assert_called_with(10)
equal(u.variant, self.uuid.RFC_4122)
equal(u.version, 7)
# timestamp advanced due to overflow
equal(self.uuid._last_timestamp_v7, timestamp_ms + 1)
unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
# counter overflowed, so we picked a new one
equal(self.uuid._last_counter_v7, new_counter)
equal((u.int >> 64) & 0xfff, new_counter_hi)
equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo)
equal(u.int & 0xffff_ffff, tail)
def test_uuid8(self):
equal = self.assertEqual
u = self.uuid.uuid8()

View file

@ -1,8 +1,12 @@
r"""UUID objects (universally unique identifiers) according to RFC 4122/9562.
This module provides immutable UUID objects (class UUID) and the functions
uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating
version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562.
This module provides immutable UUID objects (class UUID) and functions for
generating UUIDs corresponding to a specific UUID version as specified in
RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3,
and so on.
Note that UUID version 2 is deliberately omitted as it is outside the scope
of the RFC.
If all you want is a unique ID, you should probably call uuid1() or uuid4().
Note that uuid1() may compromise privacy since it creates a UUID containing
@ -54,6 +58,7 @@ Typical usage:
import os
import sys
import time
from enum import Enum, _simple_enum
@ -102,6 +107,7 @@ _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
_RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48))
_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48))
_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
@ -720,7 +726,6 @@ def uuid1(node=None, clock_seq=None):
return UUID(bytes=uuid_time, is_safe=is_safe)
global _last_timestamp
import time
nanoseconds = time.time_ns()
# 0x01b21dd213814000 is the number of 100-ns intervals between the
# UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
@ -770,6 +775,7 @@ def uuid5(namespace, name):
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
return UUID._from_int(int_uuid_5)
_last_timestamp_v6 = None
def uuid6(node=None, clock_seq=None):
@ -808,6 +814,83 @@ def uuid6(node=None, clock_seq=None):
int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS
return UUID._from_int(int_uuid_6)
_last_timestamp_v7 = None
_last_counter_v7 = 0 # 42-bit counter
def _uuid7_get_counter_and_tail():
rand = int.from_bytes(os.urandom(10))
# 42-bit counter with MSB set to 0
counter = (rand >> 32) & 0x1ff_ffff_ffff
# 32-bit random data
tail = rand & 0xffff_ffff
return counter, tail
def uuid7():
"""Generate a UUID from a Unix timestamp in milliseconds and random bits.
UUIDv7 objects feature monotonicity within a millisecond.
"""
# --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 -
# unix_ts_ms | version | counter_hi | variant | counter_lo | random
#
# 'counter = counter_hi | counter_lo' is a 42-bit counter constructed
# with Method 1 of RFC 9562, §6.2, and its MSB is set to 0.
#
# 'random' is a 32-bit random value regenerated for every new UUID.
#
# If multiple UUIDs are generated within the same millisecond, the LSB
# of 'counter' is incremented by 1. When overflowing, the timestamp is
# advanced and the counter is reset to a random 42-bit integer with MSB
# set to 0.
global _last_timestamp_v7
global _last_counter_v7
nanoseconds = time.time_ns()
timestamp_ms = nanoseconds // 1_000_000
if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
counter, tail = _uuid7_get_counter_and_tail()
else:
if timestamp_ms < _last_timestamp_v7:
timestamp_ms = _last_timestamp_v7 + 1
# advance the 42-bit counter
counter = _last_counter_v7 + 1
if counter > 0x3ff_ffff_ffff:
# advance the 48-bit timestamp
timestamp_ms += 1
counter, tail = _uuid7_get_counter_and_tail()
else:
# 32-bit random data
tail = int.from_bytes(os.urandom(4))
unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
counter_msbs = counter >> 30
# keep 12 counter's MSBs and clear variant bits
counter_hi = counter_msbs & 0x0fff
# keep 30 counter's LSBs and clear version bits
counter_lo = counter & 0x3fff_ffff
# ensure that the tail is always a 32-bit integer (by construction,
# it is already the case, but future interfaces may allow the user
# to specify the random tail)
tail &= 0xffff_ffff
int_uuid_7 = unix_ts_ms << 80
int_uuid_7 |= counter_hi << 64
int_uuid_7 |= counter_lo << 32
int_uuid_7 |= tail
# by construction, the variant and version bits are already cleared
int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS
res = UUID._from_int(int_uuid_7)
# defer global update until all computations are done
_last_timestamp_v7 = timestamp_ms
_last_counter_v7 = counter
return res
def uuid8(a=None, b=None, c=None):
"""Generate a UUID from three custom blocks.
@ -833,6 +916,7 @@ def uuid8(a=None, b=None, c=None):
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
return UUID._from_int(int_uuid_8)
def main():
"""Run the uuid command line interface."""
uuid_funcs = {
@ -841,6 +925,7 @@ def main():
"uuid4": uuid4,
"uuid5": uuid5,
"uuid6": uuid6,
"uuid7": uuid7,
"uuid8": uuid8,
}
uuid_namespace_funcs = ("uuid3", "uuid5")

View file

@ -0,0 +1,2 @@
Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in
:rfc:`9562`. Patch by Bénédikt Tran.