mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
gh-89083: add support for UUID version 7 (RFC 9562) (#121119)
Add support for generating UUIDv7 objects according to RFC 9562, §5.7 [1]. The functionality is provided by the `uuid.uuid7()` function. The implementation is based on a 42-bit counter as described by Method 1, §6.2 [2] and guarantees monotonicity within the same millisecond. [1]: https://www.rfc-editor.org/rfc/rfc9562.html#section-5.7 [2]: https://www.rfc-editor.org/rfc/rfc9562.html#section-6.2 --------- Co-authored-by: Hugo van Kemenade <1324225+hugovk@users.noreply.github.com> Co-authored-by: Victor Stinner <vstinner@python.org> Co-authored-by: Éric <merwok@netwok.org>
This commit is contained in:
parent
0b6e98c841
commit
3929af5e3a
5 changed files with 315 additions and 12 deletions
|
@ -11,9 +11,10 @@
|
|||
--------------
|
||||
|
||||
This module provides immutable :class:`UUID` objects (the :class:`UUID` class)
|
||||
and the functions :func:`uuid1`, :func:`uuid3`, :func:`uuid4`, :func:`uuid5`,
|
||||
:func:`uuid6`, and :func:`uuid8` for generating version 1, 3, 4, 5, 6,
|
||||
and 8 UUIDs as specified in :rfc:`9562` (which supersedes :rfc:`4122`).
|
||||
and :ref:`functions <uuid-factory-functions>` for generating UUIDs corresponding
|
||||
to a specific UUID version as specified in :rfc:`9562` (which supersedes :rfc:`4122`),
|
||||
for example, :func:`uuid1` for UUID version 1, :func:`uuid3` for UUID version 3, and so on.
|
||||
Note that UUID version 2 is deliberately omitted as it is outside the scope of the RFC.
|
||||
|
||||
If all you want is a unique ID, you should probably call :func:`uuid1` or
|
||||
:func:`uuid4`. Note that :func:`uuid1` may compromise privacy since it creates
|
||||
|
@ -154,7 +155,7 @@ which relays any information about the UUID's safety, using this enumeration:
|
|||
:const:`RFC_4122`).
|
||||
|
||||
.. versionchanged:: next
|
||||
Added UUID versions 6 and 8.
|
||||
Added UUID versions 6, 7 and 8.
|
||||
|
||||
|
||||
.. attribute:: UUID.is_safe
|
||||
|
@ -185,6 +186,8 @@ The :mod:`uuid` module defines the following functions:
|
|||
globally unique, while the latter are not.
|
||||
|
||||
|
||||
.. _uuid-factory-functions:
|
||||
|
||||
.. function:: uuid1(node=None, clock_seq=None)
|
||||
|
||||
Generate a UUID from a host ID, sequence number, and the current time. If *node*
|
||||
|
@ -228,6 +231,18 @@ The :mod:`uuid` module defines the following functions:
|
|||
.. versionadded:: next
|
||||
|
||||
|
||||
.. function:: uuid7()
|
||||
|
||||
Generate a time-based UUID according to
|
||||
:rfc:`RFC 9562, §5.7 <9562#section-5.7>`.
|
||||
|
||||
For portability across platforms lacking sub-millisecond precision, UUIDs
|
||||
produced by this function embed a 48-bit timestamp and use a 42-bit counter
|
||||
to guarantee monotonicity within a millisecond.
|
||||
|
||||
.. versionadded:: next
|
||||
|
||||
|
||||
.. function:: uuid8(a=None, b=None, c=None)
|
||||
|
||||
Generate a pseudo-random UUID according to
|
||||
|
@ -330,7 +345,7 @@ The :mod:`uuid` module can be executed as a script from the command line.
|
|||
|
||||
.. code-block:: sh
|
||||
|
||||
python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid8}] [-n NAMESPACE] [-N NAME]
|
||||
python -m uuid [-h] [-u {uuid1,uuid3,uuid4,uuid5,uuid6,uuid7,uuid8}] [-n NAMESPACE] [-N NAME]
|
||||
|
||||
The following options are accepted:
|
||||
|
||||
|
@ -347,7 +362,7 @@ The following options are accepted:
|
|||
is used.
|
||||
|
||||
.. versionchanged:: next
|
||||
Allow generating UUID versions 6 and 8.
|
||||
Allow generating UUID versions 6, 7 and 8.
|
||||
|
||||
.. option:: -n <namespace>
|
||||
--namespace <namespace>
|
||||
|
|
|
@ -924,8 +924,9 @@ urllib
|
|||
uuid
|
||||
----
|
||||
|
||||
* Add support for UUID versions 6 and 8 via :func:`uuid.uuid6` and
|
||||
:func:`uuid.uuid8` respectively, as specified in :rfc:`9562`.
|
||||
* Add support for UUID versions 6, 7, and 8 via :func:`uuid.uuid6`,
|
||||
:func:`uuid.uuid7`, and :func:`uuid.uuid8` respectively, as specified
|
||||
in :rfc:`9562`.
|
||||
(Contributed by Bénédikt Tran in :gh:`89083`.)
|
||||
|
||||
* :const:`uuid.NIL` and :const:`uuid.MAX` are now available to represent the
|
||||
|
|
|
@ -871,6 +871,206 @@ class BaseTestUUID:
|
|||
equal((u.int >> 80) & 0xffff, 0x232a)
|
||||
equal((u.int >> 96) & 0xffff_ffff, 0x1ec9_414c)
|
||||
|
||||
def test_uuid7(self):
|
||||
equal = self.assertEqual
|
||||
u = self.uuid.uuid7()
|
||||
equal(u.variant, self.uuid.RFC_4122)
|
||||
equal(u.version, 7)
|
||||
|
||||
# 1 Jan 2023 12:34:56.123_456_789
|
||||
timestamp_ns = 1672533296_123_456_789 # ns precision
|
||||
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
|
||||
|
||||
for _ in range(100):
|
||||
counter_hi = random.getrandbits(11)
|
||||
counter_lo = random.getrandbits(30)
|
||||
counter = (counter_hi << 30) | counter_lo
|
||||
|
||||
tail = random.getrandbits(32)
|
||||
# effective number of bits is 32 + 30 + 11 = 73
|
||||
random_bits = counter << 32 | tail
|
||||
|
||||
# set all remaining MSB of fake random bits to 1 to ensure that
|
||||
# the implementation correctly removes them
|
||||
random_bits = (((1 << 7) - 1) << 73) | random_bits
|
||||
random_data = random_bits.to_bytes(10)
|
||||
|
||||
with (
|
||||
mock.patch.multiple(
|
||||
self.uuid,
|
||||
_last_timestamp_v7=None,
|
||||
_last_counter_v7=0,
|
||||
),
|
||||
mock.patch('time.time_ns', return_value=timestamp_ns),
|
||||
mock.patch('os.urandom', return_value=random_data) as urand
|
||||
):
|
||||
u = self.uuid.uuid7()
|
||||
urand.assert_called_once_with(10)
|
||||
equal(u.variant, self.uuid.RFC_4122)
|
||||
equal(u.version, 7)
|
||||
|
||||
equal(self.uuid._last_timestamp_v7, timestamp_ms)
|
||||
equal(self.uuid._last_counter_v7, counter)
|
||||
|
||||
unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
|
||||
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
|
||||
|
||||
equal((u.int >> 75) & 1, 0) # check that the MSB is 0
|
||||
equal((u.int >> 64) & 0xfff, counter_hi)
|
||||
equal((u.int >> 32) & 0x3fff_ffff, counter_lo)
|
||||
equal(u.int & 0xffff_ffff, tail)
|
||||
|
||||
def test_uuid7_uniqueness(self):
|
||||
# Test that UUIDv7-generated values are unique.
|
||||
#
|
||||
# While UUIDv8 has an entropy of 122 bits, those 122 bits may not
|
||||
# necessarily be sampled from a PRNG. On the other hand, UUIDv7
|
||||
# uses os.urandom() as a PRNG which features better randomness.
|
||||
N = 1000
|
||||
uuids = {self.uuid.uuid7() for _ in range(N)}
|
||||
self.assertEqual(len(uuids), N)
|
||||
|
||||
versions = {u.version for u in uuids}
|
||||
self.assertSetEqual(versions, {7})
|
||||
|
||||
def test_uuid7_monotonicity(self):
|
||||
equal = self.assertEqual
|
||||
|
||||
us = [self.uuid.uuid7() for _ in range(10_000)]
|
||||
equal(us, sorted(us))
|
||||
|
||||
with mock.patch.multiple(
|
||||
self.uuid,
|
||||
_last_timestamp_v7=0,
|
||||
_last_counter_v7=0,
|
||||
):
|
||||
# 1 Jan 2023 12:34:56.123_456_789
|
||||
timestamp_ns = 1672533296_123_456_789 # ns precision
|
||||
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
|
||||
|
||||
# counter_{hi,lo} are chosen so that "counter + 1" does not overflow
|
||||
counter_hi = random.getrandbits(11)
|
||||
counter_lo = random.getrandbits(29)
|
||||
counter = (counter_hi << 30) | counter_lo
|
||||
self.assertLess(counter + 1, 0x3ff_ffff_ffff)
|
||||
|
||||
tail = random.getrandbits(32)
|
||||
random_bits = counter << 32 | tail
|
||||
random_data = random_bits.to_bytes(10)
|
||||
|
||||
with (
|
||||
mock.patch('time.time_ns', return_value=timestamp_ns),
|
||||
mock.patch('os.urandom', return_value=random_data) as urand
|
||||
):
|
||||
u1 = self.uuid.uuid7()
|
||||
urand.assert_called_once_with(10)
|
||||
equal(self.uuid._last_timestamp_v7, timestamp_ms)
|
||||
equal(self.uuid._last_counter_v7, counter)
|
||||
equal((u1.int >> 64) & 0xfff, counter_hi)
|
||||
equal((u1.int >> 32) & 0x3fff_ffff, counter_lo)
|
||||
equal(u1.int & 0xffff_ffff, tail)
|
||||
|
||||
# 1 Jan 2023 12:34:56.123_457_032 (same millisecond but not same ns)
|
||||
next_timestamp_ns = 1672533296_123_457_032
|
||||
next_timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
|
||||
equal(timestamp_ms, next_timestamp_ms)
|
||||
|
||||
next_tail_bytes = os.urandom(4)
|
||||
next_fail = int.from_bytes(next_tail_bytes)
|
||||
|
||||
with (
|
||||
mock.patch('time.time_ns', return_value=next_timestamp_ns),
|
||||
mock.patch('os.urandom', return_value=next_tail_bytes) as urand
|
||||
):
|
||||
u2 = self.uuid.uuid7()
|
||||
urand.assert_called_once_with(4)
|
||||
# same milli-second
|
||||
equal(self.uuid._last_timestamp_v7, timestamp_ms)
|
||||
# 42-bit counter advanced by 1
|
||||
equal(self.uuid._last_counter_v7, counter + 1)
|
||||
equal((u2.int >> 64) & 0xfff, counter_hi)
|
||||
equal((u2.int >> 32) & 0x3fff_ffff, counter_lo + 1)
|
||||
equal(u2.int & 0xffff_ffff, next_fail)
|
||||
|
||||
self.assertLess(u1, u2)
|
||||
|
||||
def test_uuid7_timestamp_backwards(self):
|
||||
equal = self.assertEqual
|
||||
# 1 Jan 2023 12:34:56.123_456_789
|
||||
timestamp_ns = 1672533296_123_456_789 # ns precision
|
||||
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
|
||||
fake_last_timestamp_v7 = timestamp_ms + 1
|
||||
|
||||
# counter_{hi,lo} are chosen so that "counter + 1" does not overflow
|
||||
counter_hi = random.getrandbits(11)
|
||||
counter_lo = random.getrandbits(29)
|
||||
counter = (counter_hi << 30) | counter_lo
|
||||
self.assertLess(counter + 1, 0x3ff_ffff_ffff)
|
||||
|
||||
tail_bytes = os.urandom(4)
|
||||
tail = int.from_bytes(tail_bytes)
|
||||
|
||||
with (
|
||||
mock.patch.multiple(
|
||||
self.uuid,
|
||||
_last_timestamp_v7=fake_last_timestamp_v7,
|
||||
_last_counter_v7=counter,
|
||||
),
|
||||
mock.patch('time.time_ns', return_value=timestamp_ns),
|
||||
mock.patch('os.urandom', return_value=tail_bytes) as urand
|
||||
):
|
||||
u = self.uuid.uuid7()
|
||||
urand.assert_called_once_with(4)
|
||||
equal(u.variant, self.uuid.RFC_4122)
|
||||
equal(u.version, 7)
|
||||
equal(self.uuid._last_timestamp_v7, fake_last_timestamp_v7 + 1)
|
||||
unix_ts_ms = (fake_last_timestamp_v7 + 1) & 0xffff_ffff_ffff
|
||||
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
|
||||
# 42-bit counter advanced by 1
|
||||
equal(self.uuid._last_counter_v7, counter + 1)
|
||||
equal((u.int >> 64) & 0xfff, counter_hi)
|
||||
# 42-bit counter advanced by 1 (counter_hi is untouched)
|
||||
equal((u.int >> 32) & 0x3fff_ffff, counter_lo + 1)
|
||||
equal(u.int & 0xffff_ffff, tail)
|
||||
|
||||
def test_uuid7_overflow_counter(self):
|
||||
equal = self.assertEqual
|
||||
# 1 Jan 2023 12:34:56.123_456_789
|
||||
timestamp_ns = 1672533296_123_456_789 # ns precision
|
||||
timestamp_ms, _ = divmod(timestamp_ns, 1_000_000)
|
||||
|
||||
new_counter_hi = random.getrandbits(11)
|
||||
new_counter_lo = random.getrandbits(30)
|
||||
new_counter = (new_counter_hi << 30) | new_counter_lo
|
||||
|
||||
tail = random.getrandbits(32)
|
||||
random_bits = (new_counter << 32) | tail
|
||||
random_data = random_bits.to_bytes(10)
|
||||
|
||||
with (
|
||||
mock.patch.multiple(
|
||||
self.uuid,
|
||||
_last_timestamp_v7=timestamp_ms,
|
||||
# same timestamp, but force an overflow on the counter
|
||||
_last_counter_v7=0x3ff_ffff_ffff,
|
||||
),
|
||||
mock.patch('time.time_ns', return_value=timestamp_ns),
|
||||
mock.patch('os.urandom', return_value=random_data) as urand
|
||||
):
|
||||
u = self.uuid.uuid7()
|
||||
urand.assert_called_with(10)
|
||||
equal(u.variant, self.uuid.RFC_4122)
|
||||
equal(u.version, 7)
|
||||
# timestamp advanced due to overflow
|
||||
equal(self.uuid._last_timestamp_v7, timestamp_ms + 1)
|
||||
unix_ts_ms = (timestamp_ms + 1) & 0xffff_ffff_ffff
|
||||
equal((u.int >> 80) & 0xffff_ffff_ffff, unix_ts_ms)
|
||||
# counter overflowed, so we picked a new one
|
||||
equal(self.uuid._last_counter_v7, new_counter)
|
||||
equal((u.int >> 64) & 0xfff, new_counter_hi)
|
||||
equal((u.int >> 32) & 0x3fff_ffff, new_counter_lo)
|
||||
equal(u.int & 0xffff_ffff, tail)
|
||||
|
||||
def test_uuid8(self):
|
||||
equal = self.assertEqual
|
||||
u = self.uuid.uuid8()
|
||||
|
|
93
Lib/uuid.py
93
Lib/uuid.py
|
@ -1,8 +1,12 @@
|
|||
r"""UUID objects (universally unique identifiers) according to RFC 4122/9562.
|
||||
|
||||
This module provides immutable UUID objects (class UUID) and the functions
|
||||
uuid1(), uuid3(), uuid4(), uuid5(), uuid6(), and uuid8() for generating
|
||||
version 1, 3, 4, 5, 6, and 8 UUIDs as specified in RFC 4122/9562.
|
||||
This module provides immutable UUID objects (class UUID) and functions for
|
||||
generating UUIDs corresponding to a specific UUID version as specified in
|
||||
RFC 4122/9562, e.g., uuid1() for UUID version 1, uuid3() for UUID version 3,
|
||||
and so on.
|
||||
|
||||
Note that UUID version 2 is deliberately omitted as it is outside the scope
|
||||
of the RFC.
|
||||
|
||||
If all you want is a unique ID, you should probably call uuid1() or uuid4().
|
||||
Note that uuid1() may compromise privacy since it creates a UUID containing
|
||||
|
@ -54,6 +58,7 @@ Typical usage:
|
|||
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
|
||||
from enum import Enum, _simple_enum
|
||||
|
||||
|
@ -102,6 +107,7 @@ _RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
|
|||
_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
|
||||
_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
|
||||
_RFC_4122_VERSION_6_FLAGS = ((6 << 76) | (0x8000 << 48))
|
||||
_RFC_4122_VERSION_7_FLAGS = ((7 << 76) | (0x8000 << 48))
|
||||
_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
|
||||
|
||||
|
||||
|
@ -720,7 +726,6 @@ def uuid1(node=None, clock_seq=None):
|
|||
return UUID(bytes=uuid_time, is_safe=is_safe)
|
||||
|
||||
global _last_timestamp
|
||||
import time
|
||||
nanoseconds = time.time_ns()
|
||||
# 0x01b21dd213814000 is the number of 100-ns intervals between the
|
||||
# UUID epoch 1582-10-15 00:00:00 and the Unix epoch 1970-01-01 00:00:00.
|
||||
|
@ -770,6 +775,7 @@ def uuid5(namespace, name):
|
|||
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
|
||||
return UUID._from_int(int_uuid_5)
|
||||
|
||||
|
||||
_last_timestamp_v6 = None
|
||||
|
||||
def uuid6(node=None, clock_seq=None):
|
||||
|
@ -808,6 +814,83 @@ def uuid6(node=None, clock_seq=None):
|
|||
int_uuid_6 |= _RFC_4122_VERSION_6_FLAGS
|
||||
return UUID._from_int(int_uuid_6)
|
||||
|
||||
|
||||
_last_timestamp_v7 = None
|
||||
_last_counter_v7 = 0 # 42-bit counter
|
||||
|
||||
def _uuid7_get_counter_and_tail():
|
||||
rand = int.from_bytes(os.urandom(10))
|
||||
# 42-bit counter with MSB set to 0
|
||||
counter = (rand >> 32) & 0x1ff_ffff_ffff
|
||||
# 32-bit random data
|
||||
tail = rand & 0xffff_ffff
|
||||
return counter, tail
|
||||
|
||||
|
||||
def uuid7():
|
||||
"""Generate a UUID from a Unix timestamp in milliseconds and random bits.
|
||||
|
||||
UUIDv7 objects feature monotonicity within a millisecond.
|
||||
"""
|
||||
# --- 48 --- -- 4 -- --- 12 --- -- 2 -- --- 30 --- - 32 -
|
||||
# unix_ts_ms | version | counter_hi | variant | counter_lo | random
|
||||
#
|
||||
# 'counter = counter_hi | counter_lo' is a 42-bit counter constructed
|
||||
# with Method 1 of RFC 9562, §6.2, and its MSB is set to 0.
|
||||
#
|
||||
# 'random' is a 32-bit random value regenerated for every new UUID.
|
||||
#
|
||||
# If multiple UUIDs are generated within the same millisecond, the LSB
|
||||
# of 'counter' is incremented by 1. When overflowing, the timestamp is
|
||||
# advanced and the counter is reset to a random 42-bit integer with MSB
|
||||
# set to 0.
|
||||
|
||||
global _last_timestamp_v7
|
||||
global _last_counter_v7
|
||||
|
||||
nanoseconds = time.time_ns()
|
||||
timestamp_ms = nanoseconds // 1_000_000
|
||||
|
||||
if _last_timestamp_v7 is None or timestamp_ms > _last_timestamp_v7:
|
||||
counter, tail = _uuid7_get_counter_and_tail()
|
||||
else:
|
||||
if timestamp_ms < _last_timestamp_v7:
|
||||
timestamp_ms = _last_timestamp_v7 + 1
|
||||
# advance the 42-bit counter
|
||||
counter = _last_counter_v7 + 1
|
||||
if counter > 0x3ff_ffff_ffff:
|
||||
# advance the 48-bit timestamp
|
||||
timestamp_ms += 1
|
||||
counter, tail = _uuid7_get_counter_and_tail()
|
||||
else:
|
||||
# 32-bit random data
|
||||
tail = int.from_bytes(os.urandom(4))
|
||||
|
||||
unix_ts_ms = timestamp_ms & 0xffff_ffff_ffff
|
||||
counter_msbs = counter >> 30
|
||||
# keep 12 counter's MSBs and clear variant bits
|
||||
counter_hi = counter_msbs & 0x0fff
|
||||
# keep 30 counter's LSBs and clear version bits
|
||||
counter_lo = counter & 0x3fff_ffff
|
||||
# ensure that the tail is always a 32-bit integer (by construction,
|
||||
# it is already the case, but future interfaces may allow the user
|
||||
# to specify the random tail)
|
||||
tail &= 0xffff_ffff
|
||||
|
||||
int_uuid_7 = unix_ts_ms << 80
|
||||
int_uuid_7 |= counter_hi << 64
|
||||
int_uuid_7 |= counter_lo << 32
|
||||
int_uuid_7 |= tail
|
||||
# by construction, the variant and version bits are already cleared
|
||||
int_uuid_7 |= _RFC_4122_VERSION_7_FLAGS
|
||||
res = UUID._from_int(int_uuid_7)
|
||||
|
||||
# defer global update until all computations are done
|
||||
_last_timestamp_v7 = timestamp_ms
|
||||
_last_counter_v7 = counter
|
||||
return res
|
||||
|
||||
|
||||
def uuid8(a=None, b=None, c=None):
|
||||
"""Generate a UUID from three custom blocks.
|
||||
|
||||
|
@ -833,6 +916,7 @@ def uuid8(a=None, b=None, c=None):
|
|||
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
|
||||
return UUID._from_int(int_uuid_8)
|
||||
|
||||
|
||||
def main():
|
||||
"""Run the uuid command line interface."""
|
||||
uuid_funcs = {
|
||||
|
@ -841,6 +925,7 @@ def main():
|
|||
"uuid4": uuid4,
|
||||
"uuid5": uuid5,
|
||||
"uuid6": uuid6,
|
||||
"uuid7": uuid7,
|
||||
"uuid8": uuid8,
|
||||
}
|
||||
uuid_namespace_funcs = ("uuid3", "uuid5")
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Add :func:`uuid.uuid7` for generating UUIDv7 objects as specified in
|
||||
:rfc:`9562`. Patch by Bénédikt Tran.
|
Loading…
Add table
Add a link
Reference in a new issue