mirror of
https://github.com/python/cpython.git
synced 2025-09-27 10:50:04 +00:00
gh-128150: Improve performance of uuid.uuid*
constructor functions. (#128151)
We introduce a private constructor `UUID._from_int()` for RFC 4122/9562 UUIDs, which takes the integral UUID value as input. The caller must ensure that the variant and version bits of this value are already set correctly. We also make `UUID.__init__()` slightly more efficient.
This commit is contained in:
parent
39fc7ef4fe
commit
6ff8f82f92
3 changed files with 76 additions and 27 deletions
|
@ -717,6 +717,22 @@ io
|
||||||
file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in
|
file's bytes in full. (Contributed by Cody Maloney and Victor Stinner in
|
||||||
:gh:`120754` and :gh:`90102`.)
|
:gh:`120754` and :gh:`90102`.)
|
||||||
|
|
||||||
|
|
||||||
|
uuid
|
||||||
|
----
|
||||||
|
|
||||||
|
* Improve the performance of generating :class:`~uuid.UUID` objects via their dedicated
|
||||||
|
functions:
|
||||||
|
|
||||||
|
* :func:`~uuid.uuid3` and :func:`~uuid.uuid5` are both roughly 40% faster
|
||||||
|
for 16-byte names and 20% faster for 1024-byte names. Performance for
|
||||||
|
longer names remains unchanged.
|
||||||
|
* :func:`~uuid.uuid4` and :func:`~uuid.uuid8` are 30% and 40% faster
|
||||||
|
respectively.
|
||||||
|
|
||||||
|
(Contributed by Bénédikt Tran in :gh:`128150`.)
|
||||||
|
|
||||||
|
|
||||||
Deprecated
|
Deprecated
|
||||||
==========
|
==========
|
||||||
|
|
||||||
|
|
85
Lib/uuid.py
85
Lib/uuid.py
|
@ -85,6 +85,17 @@ class SafeUUID:
|
||||||
unknown = None
|
unknown = None
|
||||||
|
|
||||||
|
|
||||||
|
_UINT_128_MAX = (1 << 128) - 1
|
||||||
|
# 128-bit mask to clear the variant and version bits of a UUID integral value
|
||||||
|
_RFC_4122_CLEARFLAGS_MASK = ~((0xf000 << 64) | (0xc000 << 48))
|
||||||
|
# RFC 4122 variant bits and version bits to activate on a UUID integral value.
|
||||||
|
_RFC_4122_VERSION_1_FLAGS = ((1 << 76) | (0x8000 << 48))
|
||||||
|
_RFC_4122_VERSION_3_FLAGS = ((3 << 76) | (0x8000 << 48))
|
||||||
|
_RFC_4122_VERSION_4_FLAGS = ((4 << 76) | (0x8000 << 48))
|
||||||
|
_RFC_4122_VERSION_5_FLAGS = ((5 << 76) | (0x8000 << 48))
|
||||||
|
_RFC_4122_VERSION_8_FLAGS = ((8 << 76) | (0x8000 << 48))
|
||||||
|
|
||||||
|
|
||||||
class UUID:
|
class UUID:
|
||||||
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
|
"""Instances of the UUID class represent UUIDs as specified in RFC 4122.
|
||||||
UUID objects are immutable, hashable, and usable as dictionary keys.
|
UUID objects are immutable, hashable, and usable as dictionary keys.
|
||||||
|
@ -174,57 +185,69 @@ class UUID:
|
||||||
if [hex, bytes, bytes_le, fields, int].count(None) != 4:
|
if [hex, bytes, bytes_le, fields, int].count(None) != 4:
|
||||||
raise TypeError('one of the hex, bytes, bytes_le, fields, '
|
raise TypeError('one of the hex, bytes, bytes_le, fields, '
|
||||||
'or int arguments must be given')
|
'or int arguments must be given')
|
||||||
if hex is not None:
|
if int is not None:
|
||||||
|
pass
|
||||||
|
elif hex is not None:
|
||||||
hex = hex.replace('urn:', '').replace('uuid:', '')
|
hex = hex.replace('urn:', '').replace('uuid:', '')
|
||||||
hex = hex.strip('{}').replace('-', '')
|
hex = hex.strip('{}').replace('-', '')
|
||||||
if len(hex) != 32:
|
if len(hex) != 32:
|
||||||
raise ValueError('badly formed hexadecimal UUID string')
|
raise ValueError('badly formed hexadecimal UUID string')
|
||||||
int = int_(hex, 16)
|
int = int_(hex, 16)
|
||||||
if bytes_le is not None:
|
elif bytes_le is not None:
|
||||||
if len(bytes_le) != 16:
|
if len(bytes_le) != 16:
|
||||||
raise ValueError('bytes_le is not a 16-char string')
|
raise ValueError('bytes_le is not a 16-char string')
|
||||||
|
assert isinstance(bytes_le, bytes_), repr(bytes_le)
|
||||||
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
|
bytes = (bytes_le[4-1::-1] + bytes_le[6-1:4-1:-1] +
|
||||||
bytes_le[8-1:6-1:-1] + bytes_le[8:])
|
bytes_le[8-1:6-1:-1] + bytes_le[8:])
|
||||||
if bytes is not None:
|
int = int_.from_bytes(bytes) # big endian
|
||||||
|
elif bytes is not None:
|
||||||
if len(bytes) != 16:
|
if len(bytes) != 16:
|
||||||
raise ValueError('bytes is not a 16-char string')
|
raise ValueError('bytes is not a 16-char string')
|
||||||
assert isinstance(bytes, bytes_), repr(bytes)
|
assert isinstance(bytes, bytes_), repr(bytes)
|
||||||
int = int_.from_bytes(bytes) # big endian
|
int = int_.from_bytes(bytes) # big endian
|
||||||
if fields is not None:
|
elif fields is not None:
|
||||||
if len(fields) != 6:
|
if len(fields) != 6:
|
||||||
raise ValueError('fields is not a 6-tuple')
|
raise ValueError('fields is not a 6-tuple')
|
||||||
(time_low, time_mid, time_hi_version,
|
(time_low, time_mid, time_hi_version,
|
||||||
clock_seq_hi_variant, clock_seq_low, node) = fields
|
clock_seq_hi_variant, clock_seq_low, node) = fields
|
||||||
if not 0 <= time_low < 1<<32:
|
if not 0 <= time_low < (1 << 32):
|
||||||
raise ValueError('field 1 out of range (need a 32-bit value)')
|
raise ValueError('field 1 out of range (need a 32-bit value)')
|
||||||
if not 0 <= time_mid < 1<<16:
|
if not 0 <= time_mid < (1 << 16):
|
||||||
raise ValueError('field 2 out of range (need a 16-bit value)')
|
raise ValueError('field 2 out of range (need a 16-bit value)')
|
||||||
if not 0 <= time_hi_version < 1<<16:
|
if not 0 <= time_hi_version < (1 << 16):
|
||||||
raise ValueError('field 3 out of range (need a 16-bit value)')
|
raise ValueError('field 3 out of range (need a 16-bit value)')
|
||||||
if not 0 <= clock_seq_hi_variant < 1<<8:
|
if not 0 <= clock_seq_hi_variant < (1 << 8):
|
||||||
raise ValueError('field 4 out of range (need an 8-bit value)')
|
raise ValueError('field 4 out of range (need an 8-bit value)')
|
||||||
if not 0 <= clock_seq_low < 1<<8:
|
if not 0 <= clock_seq_low < (1 << 8):
|
||||||
raise ValueError('field 5 out of range (need an 8-bit value)')
|
raise ValueError('field 5 out of range (need an 8-bit value)')
|
||||||
if not 0 <= node < 1<<48:
|
if not 0 <= node < (1 << 48):
|
||||||
raise ValueError('field 6 out of range (need a 48-bit value)')
|
raise ValueError('field 6 out of range (need a 48-bit value)')
|
||||||
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
|
clock_seq = (clock_seq_hi_variant << 8) | clock_seq_low
|
||||||
int = ((time_low << 96) | (time_mid << 80) |
|
int = ((time_low << 96) | (time_mid << 80) |
|
||||||
(time_hi_version << 64) | (clock_seq << 48) | node)
|
(time_hi_version << 64) | (clock_seq << 48) | node)
|
||||||
if int is not None:
|
if not 0 <= int <= _UINT_128_MAX:
|
||||||
if not 0 <= int < 1<<128:
|
raise ValueError('int is out of range (need a 128-bit value)')
|
||||||
raise ValueError('int is out of range (need a 128-bit value)')
|
|
||||||
if version is not None:
|
if version is not None:
|
||||||
if not 1 <= version <= 8:
|
if not 1 <= version <= 8:
|
||||||
raise ValueError('illegal version number')
|
raise ValueError('illegal version number')
|
||||||
|
# clear the variant and the version number bits
|
||||||
|
int &= _RFC_4122_CLEARFLAGS_MASK
|
||||||
# Set the variant to RFC 4122/9562.
|
# Set the variant to RFC 4122/9562.
|
||||||
int &= ~(0xc000 << 48)
|
int |= 0x8000_0000_0000_0000 # (0x8000 << 48)
|
||||||
int |= 0x8000 << 48
|
|
||||||
# Set the version number.
|
# Set the version number.
|
||||||
int &= ~(0xf000 << 64)
|
|
||||||
int |= version << 76
|
int |= version << 76
|
||||||
object.__setattr__(self, 'int', int)
|
object.__setattr__(self, 'int', int)
|
||||||
object.__setattr__(self, 'is_safe', is_safe)
|
object.__setattr__(self, 'is_safe', is_safe)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def _from_int(cls, value):
|
||||||
|
"""Create a UUID from an integer *value*. Internal use only."""
|
||||||
|
assert 0 <= value <= _UINT_128_MAX, repr(value)
|
||||||
|
self = object.__new__(cls)
|
||||||
|
object.__setattr__(self, 'int', value)
|
||||||
|
object.__setattr__(self, 'is_safe', SafeUUID.unknown)
|
||||||
|
return self
|
||||||
|
|
||||||
def __getstate__(self):
|
def __getstate__(self):
|
||||||
d = {'int': self.int}
|
d = {'int': self.int}
|
||||||
if self.is_safe != SafeUUID.unknown:
|
if self.is_safe != SafeUUID.unknown:
|
||||||
|
@ -700,24 +723,30 @@ def uuid3(namespace, name):
|
||||||
"""Generate a UUID from the MD5 hash of a namespace UUID and a name."""
|
"""Generate a UUID from the MD5 hash of a namespace UUID and a name."""
|
||||||
if isinstance(name, str):
|
if isinstance(name, str):
|
||||||
name = bytes(name, "utf-8")
|
name = bytes(name, "utf-8")
|
||||||
from hashlib import md5
|
import hashlib
|
||||||
digest = md5(
|
h = hashlib.md5(namespace.bytes + name, usedforsecurity=False)
|
||||||
namespace.bytes + name,
|
int_uuid_3 = int.from_bytes(h.digest())
|
||||||
usedforsecurity=False
|
int_uuid_3 &= _RFC_4122_CLEARFLAGS_MASK
|
||||||
).digest()
|
int_uuid_3 |= _RFC_4122_VERSION_3_FLAGS
|
||||||
return UUID(bytes=digest[:16], version=3)
|
return UUID._from_int(int_uuid_3)
|
||||||
|
|
||||||
def uuid4():
|
def uuid4():
|
||||||
"""Generate a random UUID."""
|
"""Generate a random UUID."""
|
||||||
return UUID(bytes=os.urandom(16), version=4)
|
int_uuid_4 = int.from_bytes(os.urandom(16))
|
||||||
|
int_uuid_4 &= _RFC_4122_CLEARFLAGS_MASK
|
||||||
|
int_uuid_4 |= _RFC_4122_VERSION_4_FLAGS
|
||||||
|
return UUID._from_int(int_uuid_4)
|
||||||
|
|
||||||
def uuid5(namespace, name):
|
def uuid5(namespace, name):
|
||||||
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
|
"""Generate a UUID from the SHA-1 hash of a namespace UUID and a name."""
|
||||||
if isinstance(name, str):
|
if isinstance(name, str):
|
||||||
name = bytes(name, "utf-8")
|
name = bytes(name, "utf-8")
|
||||||
from hashlib import sha1
|
import hashlib
|
||||||
hash = sha1(namespace.bytes + name).digest()
|
h = hashlib.sha1(namespace.bytes + name, usedforsecurity=False)
|
||||||
return UUID(bytes=hash[:16], version=5)
|
int_uuid_5 = int.from_bytes(h.digest()[:16])
|
||||||
|
int_uuid_5 &= _RFC_4122_CLEARFLAGS_MASK
|
||||||
|
int_uuid_5 |= _RFC_4122_VERSION_5_FLAGS
|
||||||
|
return UUID._from_int(int_uuid_5)
|
||||||
|
|
||||||
def uuid8(a=None, b=None, c=None):
|
def uuid8(a=None, b=None, c=None):
|
||||||
"""Generate a UUID from three custom blocks.
|
"""Generate a UUID from three custom blocks.
|
||||||
|
@ -740,7 +769,9 @@ def uuid8(a=None, b=None, c=None):
|
||||||
int_uuid_8 = (a & 0xffff_ffff_ffff) << 80
|
int_uuid_8 = (a & 0xffff_ffff_ffff) << 80
|
||||||
int_uuid_8 |= (b & 0xfff) << 64
|
int_uuid_8 |= (b & 0xfff) << 64
|
||||||
int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff
|
int_uuid_8 |= c & 0x3fff_ffff_ffff_ffff
|
||||||
return UUID(int=int_uuid_8, version=8)
|
# by construction, the variant and version bits are already cleared
|
||||||
|
int_uuid_8 |= _RFC_4122_VERSION_8_FLAGS
|
||||||
|
return UUID._from_int(int_uuid_8)
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
"""Run the uuid command line interface."""
|
"""Run the uuid command line interface."""
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Speed up generation of version 3, 4, 5, and 8 :class:`~uuid.UUID` objects
|
||||||
|
via their dedicated functions by 30%. Patch by Bénédikt Tran.
|
Loading…
Add table
Add a link
Reference in a new issue