mirror of
https://github.com/python/cpython.git
synced 2025-07-31 23:23:11 +00:00
Issue #4751: For hashlib algorithms provided by OpenSSL, the Python
GIL is now released during computation on data lengths >= 2048 bytes.
This commit is contained in:
parent
342c8db859
commit
d02eedacab
4 changed files with 162 additions and 53 deletions
|
@ -95,6 +95,12 @@ A hash object has the following methods:
|
||||||
a single call with the concatenation of all the arguments: ``m.update(a);
|
a single call with the concatenation of all the arguments: ``m.update(a);
|
||||||
m.update(b)`` is equivalent to ``m.update(a+b)``.
|
m.update(b)`` is equivalent to ``m.update(a+b)``.
|
||||||
|
|
||||||
|
.. versionchanged:: 2.7
|
||||||
|
|
||||||
|
The Python GIL is released to allow other threads to run while
|
||||||
|
hash updates on data of 2048 bytes or more are taking place when
|
||||||
|
using hash algorithms supplied by OpenSSL.
|
||||||
|
|
||||||
|
|
||||||
.. method:: hash.digest()
|
.. method:: hash.digest()
|
||||||
|
|
||||||
|
|
|
@ -2,11 +2,16 @@
|
||||||
#
|
#
|
||||||
# $Id$
|
# $Id$
|
||||||
#
|
#
|
||||||
# Copyright (C) 2005 Gregory P. Smith (greg@krypto.org)
|
# Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
|
||||||
# Licensed to PSF under a Contributor Agreement.
|
# Licensed to PSF under a Contributor Agreement.
|
||||||
#
|
#
|
||||||
|
|
||||||
import hashlib
|
import hashlib
|
||||||
|
import StringIO
|
||||||
|
try:
|
||||||
|
import threading
|
||||||
|
except ImportError:
|
||||||
|
threading = None
|
||||||
import unittest
|
import unittest
|
||||||
from test import test_support
|
from test import test_support
|
||||||
from test.test_support import _4G, precisionbigmemtest
|
from test.test_support import _4G, precisionbigmemtest
|
||||||
|
@ -61,10 +66,10 @@ class HashLibTestCase(unittest.TestCase):
|
||||||
def check(self, name, data, digest):
|
def check(self, name, data, digest):
|
||||||
# test the direct constructors
|
# test the direct constructors
|
||||||
computed = getattr(hashlib, name)(data).hexdigest()
|
computed = getattr(hashlib, name)(data).hexdigest()
|
||||||
self.assert_(computed == digest)
|
self.assertEqual(computed, digest)
|
||||||
# test the general new() interface
|
# test the general new() interface
|
||||||
computed = hashlib.new(name, data).hexdigest()
|
computed = hashlib.new(name, data).hexdigest()
|
||||||
self.assert_(computed == digest)
|
self.assertEqual(computed, digest)
|
||||||
|
|
||||||
def check_no_unicode(self, algorithm_name):
|
def check_no_unicode(self, algorithm_name):
|
||||||
# Unicode objects are not allowed as input.
|
# Unicode objects are not allowed as input.
|
||||||
|
@ -211,6 +216,44 @@ class HashLibTestCase(unittest.TestCase):
|
||||||
"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
|
"e718483d0ce769644e2e42c7bc15b4638e1f98b13b2044285632a803afa973eb"+
|
||||||
"de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
|
"de0ff244877ea60a4cb0432ce577c31beb009c5c2c49aa2e4eadb217ad8cc09b")
|
||||||
|
|
||||||
|
def test_threaded_hashing(self):
|
||||||
|
if not threading:
|
||||||
|
raise unittest.SkipTest('No threading module.')
|
||||||
|
|
||||||
|
# Updating the same hash object from several threads at once
|
||||||
|
# using data chunk sizes containing the same byte sequences.
|
||||||
|
#
|
||||||
|
# If the internal locks are working to prevent multiple
|
||||||
|
# updates on the same object from running at once, the resulting
|
||||||
|
# hash will be the same as doing it single threaded upfront.
|
||||||
|
hasher = hashlib.sha1()
|
||||||
|
num_threads = 5
|
||||||
|
smallest_data = 'swineflu'
|
||||||
|
data = smallest_data*200000
|
||||||
|
expected_hash = hashlib.sha1(data*num_threads).hexdigest()
|
||||||
|
|
||||||
|
def hash_in_chunks(chunk_size, event):
|
||||||
|
index = 0
|
||||||
|
while index < len(data):
|
||||||
|
hasher.update(data[index:index+chunk_size])
|
||||||
|
index += chunk_size
|
||||||
|
event.set()
|
||||||
|
|
||||||
|
events = []
|
||||||
|
for threadnum in xrange(num_threads):
|
||||||
|
chunk_size = len(data) // (10**threadnum)
|
||||||
|
assert chunk_size > 0
|
||||||
|
assert chunk_size % len(smallest_data) == 0
|
||||||
|
event = threading.Event()
|
||||||
|
events.append(event)
|
||||||
|
threading.Thread(target=hash_in_chunks,
|
||||||
|
args=(chunk_size, event)).start()
|
||||||
|
|
||||||
|
for event in events:
|
||||||
|
event.wait()
|
||||||
|
|
||||||
|
self.assertEqual(expected_hash, hasher.hexdigest())
|
||||||
|
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test_support.run_unittest(HashLibTestCase)
|
test_support.run_unittest(HashLibTestCase)
|
||||||
|
|
|
@ -905,6 +905,9 @@ C-API
|
||||||
Extension Modules
|
Extension Modules
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #4751: For hashlib algorithms provided by OpenSSL, the Python
|
||||||
|
GIL is now released during computation on data lengths >= 2048 bytes.
|
||||||
|
|
||||||
- Issue #3745: Fix hashlib to always reject unicode and non buffer-api
|
- Issue #3745: Fix hashlib to always reject unicode and non buffer-api
|
||||||
supporting objects as input no matter how it was compiled (built in
|
supporting objects as input no matter how it was compiled (built in
|
||||||
implementations or external openssl library).
|
implementations or external openssl library).
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
/* Module that wraps all OpenSSL hash algorithms */
|
/* Module that wraps all OpenSSL hash algorithms */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (C) 2005-2007 Gregory P. Smith (greg@krypto.org)
|
* Copyright (C) 2005-2009 Gregory P. Smith (greg@krypto.org)
|
||||||
* Licensed to PSF under a Contributor Agreement.
|
* Licensed to PSF under a Contributor Agreement.
|
||||||
*
|
*
|
||||||
* Derived from a skeleton of shamodule.c containing work performed by:
|
* Derived from a skeleton of shamodule.c containing work performed by:
|
||||||
|
@ -17,25 +17,49 @@
|
||||||
#include "structmember.h"
|
#include "structmember.h"
|
||||||
#include "hashlib.h"
|
#include "hashlib.h"
|
||||||
|
|
||||||
|
#ifdef WITH_THREAD
|
||||||
|
#include "pythread.h"
|
||||||
|
#define ENTER_HASHLIB(obj) \
|
||||||
|
if ((obj)->lock) \
|
||||||
|
{ \
|
||||||
|
if (!PyThread_acquire_lock((obj)->lock, 0)) \
|
||||||
|
{ \
|
||||||
|
Py_BEGIN_ALLOW_THREADS \
|
||||||
|
PyThread_acquire_lock((obj)->lock, 1); \
|
||||||
|
Py_END_ALLOW_THREADS \
|
||||||
|
} \
|
||||||
|
}
|
||||||
|
#define LEAVE_HASHLIB(obj) \
|
||||||
|
if ((obj)->lock) \
|
||||||
|
{ \
|
||||||
|
PyThread_release_lock((obj)->lock); \
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
#define ENTER_HASHLIB(obj)
|
||||||
|
#define LEAVE_HASHLIB(obj)
|
||||||
|
#endif
|
||||||
|
|
||||||
/* EVP is the preferred interface to hashing in OpenSSL */
|
/* EVP is the preferred interface to hashing in OpenSSL */
|
||||||
#include <openssl/evp.h>
|
#include <openssl/evp.h>
|
||||||
|
|
||||||
#define MUNCH_SIZE INT_MAX
|
#define MUNCH_SIZE INT_MAX
|
||||||
|
|
||||||
|
/* TODO(gps): We should probably make this a module or EVPobject attribute
|
||||||
|
* to allow the user to optimize based on the platform they're using. */
|
||||||
|
#define HASHLIB_GIL_MINSIZE 2048
|
||||||
|
|
||||||
#ifndef HASH_OBJ_CONSTRUCTOR
|
#ifndef HASH_OBJ_CONSTRUCTOR
|
||||||
#define HASH_OBJ_CONSTRUCTOR 0
|
#define HASH_OBJ_CONSTRUCTOR 0
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
||||||
typedef struct {
|
typedef struct {
|
||||||
PyObject_HEAD
|
PyObject_HEAD
|
||||||
PyObject *name; /* name of this hash algorithm */
|
PyObject *name; /* name of this hash algorithm */
|
||||||
EVP_MD_CTX ctx; /* OpenSSL message digest context */
|
EVP_MD_CTX ctx; /* OpenSSL message digest context */
|
||||||
/*
|
#ifdef WITH_THREAD
|
||||||
* TODO investigate performance impact of including a lock for this object
|
PyThread_type_lock lock; /* OpenSSL context lock */
|
||||||
* here and releasing the Python GIL while hash updates are in progress.
|
#endif
|
||||||
* (perhaps only release GIL if input length will take long to process?)
|
|
||||||
*/
|
|
||||||
} EVPobject;
|
} EVPobject;
|
||||||
|
|
||||||
|
|
||||||
|
@ -64,26 +88,57 @@ newEVPobject(PyObject *name)
|
||||||
if (retval != NULL) {
|
if (retval != NULL) {
|
||||||
Py_INCREF(name);
|
Py_INCREF(name);
|
||||||
retval->name = name;
|
retval->name = name;
|
||||||
|
#ifdef WITH_THREAD
|
||||||
|
retval->lock = NULL;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
return retval;
|
return retval;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
EVP_hash(EVPobject *self, const void *vp, Py_ssize_t len)
|
||||||
|
{
|
||||||
|
unsigned int process;
|
||||||
|
const unsigned char *cp = (const unsigned char *)vp;
|
||||||
|
while (0 < len)
|
||||||
|
{
|
||||||
|
if (len > (Py_ssize_t)MUNCH_SIZE)
|
||||||
|
process = MUNCH_SIZE;
|
||||||
|
else
|
||||||
|
process = Py_SAFE_DOWNCAST(len, Py_ssize_t, unsigned int);
|
||||||
|
EVP_DigestUpdate(&self->ctx, (const void*)cp, process);
|
||||||
|
len -= process;
|
||||||
|
cp += process;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* Internal methods for a hash object */
|
/* Internal methods for a hash object */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
EVP_dealloc(PyObject *ptr)
|
EVP_dealloc(EVPobject *self)
|
||||||
{
|
{
|
||||||
EVP_MD_CTX_cleanup(&((EVPobject *)ptr)->ctx);
|
#ifdef WITH_THREAD
|
||||||
Py_XDECREF(((EVPobject *)ptr)->name);
|
if (self->lock != NULL)
|
||||||
PyObject_Del(ptr);
|
PyThread_free_lock(self->lock);
|
||||||
|
#endif
|
||||||
|
EVP_MD_CTX_cleanup(&self->ctx);
|
||||||
|
Py_XDECREF(self->name);
|
||||||
|
PyObject_Del(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void locked_EVP_MD_CTX_copy(EVP_MD_CTX *new_ctx_p, EVPobject *self)
|
||||||
|
{
|
||||||
|
ENTER_HASHLIB(self);
|
||||||
|
EVP_MD_CTX_copy(new_ctx_p, &self->ctx);
|
||||||
|
LEAVE_HASHLIB(self);
|
||||||
|
}
|
||||||
|
|
||||||
/* External methods for a hash object */
|
/* External methods for a hash object */
|
||||||
|
|
||||||
PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
|
PyDoc_STRVAR(EVP_copy__doc__, "Return a copy of the hash object.");
|
||||||
|
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
EVP_copy(EVPobject *self, PyObject *unused)
|
EVP_copy(EVPobject *self, PyObject *unused)
|
||||||
{
|
{
|
||||||
|
@ -92,7 +147,7 @@ EVP_copy(EVPobject *self, PyObject *unused)
|
||||||
if ( (newobj = newEVPobject(self->name))==NULL)
|
if ( (newobj = newEVPobject(self->name))==NULL)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
EVP_MD_CTX_copy(&newobj->ctx, &self->ctx);
|
locked_EVP_MD_CTX_copy(&newobj->ctx, self);
|
||||||
return (PyObject *)newobj;
|
return (PyObject *)newobj;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -107,7 +162,7 @@ EVP_digest(EVPobject *self, PyObject *unused)
|
||||||
PyObject *retval;
|
PyObject *retval;
|
||||||
unsigned int digest_size;
|
unsigned int digest_size;
|
||||||
|
|
||||||
EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
|
locked_EVP_MD_CTX_copy(&temp_ctx, self);
|
||||||
digest_size = EVP_MD_CTX_size(&temp_ctx);
|
digest_size = EVP_MD_CTX_size(&temp_ctx);
|
||||||
EVP_DigestFinal(&temp_ctx, digest, NULL);
|
EVP_DigestFinal(&temp_ctx, digest, NULL);
|
||||||
|
|
||||||
|
@ -129,7 +184,7 @@ EVP_hexdigest(EVPobject *self, PyObject *unused)
|
||||||
unsigned int i, j, digest_size;
|
unsigned int i, j, digest_size;
|
||||||
|
|
||||||
/* Get the raw (binary) digest value */
|
/* Get the raw (binary) digest value */
|
||||||
EVP_MD_CTX_copy(&temp_ctx, &self->ctx);
|
locked_EVP_MD_CTX_copy(&temp_ctx, self);
|
||||||
digest_size = EVP_MD_CTX_size(&temp_ctx);
|
digest_size = EVP_MD_CTX_size(&temp_ctx);
|
||||||
EVP_DigestFinal(&temp_ctx, digest, NULL);
|
EVP_DigestFinal(&temp_ctx, digest, NULL);
|
||||||
|
|
||||||
|
@ -174,20 +229,27 @@ EVP_update(EVPobject *self, PyObject *args)
|
||||||
|
|
||||||
GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL);
|
GET_BUFFER_VIEW_OR_ERROUT(obj, &view, NULL);
|
||||||
|
|
||||||
if (view.len > 0 && view.len <= MUNCH_SIZE) {
|
#ifdef WITH_THREAD
|
||||||
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
|
if (self->lock == NULL && view.len >= HASHLIB_GIL_MINSIZE)
|
||||||
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
|
{
|
||||||
} else {
|
self->lock = PyThread_allocate_lock();
|
||||||
Py_ssize_t len = view.len;
|
/* If allocation fails, lock stays NULL and we fall back to the non-threaded code. */
|
||||||
unsigned char *cp = (unsigned char *)view.buf;
|
|
||||||
while (len > 0) {
|
|
||||||
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
|
|
||||||
EVP_DigestUpdate(&self->ctx, cp, process);
|
|
||||||
len -= process;
|
|
||||||
cp += process;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (self->lock != NULL)
|
||||||
|
{
|
||||||
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
PyThread_acquire_lock(self->lock, 1);
|
||||||
|
EVP_hash(self, view.buf, view.len);
|
||||||
|
PyThread_release_lock(self->lock);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
|
} else {
|
||||||
|
EVP_hash(self, view.buf, view.len);
|
||||||
|
}
|
||||||
|
#else
|
||||||
|
EVP_hash(self, view.buf, view.len);
|
||||||
|
#endif
|
||||||
|
|
||||||
PyBuffer_Release(&view);
|
PyBuffer_Release(&view);
|
||||||
|
|
||||||
Py_INCREF(Py_None);
|
Py_INCREF(Py_None);
|
||||||
|
@ -205,13 +267,17 @@ static PyMethodDef EVP_methods[] = {
|
||||||
static PyObject *
|
static PyObject *
|
||||||
EVP_get_block_size(EVPobject *self, void *closure)
|
EVP_get_block_size(EVPobject *self, void *closure)
|
||||||
{
|
{
|
||||||
return PyInt_FromLong(EVP_MD_CTX_block_size(&((EVPobject *)self)->ctx));
|
long block_size;
|
||||||
|
block_size = EVP_MD_CTX_block_size(&self->ctx);
|
||||||
|
return PyLong_FromLong(block_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
EVP_get_digest_size(EVPobject *self, void *closure)
|
EVP_get_digest_size(EVPobject *self, void *closure)
|
||||||
{
|
{
|
||||||
return PyInt_FromLong(EVP_MD_CTX_size(&((EVPobject *)self)->ctx));
|
long size;
|
||||||
|
size = EVP_MD_CTX_size(&self->ctx);
|
||||||
|
return PyLong_FromLong(size);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyMemberDef EVP_members[] = {
|
static PyMemberDef EVP_members[] = {
|
||||||
|
@ -286,19 +352,14 @@ EVP_tp_init(EVPobject *self, PyObject *args, PyObject *kwds)
|
||||||
Py_INCREF(self->name);
|
Py_INCREF(self->name);
|
||||||
|
|
||||||
if (data_obj) {
|
if (data_obj) {
|
||||||
if (view.len > 0 && view.len <= MUNCH_SIZE) {
|
if (view.len >= HASHLIB_GIL_MINSIZE)
|
||||||
EVP_DigestUpdate(&self->ctx, (unsigned char*)view.buf,
|
{
|
||||||
Py_SAFE_DOWNCAST(view.len, Py_ssize_t, unsigned int));
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
EVP_hash(self, view.buf, view.len);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
} else {
|
} else {
|
||||||
Py_ssize_t len = view.len;
|
EVP_hash(self, view.buf, view.len);
|
||||||
unsigned char *cp = (unsigned char*)view.buf;
|
}
|
||||||
while (len > 0) {
|
|
||||||
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
|
|
||||||
EVP_DigestUpdate(&self->ctx, cp, process);
|
|
||||||
len -= process;
|
|
||||||
cp += process;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
PyBuffer_Release(&view);
|
PyBuffer_Release(&view);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -329,7 +390,7 @@ static PyTypeObject EVPtype = {
|
||||||
sizeof(EVPobject), /*tp_basicsize*/
|
sizeof(EVPobject), /*tp_basicsize*/
|
||||||
0, /*tp_itemsize*/
|
0, /*tp_itemsize*/
|
||||||
/* methods */
|
/* methods */
|
||||||
EVP_dealloc, /*tp_dealloc*/
|
(destructor)EVP_dealloc, /*tp_dealloc*/
|
||||||
0, /*tp_print*/
|
0, /*tp_print*/
|
||||||
0, /*tp_getattr*/
|
0, /*tp_getattr*/
|
||||||
0, /*tp_setattr*/
|
0, /*tp_setattr*/
|
||||||
|
@ -389,17 +450,13 @@ EVPnew(PyObject *name_obj,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (cp && len) {
|
if (cp && len) {
|
||||||
if (len > 0 && len <= MUNCH_SIZE) {
|
if (len >= HASHLIB_GIL_MINSIZE)
|
||||||
EVP_DigestUpdate(&self->ctx, cp, Py_SAFE_DOWNCAST(len, Py_ssize_t,
|
{
|
||||||
unsigned int));
|
Py_BEGIN_ALLOW_THREADS
|
||||||
|
EVP_hash(self, cp, len);
|
||||||
|
Py_END_ALLOW_THREADS
|
||||||
} else {
|
} else {
|
||||||
Py_ssize_t offset = 0;
|
EVP_hash(self, cp, len);
|
||||||
while (len > 0) {
|
|
||||||
unsigned int process = len > MUNCH_SIZE ? MUNCH_SIZE : len;
|
|
||||||
EVP_DigestUpdate(&self->ctx, cp + offset, process);
|
|
||||||
len -= process;
|
|
||||||
offset += process;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue