mirror of
https://github.com/python/cpython.git
synced 2025-11-25 21:11:09 +00:00
ssue #19183: Implement PEP 456 'secure and interchangeable hash algorithm'.
Python now uses SipHash24 on all major platforms.
This commit is contained in:
parent
fe32aec25a
commit
985ecdcfc2
27 changed files with 1029 additions and 242 deletions
|
|
@ -68,6 +68,7 @@
|
|||
#include "object.h"
|
||||
#include "objimpl.h"
|
||||
#include "typeslots.h"
|
||||
#include "pyhash.h"
|
||||
|
||||
#include "pydebug.h"
|
||||
|
||||
|
|
|
|||
|
|
@ -562,23 +562,6 @@ PyAPI_FUNC(PyObject *) PyObject_Dir(PyObject *);
|
|||
PyAPI_FUNC(int) Py_ReprEnter(PyObject *);
|
||||
PyAPI_FUNC(void) Py_ReprLeave(PyObject *);
|
||||
|
||||
/* Helpers for hash functions */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(unsigned char*, Py_ssize_t);
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
Py_hash_t prefix;
|
||||
Py_hash_t suffix;
|
||||
} _Py_HashSecret_t;
|
||||
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
PyAPI_DATA(int) _Py_HashSecret_Initialized;
|
||||
#endif
|
||||
|
||||
/* Helper for passing objects to printf and the like */
|
||||
#define PyObject_REPR(obj) _PyUnicode_AsString(PyObject_Repr(obj))
|
||||
|
||||
|
|
|
|||
147
Include/pyhash.h
Normal file
147
Include/pyhash.h
Normal file
|
|
@ -0,0 +1,147 @@
|
|||
#ifndef Py_HASH_H
|
||||
|
||||
#define Py_HASH_H
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Helpers for hash functions */
|
||||
#ifndef Py_LIMITED_API
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(void*);
|
||||
PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
|
||||
#endif
|
||||
|
||||
/* Prime multiplier used in string and various other hashes. */
|
||||
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
|
||||
|
||||
/* Parameters used for the numeric hash implementation. See notes for
|
||||
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
|
||||
reduction modulo the prime 2**_PyHASH_BITS - 1. */
|
||||
|
||||
#if SIZEOF_VOID_P >= 8
|
||||
# define _PyHASH_BITS 61
|
||||
#else
|
||||
# define _PyHASH_BITS 31
|
||||
#endif
|
||||
|
||||
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
|
||||
#define _PyHASH_INF 314159
|
||||
#define _PyHASH_NAN 0
|
||||
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
|
||||
|
||||
|
||||
/* hash secret
|
||||
*
|
||||
* memory layout on 64 bit systems
|
||||
* cccccccc cccccccc cccccccc uc -- unsigned char[24]
|
||||
* pppppppp ssssssss ........ fnv -- two Py_hash_t
|
||||
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T
|
||||
* ........ ........ ssssssss djbx33a -- 16 bytes padding + one Py_hash_t
|
||||
* ........ ........ eeeeeeee pyexpat XML hash salt
|
||||
*
|
||||
* memory layout on 32 bit systems
|
||||
* cccccccc cccccccc cccccccc uc
|
||||
* ppppssss ........ ........ fnv -- two Py_hash_t
|
||||
* k0k0k0k0 k1k1k1k1 ........ siphash -- two PY_UINT64_T (*)
|
||||
* ........ ........ ssss.... djbx33a -- 16 bytes padding + one Py_hash_t
|
||||
* ........ ........ eeee.... pyexpat XML hash salt
|
||||
*
|
||||
* (*) The siphash member may not be available on 32 bit platforms without
|
||||
* an unsigned int64 data type.
|
||||
*/
|
||||
typedef union {
|
||||
/* ensure 24 bytes */
|
||||
unsigned char uc[24];
|
||||
/* two Py_hash_t for FNV */
|
||||
struct {
|
||||
Py_hash_t prefix;
|
||||
Py_hash_t suffix;
|
||||
} fnv;
|
||||
#ifdef PY_UINT64_T
|
||||
/* two uint64 for SipHash24 */
|
||||
struct {
|
||||
PY_UINT64_T k0;
|
||||
PY_UINT64_T k1;
|
||||
} siphash;
|
||||
#endif
|
||||
/* a different (!) Py_hash_t for small string optimization */
|
||||
struct {
|
||||
unsigned char padding[16];
|
||||
Py_hash_t suffix;
|
||||
} djbx33a;
|
||||
struct {
|
||||
unsigned char padding[16];
|
||||
Py_hash_t hashsalt;
|
||||
} expat;
|
||||
} _Py_HashSecret_t;
|
||||
PyAPI_DATA(_Py_HashSecret_t) _Py_HashSecret;
|
||||
|
||||
#ifdef Py_DEBUG
|
||||
PyAPI_DATA(int) _Py_HashSecret_Initialized;
|
||||
#endif
|
||||
|
||||
|
||||
/* hash function definition */
|
||||
#ifndef Py_LIMITED_API
|
||||
typedef struct {
|
||||
Py_hash_t (*const hash)(const void *, Py_ssize_t);
|
||||
const char *name;
|
||||
const int hash_bits;
|
||||
const int seed_bits;
|
||||
} PyHash_FuncDef;
|
||||
|
||||
PyAPI_FUNC(PyHash_FuncDef*) PyHash_GetFuncDef(void);
|
||||
#endif
|
||||
|
||||
|
||||
/* cutoff for small string DJBX33A optimization in range [1, cutoff).
|
||||
*
|
||||
* About 50% of the strings in a typical Python application are smaller than
|
||||
* 6 to 7 chars. However DJBX33A is vulnerable to hash collision attacks.
|
||||
* NEVER use DJBX33A for long strings!
|
||||
*
|
||||
* A Py_HASH_CUTOFF of 0 disables small string optimization. 32 bit platforms
|
||||
* should use a smaller cutoff because it is easier to create colliding
|
||||
* strings. A cutoff of 7 on 64bit platforms and 5 on 32bit platforms should
|
||||
* provide a decent safety margin.
|
||||
*/
|
||||
#ifndef Py_HASH_CUTOFF
|
||||
# define Py_HASH_CUTOFF 0
|
||||
#elif (Py_HASH_CUTOFF > 7 || Py_HASH_CUTOFF < 0)
|
||||
# error Py_HASH_CUTOFF must in range 0...7.
|
||||
#endif /* Py_HASH_CUTOFF */
|
||||
|
||||
|
||||
/* hash algorithm selection
|
||||
*
|
||||
* The values for Py_HASH_SIPHASH24 and Py_HASH_FNV are hard-coded in the
|
||||
* configure script.
|
||||
*
|
||||
* - FNV is available on all platforms and architectures.
|
||||
* - SIPHASH24 only works on plaforms that provide PY_UINT64_T and doesn't
|
||||
* require aligned memory for integers.
|
||||
* - With EXTERNAL embedders can provide an alternative implementation with::
|
||||
*
|
||||
* PyHash_FuncDef PyHash_Func = {...};
|
||||
*
|
||||
* XXX: Figure out __declspec() for extern PyHash_FuncDef.
|
||||
*/
|
||||
#define Py_HASH_EXTERNAL 0
|
||||
#define Py_HASH_SIPHASH24 1
|
||||
#define Py_HASH_FNV 2
|
||||
|
||||
#ifndef Py_HASH_ALGORITHM
|
||||
# if (defined(PY_UINT64_T) && defined(PY_UINT32_T) \
|
||||
&& !defined(HAVE_ALIGNED_REQUIRED))
|
||||
# define Py_HASH_ALGORITHM Py_HASH_SIPHASH24
|
||||
# else
|
||||
# define Py_HASH_ALGORITHM Py_HASH_FNV
|
||||
# endif /* uint64_t && uint32_t && aligned */
|
||||
#endif /* Py_HASH_ALGORITHM */
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* !Py_HASH_H */
|
||||
|
|
@ -144,23 +144,6 @@ Used in: PY_LONG_LONG
|
|||
#endif
|
||||
#endif
|
||||
|
||||
/* Prime multiplier used in string and various other hashes. */
|
||||
#define _PyHASH_MULTIPLIER 1000003UL /* 0xf4243 */
|
||||
|
||||
/* Parameters used for the numeric hash implementation. See notes for
|
||||
_Py_HashDouble in Objects/object.c. Numeric hashes are based on
|
||||
reduction modulo the prime 2**_PyHASH_BITS - 1. */
|
||||
|
||||
#if SIZEOF_VOID_P >= 8
|
||||
#define _PyHASH_BITS 61
|
||||
#else
|
||||
#define _PyHASH_BITS 31
|
||||
#endif
|
||||
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
|
||||
#define _PyHASH_INF 314159
|
||||
#define _PyHASH_NAN 0
|
||||
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
|
||||
|
||||
/* uintptr_t is the C9X name for an unsigned integral type such that a
|
||||
* legitimate void* can be cast to uintptr_t and then back to void* again
|
||||
* without loss of information. Similarly for intptr_t, wrt a signed
|
||||
|
|
@ -199,8 +182,10 @@ typedef Py_intptr_t Py_ssize_t;
|
|||
#endif
|
||||
|
||||
/* Py_hash_t is the same size as a pointer. */
|
||||
#define SIZEOF_PY_HASH_T SIZEOF_SIZE_T
|
||||
typedef Py_ssize_t Py_hash_t;
|
||||
/* Py_uhash_t is the unsigned equivalent needed to calculate numeric hash. */
|
||||
#define SIZEOF_PY_UHASH_T SIZEOF_SIZE_T
|
||||
typedef size_t Py_uhash_t;
|
||||
|
||||
/* Largest possible value of size_t.
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue