mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
bpo-29882: Add _Py_popcount32() function (GH-20518)
* Rename pycore_byteswap.h to pycore_bitutils.h. * Move popcount_digit() to pycore_bitutils.h as _Py_popcount32(). * _Py_popcount32() uses the GCC and clang builtin function if available. * Add unit tests for _Py_popcount32().
This commit is contained in:
parent
301f0d4ff9
commit
c6b292cdee
11 changed files with 108 additions and 39 deletions
|
@ -1,5 +1,6 @@
|
|||
#include "Python.h"
|
||||
|
||||
#include "pycore_bitutils.h" // _Py_popcount32
|
||||
#include "pycore_hamt.h"
|
||||
#include "pycore_object.h" // _PyObject_GC_TRACK()
|
||||
#include <stddef.h> // offsetof()
|
||||
|
@ -433,30 +434,10 @@ hamt_bitpos(int32_t hash, uint32_t shift)
|
|||
return (uint32_t)1 << hamt_mask(hash, shift);
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
hamt_bitcount(uint32_t i)
|
||||
{
|
||||
/* We could use a native popcount instruction, but that would
|
||||
require either adding configure flags to enable SSE4.2
|
||||
support or detecting it dynamically. Otherwise, we risk
|
||||
CPython not working properly on older hardware.
|
||||
|
||||
In practice, there's no observable difference in
|
||||
performance between using a popcount instruction and the
|
||||
following fallback code.
|
||||
|
||||
The algorithm is copied from:
|
||||
https://graphics.stanford.edu/~seander/bithacks.html
|
||||
*/
|
||||
i = i - ((i >> 1) & 0x55555555);
|
||||
i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
|
||||
return (((i + (i >> 4)) & 0xF0F0F0F) * 0x1010101) >> 24;
|
||||
}
|
||||
|
||||
static inline uint32_t
|
||||
hamt_bitindex(uint32_t bitmap, uint32_t bit)
|
||||
{
|
||||
return hamt_bitcount(bitmap & (bit - 1));
|
||||
return (uint32_t)_Py_popcount32(bitmap & (bit - 1));
|
||||
}
|
||||
|
||||
|
||||
|
@ -820,7 +801,7 @@ hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
|
|||
else {
|
||||
/* There was no key before with the same (shift,hash). */
|
||||
|
||||
uint32_t n = hamt_bitcount(self->b_bitmap);
|
||||
uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap);
|
||||
|
||||
if (n >= 16) {
|
||||
/* When we have a situation where we want to store more
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue