bpo-29882: Add _Py_popcount32() function (GH-20518)

* Rename pycore_byteswap.h to pycore_bitutils.h.
* Move popcount_digit() to pycore_bitutils.h as _Py_popcount32().
* _Py_popcount32() uses the GCC and clang builtin function if available.
* Add unit tests for _Py_popcount32().
This commit is contained in:
Victor Stinner 2020-06-08 16:30:33 +02:00 committed by GitHub
parent 301f0d4ff9
commit c6b292cdee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
11 changed files with 108 additions and 39 deletions

View file

@ -1,5 +1,6 @@
#include "Python.h"
#include "pycore_bitutils.h" // _Py_popcount32
#include "pycore_hamt.h"
#include "pycore_object.h" // _PyObject_GC_TRACK()
#include <stddef.h> // offsetof()
@ -433,30 +434,10 @@ hamt_bitpos(int32_t hash, uint32_t shift)
return (uint32_t)1 << hamt_mask(hash, shift);
}
static inline uint32_t
hamt_bitcount(uint32_t i)
{
    /* Return the number of bits set in the 32-bit word `i`.

       A native popcount instruction is deliberately not used: relying on
       it would require either adding configure flags to enable SSE4.2
       support or detecting it dynamically.  Otherwise, there is a risk of
       CPython not working properly on older hardware.

       In practice, there is no observable difference in performance
       between a popcount instruction and this fallback code.

       The algorithm is copied from:
       https://graphics.stanford.edu/~seander/bithacks.html
    */

    /* Each 2-bit field now holds the count of bits set in that field. */
    const uint32_t pairs = i - ((i >> 1) & 0x55555555);

    /* Merge adjacent 2-bit counts into 4-bit counts. */
    const uint32_t nibbles = (pairs & 0x33333333)
                             + ((pairs >> 2) & 0x33333333);

    /* Merge adjacent 4-bit counts into per-byte counts. */
    const uint32_t bytes = (nibbles + (nibbles >> 4)) & 0xF0F0F0F;

    /* The multiplication sums the four byte counts into the top byte. */
    return (bytes * 0x1010101) >> 24;
}
static inline uint32_t
hamt_bitindex(uint32_t bitmap, uint32_t bit)
{
    /* Return the number of bits set in `bitmap & (bit - 1)`.

       NOTE(review): `bit` is presumably a single-bit mask such as the
       value returned by hamt_bitpos(), in which case `bit - 1` selects
       every position below that bit -- confirm against callers.

       _Py_popcount32() comes from pycore_bitutils.h; its result is cast
       to match this function's uint32_t return type. */
    return (uint32_t)_Py_popcount32(bitmap & (bit - 1));
}
@ -820,7 +801,7 @@ hamt_node_bitmap_assoc(PyHamtNode_Bitmap *self,
else {
/* There was no key before with the same (shift,hash). */
uint32_t n = hamt_bitcount(self->b_bitmap);
uint32_t n = (uint32_t)_Py_popcount32(self->b_bitmap);
if (n >= 16) {
/* When we have a situation where we want to store more