bpo-38252: Use 8-byte step to detect ASCII sequence in 64bit Windows build (GH-16334)

This commit is contained in:
Ma Lin 2020-10-18 22:48:38 +08:00 committed by GitHub
parent 3635388f52
commit a0c603cb9d
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 53 additions and 52 deletions

View file

@@ -100,14 +100,14 @@ Return True if B is empty or all characters in B are ASCII,\n\
False otherwise.");
// Optimization is copied from ascii_decode in unicodeobject.c
/* Mask to quickly check whether a C 'long' contains a
/* Mask to quickly check whether a C 'size_t' contains a
non-ASCII, UTF8-encoded char. */
#if (SIZEOF_LONG == 8)
# define ASCII_CHAR_MASK 0x8080808080808080UL
#elif (SIZEOF_LONG == 4)
# define ASCII_CHAR_MASK 0x80808080UL
#if (SIZEOF_SIZE_T == 8)
# define ASCII_CHAR_MASK 0x8080808080808080ULL
#elif (SIZEOF_SIZE_T == 4)
# define ASCII_CHAR_MASK 0x80808080U
#else
# error C 'long' size should be either 4 or 8!
# error C 'size_t' size should be either 4 or 8!
#endif
PyObject*
@@ -115,20 +115,20 @@ _Py_bytes_isascii(const char *cptr, Py_ssize_t len)
{
const char *p = cptr;
const char *end = p + len;
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_LONG);
const char *aligned_end = (const char *) _Py_ALIGN_DOWN(end, SIZEOF_SIZE_T);
while (p < end) {
/* Fast path, see in STRINGLIB(utf8_decode) in stringlib/codecs.h
for an explanation. */
if (_Py_IS_ALIGNED(p, SIZEOF_LONG)) {
if (_Py_IS_ALIGNED(p, SIZEOF_SIZE_T)) {
/* Help allocation */
const char *_p = p;
while (_p < aligned_end) {
unsigned long value = *(const unsigned long *) _p;
size_t value = *(const size_t *) _p;
if (value & ASCII_CHAR_MASK) {
Py_RETURN_FALSE;
}
_p += SIZEOF_LONG;
_p += SIZEOF_SIZE_T;
}
p = _p;
if (_p == end)