mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-46881: Statically allocate and initialize the latin1 characters. (GH-31616)
This commit is contained in:
parent
e801e88744
commit
8714b6fa27
6 changed files with 317 additions and 66 deletions
|
@ -206,6 +206,11 @@ extern "C" {
|
|||
*_to++ = (to_type) *_iter++; \
|
||||
} while (0)
|
||||
|
||||
#define LATIN1(ch) \
|
||||
(ch < 128 \
|
||||
? (PyObject*)&_Py_SINGLETON(strings).ascii[ch] \
|
||||
: (PyObject*)&_Py_SINGLETON(strings).latin1[ch - 128])
|
||||
|
||||
#ifdef MS_WINDOWS
|
||||
/* On Windows, overallocate by 50% is the best factor */
|
||||
# define OVERALLOCATE_FACTOR 2
|
||||
|
@ -249,14 +254,6 @@ static int unicode_is_singleton(PyObject *unicode);
|
|||
#endif
|
||||
|
||||
|
||||
static struct _Py_unicode_state*
|
||||
get_unicode_state(void)
|
||||
{
|
||||
PyInterpreterState *interp = _PyInterpreterState_GET();
|
||||
return &interp->unicode;
|
||||
}
|
||||
|
||||
|
||||
// Return a borrowed reference to the empty string singleton.
|
||||
static inline PyObject* unicode_get_empty(void)
|
||||
{
|
||||
|
@ -680,24 +677,10 @@ unicode_result_ready(PyObject *unicode)
|
|||
if (kind == PyUnicode_1BYTE_KIND) {
|
||||
const Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
|
||||
Py_UCS1 ch = data[0];
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
PyObject *latin1_char = state->latin1[ch];
|
||||
if (latin1_char != NULL) {
|
||||
if (unicode != latin1_char) {
|
||||
Py_INCREF(latin1_char);
|
||||
Py_DECREF(unicode);
|
||||
}
|
||||
return latin1_char;
|
||||
if (unicode != LATIN1(ch)) {
|
||||
Py_DECREF(unicode);
|
||||
}
|
||||
else {
|
||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||
Py_INCREF(unicode);
|
||||
state->latin1[ch] = unicode;
|
||||
return unicode;
|
||||
}
|
||||
}
|
||||
else {
|
||||
assert(PyUnicode_READ_CHAR(unicode, 0) >= 256);
|
||||
return get_latin1_char(ch);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1990,11 +1973,10 @@ unicode_is_singleton(PyObject *unicode)
|
|||
return 1;
|
||||
}
|
||||
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1) {
|
||||
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
||||
if (ch < 256 && state->latin1[ch] == unicode) {
|
||||
if (ch < 256 && LATIN1(ch) == unicode) {
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
|
@ -2137,25 +2119,7 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
|
|||
static PyObject*
|
||||
get_latin1_char(Py_UCS1 ch)
|
||||
{
|
||||
struct _Py_unicode_state *state = get_unicode_state();
|
||||
|
||||
PyObject *unicode = state->latin1[ch];
|
||||
if (unicode) {
|
||||
Py_INCREF(unicode);
|
||||
return unicode;
|
||||
}
|
||||
|
||||
unicode = PyUnicode_New(1, ch);
|
||||
if (!unicode) {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
|
||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||
|
||||
Py_INCREF(unicode);
|
||||
state->latin1[ch] = unicode;
|
||||
return unicode;
|
||||
return Py_NewRef(LATIN1(ch));
|
||||
}
|
||||
|
||||
static PyObject*
|
||||
|
@ -15535,6 +15499,10 @@ _PyUnicode_InitGlobalObjects(PyInterpreterState *interp)
|
|||
|
||||
#ifdef Py_DEBUG
|
||||
assert(_PyUnicode_CheckConsistency(&_Py_STR(empty), 1));
|
||||
|
||||
for (int i = 0; i < 256; i++) {
|
||||
assert(_PyUnicode_CheckConsistency(LATIN1(i), 1));
|
||||
}
|
||||
#endif
|
||||
|
||||
return _PyStatus_OK();
|
||||
|
@ -16113,10 +16081,6 @@ _PyUnicode_Fini(PyInterpreterState *interp)
|
|||
_PyUnicode_FiniEncodings(&state->fs_codec);
|
||||
|
||||
unicode_clear_identifiers(state);
|
||||
|
||||
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||
Py_CLEAR(state->latin1[i]);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue