mirror of
https://github.com/python/cpython.git
synced 2025-07-23 19:25:40 +00:00
bpo-40521: Make Unicode latin1 singletons per interpreter (GH-21101)
Each interpreter now has its own Unicode latin1 singletons. Remove "ifdef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS" and "ifdef LATIN1_SINGLETONS": always enable latin1 singletons. Optimize unicode_result_ready(): only attempt to get a latin1 singleton for PyUnicode_1BYTE_KIND.
This commit is contained in:
parent
bbf36e8903
commit
2f9ada96e0
3 changed files with 36 additions and 43 deletions
|
@ -73,6 +73,9 @@ struct _Py_bytes_state {
|
||||||
struct _Py_unicode_state {
|
struct _Py_unicode_state {
|
||||||
// The empty Unicode object is a singleton to improve performance.
|
// The empty Unicode object is a singleton to improve performance.
|
||||||
PyObject *empty;
|
PyObject *empty;
|
||||||
|
/* Single character Unicode strings in the Latin-1 range are being
|
||||||
|
shared as well. */
|
||||||
|
PyObject *latin1[256];
|
||||||
struct _Py_unicode_fs_codec fs_codec;
|
struct _Py_unicode_fs_codec fs_codec;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
|
@ -3,7 +3,7 @@ Each interpreter now has its own free lists, singletons and caches:
|
||||||
* Free lists: float, tuple, list, dict, frame, context,
|
* Free lists: float, tuple, list, dict, frame, context,
|
||||||
asynchronous generator, MemoryError.
|
asynchronous generator, MemoryError.
|
||||||
* Singletons: empty tuple, empty bytes string, empty Unicode string,
|
* Singletons: empty tuple, empty bytes string, empty Unicode string,
|
||||||
single byte character.
|
single byte character, single Unicode (latin1) character.
|
||||||
* Slice cache.
|
* Slice cache.
|
||||||
|
|
||||||
They are no longer shared by all interpreters.
|
They are no longer shared by all interpreters.
|
||||||
|
|
|
@ -303,17 +303,6 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||||
/* List of static strings. */
|
/* List of static strings. */
|
||||||
static _Py_Identifier *static_strings = NULL;
|
static _Py_Identifier *static_strings = NULL;
|
||||||
|
|
||||||
/* bpo-40521: Latin1 singletons are shared by all interpreters. */
|
|
||||||
#ifndef EXPERIMENTAL_ISOLATED_SUBINTERPRETERS
|
|
||||||
# define LATIN1_SINGLETONS
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#ifdef LATIN1_SINGLETONS
|
|
||||||
/* Single character Unicode strings in the Latin-1 range are being
|
|
||||||
shared as well. */
|
|
||||||
static PyObject *unicode_latin1[256] = {NULL};
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* Fast detection of the most frequent whitespace characters */
|
/* Fast detection of the most frequent whitespace characters */
|
||||||
const unsigned char _Py_ascii_whitespace[] = {
|
const unsigned char _Py_ascii_whitespace[] = {
|
||||||
0, 0, 0, 0, 0, 0, 0, 0,
|
0, 0, 0, 0, 0, 0, 0, 0,
|
||||||
|
@ -657,9 +646,8 @@ unicode_result_wchar(PyObject *unicode)
|
||||||
if (len == 1) {
|
if (len == 1) {
|
||||||
wchar_t ch = _PyUnicode_WSTR(unicode)[0];
|
wchar_t ch = _PyUnicode_WSTR(unicode)[0];
|
||||||
if ((Py_UCS4)ch < 256) {
|
if ((Py_UCS4)ch < 256) {
|
||||||
PyObject *latin1_char = get_latin1_char((unsigned char)ch);
|
|
||||||
Py_DECREF(unicode);
|
Py_DECREF(unicode);
|
||||||
return latin1_char;
|
return get_latin1_char((unsigned char)ch);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -692,13 +680,13 @@ unicode_result_ready(PyObject *unicode)
|
||||||
return empty;
|
return empty;
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifdef LATIN1_SINGLETONS
|
|
||||||
if (length == 1) {
|
if (length == 1) {
|
||||||
const void *data = PyUnicode_DATA(unicode);
|
|
||||||
int kind = PyUnicode_KIND(unicode);
|
int kind = PyUnicode_KIND(unicode);
|
||||||
Py_UCS4 ch = PyUnicode_READ(kind, data, 0);
|
if (kind == PyUnicode_1BYTE_KIND) {
|
||||||
if (ch < 256) {
|
Py_UCS1 *data = PyUnicode_1BYTE_DATA(unicode);
|
||||||
PyObject *latin1_char = unicode_latin1[ch];
|
Py_UCS1 ch = data[0];
|
||||||
|
struct _Py_unicode_state *state = get_unicode_state();
|
||||||
|
PyObject *latin1_char = state->latin1[ch];
|
||||||
if (latin1_char != NULL) {
|
if (latin1_char != NULL) {
|
||||||
if (unicode != latin1_char) {
|
if (unicode != latin1_char) {
|
||||||
Py_INCREF(latin1_char);
|
Py_INCREF(latin1_char);
|
||||||
|
@ -709,12 +697,14 @@ unicode_result_ready(PyObject *unicode)
|
||||||
else {
|
else {
|
||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
Py_INCREF(unicode);
|
Py_INCREF(unicode);
|
||||||
unicode_latin1[ch] = unicode;
|
state->latin1[ch] = unicode;
|
||||||
return unicode;
|
return unicode;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else {
|
||||||
|
assert(PyUnicode_READ_CHAR(unicode, 0) >= 256);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
return unicode;
|
return unicode;
|
||||||
|
@ -1981,18 +1971,18 @@ unicode_dealloc(PyObject *unicode)
|
||||||
static int
|
static int
|
||||||
unicode_is_singleton(PyObject *unicode)
|
unicode_is_singleton(PyObject *unicode)
|
||||||
{
|
{
|
||||||
if (unicode == unicode_get_empty()) {
|
struct _Py_unicode_state *state = get_unicode_state();
|
||||||
|
if (unicode == state->empty) {
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
#ifdef LATIN1_SINGLETONS
|
|
||||||
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
PyASCIIObject *ascii = (PyASCIIObject *)unicode;
|
||||||
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
|
if (ascii->state.kind != PyUnicode_WCHAR_KIND && ascii->length == 1)
|
||||||
{
|
{
|
||||||
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
Py_UCS4 ch = PyUnicode_READ_CHAR(unicode, 0);
|
||||||
if (ch < 256 && unicode_latin1[ch] == unicode)
|
if (ch < 256 && state->latin1[ch] == unicode) {
|
||||||
return 1;
|
return 1;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
@ -2130,17 +2120,15 @@ unicode_write_cstr(PyObject *unicode, Py_ssize_t index,
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject*
|
static PyObject*
|
||||||
get_latin1_char(unsigned char ch)
|
get_latin1_char(Py_UCS1 ch)
|
||||||
{
|
{
|
||||||
PyObject *unicode;
|
struct _Py_unicode_state *state = get_unicode_state();
|
||||||
|
|
||||||
#ifdef LATIN1_SINGLETONS
|
PyObject *unicode = state->latin1[ch];
|
||||||
unicode = unicode_latin1[ch];
|
|
||||||
if (unicode) {
|
if (unicode) {
|
||||||
Py_INCREF(unicode);
|
Py_INCREF(unicode);
|
||||||
return unicode;
|
return unicode;
|
||||||
}
|
}
|
||||||
#endif
|
|
||||||
|
|
||||||
unicode = PyUnicode_New(1, ch);
|
unicode = PyUnicode_New(1, ch);
|
||||||
if (!unicode) {
|
if (!unicode) {
|
||||||
|
@ -2150,10 +2138,8 @@ get_latin1_char(unsigned char ch)
|
||||||
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
|
PyUnicode_1BYTE_DATA(unicode)[0] = ch;
|
||||||
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
assert(_PyUnicode_CheckConsistency(unicode, 1));
|
||||||
|
|
||||||
#ifdef LATIN1_SINGLETONS
|
|
||||||
Py_INCREF(unicode);
|
Py_INCREF(unicode);
|
||||||
unicode_latin1[ch] = unicode;
|
state->latin1[ch] = unicode;
|
||||||
#endif
|
|
||||||
return unicode;
|
return unicode;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2164,8 +2150,9 @@ unicode_char(Py_UCS4 ch)
|
||||||
|
|
||||||
assert(ch <= MAX_UNICODE);
|
assert(ch <= MAX_UNICODE);
|
||||||
|
|
||||||
if (ch < 256)
|
if (ch < 256) {
|
||||||
return get_latin1_char(ch);
|
return get_latin1_char(ch);
|
||||||
|
}
|
||||||
|
|
||||||
unicode = PyUnicode_New(1, ch);
|
unicode = PyUnicode_New(1, ch);
|
||||||
if (unicode == NULL)
|
if (unicode == NULL)
|
||||||
|
@ -2367,11 +2354,13 @@ _PyUnicode_FromUCS1(const Py_UCS1* u, Py_ssize_t size)
|
||||||
PyObject *res;
|
PyObject *res;
|
||||||
unsigned char max_char;
|
unsigned char max_char;
|
||||||
|
|
||||||
if (size == 0)
|
if (size == 0) {
|
||||||
_Py_RETURN_UNICODE_EMPTY();
|
_Py_RETURN_UNICODE_EMPTY();
|
||||||
|
}
|
||||||
assert(size > 0);
|
assert(size > 0);
|
||||||
if (size == 1)
|
if (size == 1) {
|
||||||
return get_latin1_char(u[0]);
|
return get_latin1_char(u[0]);
|
||||||
|
}
|
||||||
|
|
||||||
max_char = ucs1lib_find_max_char(u, u + size);
|
max_char = ucs1lib_find_max_char(u, u + size);
|
||||||
res = PyUnicode_New(size, max_char);
|
res = PyUnicode_New(size, max_char);
|
||||||
|
@ -5008,8 +4997,9 @@ unicode_decode_utf8(const char *s, Py_ssize_t size,
|
||||||
|
|
||||||
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
|
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
|
||||||
if (size == 1 && (unsigned char)s[0] < 128) {
|
if (size == 1 && (unsigned char)s[0] < 128) {
|
||||||
if (consumed)
|
if (consumed) {
|
||||||
*consumed = 1;
|
*consumed = 1;
|
||||||
|
}
|
||||||
return get_latin1_char((unsigned char)s[0]);
|
return get_latin1_char((unsigned char)s[0]);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -7176,8 +7166,9 @@ PyUnicode_DecodeASCII(const char *s,
|
||||||
_Py_RETURN_UNICODE_EMPTY();
|
_Py_RETURN_UNICODE_EMPTY();
|
||||||
|
|
||||||
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
|
/* ASCII is equivalent to the first 128 ordinals in Unicode. */
|
||||||
if (size == 1 && (unsigned char)s[0] < 128)
|
if (size == 1 && (unsigned char)s[0] < 128) {
|
||||||
return get_latin1_char((unsigned char)s[0]);
|
return get_latin1_char((unsigned char)s[0]);
|
||||||
|
}
|
||||||
|
|
||||||
// Shortcut for simple case
|
// Shortcut for simple case
|
||||||
PyObject *u = PyUnicode_New(size, 127);
|
PyObject *u = PyUnicode_New(size, 127);
|
||||||
|
@ -16234,12 +16225,11 @@ _PyUnicode_Fini(PyThreadState *tstate)
|
||||||
|
|
||||||
Py_CLEAR(state->empty);
|
Py_CLEAR(state->empty);
|
||||||
|
|
||||||
|
for (Py_ssize_t i = 0; i < 256; i++) {
|
||||||
|
Py_CLEAR(state->latin1[i]);
|
||||||
|
}
|
||||||
|
|
||||||
if (is_main_interp) {
|
if (is_main_interp) {
|
||||||
#ifdef LATIN1_SINGLETONS
|
|
||||||
for (Py_ssize_t i = 0; i < 256; i++) {
|
|
||||||
Py_CLEAR(unicode_latin1[i]);
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
unicode_clear_static_strings();
|
unicode_clear_static_strings();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue