bpo-40521: Add PyInterpreterState.unicode (GH-20081)

Move PyInterpreterState.fs_codec into a new
PyInterpreterState.unicode structure.

Give a name to the fs_codec structure and use this structure in
unicodeobject.c.
This commit is contained in:
Victor Stinner 2020-05-14 01:48:38 +02:00 committed by GitHub
parent 75cd8e48c6
commit 3d17c045b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 48 additions and 40 deletions

View file

@ -51,6 +51,19 @@ struct _ceval_state {
#endif #endif
}; };
/* Filesystem codec state of an interpreter.

   fs_codec.encoding is initialized to NULL.
   Later, it is set to a non-NULL string by _PyUnicode_InitEncodings().

   The encoding and errors strings are released with PyMem_RawFree() when
   they are replaced and when the encodings are finalized. */
struct _Py_unicode_fs_codec {
char *encoding; // Filesystem encoding (encoded to UTF-8)
int utf8; // encoding=="utf-8"? (result of a strcmp() against "utf-8")
char *errors; // Filesystem errors (encoded to UTF-8); passed as the
              // "errors" argument of the encode/decode functions
// Set together with errors at initialization; reset to _Py_ERROR_UNKNOWN
// when the encodings are finalized.
_Py_error_handler error_handler;
};
/* Per-interpreter Unicode state: stored as the "unicode" member of the
   interpreter state structure. */
struct _Py_unicode_state {
// Filesystem encoding and error handler used by the FS default codec
struct _Py_unicode_fs_codec fs_codec;
};
/* interpreter state */ /* interpreter state */
@ -97,14 +110,7 @@ struct _is {
PyObject *codec_error_registry; PyObject *codec_error_registry;
int codecs_initialized; int codecs_initialized;
/* fs_codec.encoding is initialized to NULL. struct _Py_unicode_state unicode;
Later, it is set to a non-NULL string by _PyUnicode_InitEncodings(). */
struct {
char *encoding; /* Filesystem encoding (encoded to UTF-8) */
int utf8; /* encoding=="utf-8"? */
char *errors; /* Filesystem errors (encoded to UTF-8) */
_Py_error_handler error_handler;
} fs_codec;
PyConfig config; PyConfig config;
#ifdef HAVE_DLOPEN #ifdef HAVE_DLOPEN

View file

@ -1007,7 +1007,7 @@ io_check_errors(PyObject *errors)
/* Avoid calling PyCodec_LookupError() before the codec registry is ready: /* Avoid calling PyCodec_LookupError() before the codec registry is ready:
before _PyUnicode_InitEncodings() is called. */ before _PyUnicode_InitEncodings() is called. */
if (!interp->fs_codec.encoding) { if (!interp->unicode.fs_codec.encoding) {
return 0; return 0;
} }

View file

@ -463,7 +463,7 @@ unicode_check_encoding_errors(const char *encoding, const char *errors)
/* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the /* Avoid calling _PyCodec_Lookup() and PyCodec_LookupError() before the
codec registry is ready: before _PyUnicode_InitEncodings() is called. */ codec registry is ready: before _PyUnicode_InitEncodings() is called. */
if (!interp->fs_codec.encoding) { if (!interp->unicode.fs_codec.encoding) {
return 0; return 0;
} }
@ -3650,16 +3650,17 @@ PyObject *
PyUnicode_EncodeFSDefault(PyObject *unicode) PyUnicode_EncodeFSDefault(PyObject *unicode)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET(); PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->fs_codec.utf8) { struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
if (fs_codec->utf8) {
return unicode_encode_utf8(unicode, return unicode_encode_utf8(unicode,
interp->fs_codec.error_handler, fs_codec->error_handler,
interp->fs_codec.errors); fs_codec->errors);
} }
#ifndef _Py_FORCE_UTF8_FS_ENCODING #ifndef _Py_FORCE_UTF8_FS_ENCODING
else if (interp->fs_codec.encoding) { else if (fs_codec->encoding) {
return PyUnicode_AsEncodedString(unicode, return PyUnicode_AsEncodedString(unicode,
interp->fs_codec.encoding, fs_codec->encoding,
interp->fs_codec.errors); fs_codec->errors);
} }
#endif #endif
else { else {
@ -3886,17 +3887,18 @@ PyObject*
PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size) PyUnicode_DecodeFSDefaultAndSize(const char *s, Py_ssize_t size)
{ {
PyInterpreterState *interp = _PyInterpreterState_GET(); PyInterpreterState *interp = _PyInterpreterState_GET();
if (interp->fs_codec.utf8) { struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
if (fs_codec->utf8) {
return unicode_decode_utf8(s, size, return unicode_decode_utf8(s, size,
interp->fs_codec.error_handler, fs_codec->error_handler,
interp->fs_codec.errors, fs_codec->errors,
NULL); NULL);
} }
#ifndef _Py_FORCE_UTF8_FS_ENCODING #ifndef _Py_FORCE_UTF8_FS_ENCODING
else if (interp->fs_codec.encoding) { else if (fs_codec->encoding) {
return PyUnicode_Decode(s, size, return PyUnicode_Decode(s, size,
interp->fs_codec.encoding, fs_codec->encoding,
interp->fs_codec.errors); fs_codec->errors);
} }
#endif #endif
else { else {
@ -16071,16 +16073,17 @@ init_fs_codec(PyInterpreterState *interp)
return -1; return -1;
} }
PyMem_RawFree(interp->fs_codec.encoding); struct _Py_unicode_fs_codec *fs_codec = &interp->unicode.fs_codec;
interp->fs_codec.encoding = encoding; PyMem_RawFree(fs_codec->encoding);
fs_codec->encoding = encoding;
/* encoding has been normalized by init_fs_encoding() */ /* encoding has been normalized by init_fs_encoding() */
interp->fs_codec.utf8 = (strcmp(encoding, "utf-8") == 0); fs_codec->utf8 = (strcmp(encoding, "utf-8") == 0);
PyMem_RawFree(interp->fs_codec.errors); PyMem_RawFree(fs_codec->errors);
interp->fs_codec.errors = errors; fs_codec->errors = errors;
interp->fs_codec.error_handler = error_handler; fs_codec->error_handler = error_handler;
#ifdef _Py_FORCE_UTF8_FS_ENCODING #ifdef _Py_FORCE_UTF8_FS_ENCODING
assert(interp->fs_codec.utf8 == 1); assert(fs_codec->utf8 == 1);
#endif #endif
/* At this point, PyUnicode_EncodeFSDefault() and /* At this point, PyUnicode_EncodeFSDefault() and
@ -16089,8 +16092,8 @@ init_fs_codec(PyInterpreterState *interp)
/* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors /* Set Py_FileSystemDefaultEncoding and Py_FileSystemDefaultEncodeErrors
global configuration variables. */ global configuration variables. */
if (_Py_SetFileSystemEncoding(interp->fs_codec.encoding, if (_Py_SetFileSystemEncoding(fs_codec->encoding,
interp->fs_codec.errors) < 0) { fs_codec->errors) < 0) {
PyErr_NoMemory(); PyErr_NoMemory();
return -1; return -1;
} }
@ -16133,15 +16136,14 @@ _PyUnicode_InitEncodings(PyThreadState *tstate)
static void static void
_PyUnicode_FiniEncodings(PyThreadState *tstate) _PyUnicode_FiniEncodings(struct _Py_unicode_fs_codec *fs_codec)
{ {
PyInterpreterState *interp = tstate->interp; PyMem_RawFree(fs_codec->encoding);
PyMem_RawFree(interp->fs_codec.encoding); fs_codec->encoding = NULL;
interp->fs_codec.encoding = NULL; fs_codec->utf8 = 0;
interp->fs_codec.utf8 = 0; PyMem_RawFree(fs_codec->errors);
PyMem_RawFree(interp->fs_codec.errors); fs_codec->errors = NULL;
interp->fs_codec.errors = NULL; fs_codec->error_handler = _Py_ERROR_UNKNOWN;
interp->fs_codec.error_handler = _Py_ERROR_UNKNOWN;
} }
@ -16199,7 +16201,7 @@ _PyUnicode_Fini(PyThreadState *tstate)
unicode_clear_static_strings(); unicode_clear_static_strings();
} }
_PyUnicode_FiniEncodings(tstate); _PyUnicode_FiniEncodings(&tstate->interp->unicode.fs_codec);
} }