mirror of
https://github.com/python/cpython.git
synced 2025-10-07 07:31:46 +00:00
bpo-47000: Add locale.getencoding()
(GH-32068)
This commit is contained in:
parent
cd29bd13ef
commit
6773203487
11 changed files with 88 additions and 46 deletions
|
@ -706,15 +706,15 @@ Glossary
|
||||||
|
|
||||||
locale encoding
|
locale encoding
|
||||||
On Unix, it is the encoding of the LC_CTYPE locale. It can be set with
|
On Unix, it is the encoding of the LC_CTYPE locale. It can be set with
|
||||||
``locale.setlocale(locale.LC_CTYPE, new_locale)``.
|
:func:`locale.setlocale(locale.LC_CTYPE, new_locale) <locale.setlocale>`.
|
||||||
|
|
||||||
On Windows, it is the ANSI code page (ex: ``cp1252``).
|
On Windows, it is the ANSI code page (ex: ``"cp1252"``).
|
||||||
|
|
||||||
``locale.getpreferredencoding(False)`` can be used to get the locale
|
On Android and VxWorks, Python uses ``"utf-8"`` as the locale encoding.
|
||||||
encoding.
|
|
||||||
|
|
||||||
Python uses the :term:`filesystem encoding and error handler` to convert
|
``locale.getencoding()`` can be used to get the locale encoding.
|
||||||
between Unicode filenames and bytes filenames.
|
|
||||||
|
See also the :term:`filesystem encoding and error handler`.
|
||||||
|
|
||||||
list
|
list
|
||||||
A built-in Python :term:`sequence`. Despite its name it is more akin
|
A built-in Python :term:`sequence`. Despite its name it is more akin
|
||||||
|
|
|
@ -327,17 +327,37 @@ The :mod:`locale` module defines the following exception and functions:
|
||||||
is not necessary or desired, *do_setlocale* should be set to ``False``.
|
is not necessary or desired, *do_setlocale* should be set to ``False``.
|
||||||
|
|
||||||
On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always
|
On Android or if the :ref:`Python UTF-8 Mode <utf8-mode>` is enabled, always
|
||||||
return ``'UTF-8'``, the :term:`locale encoding` and the *do_setlocale*
|
return ``'utf-8'``, the :term:`locale encoding` and the *do_setlocale*
|
||||||
argument are ignored.
|
argument are ignored.
|
||||||
|
|
||||||
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
|
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
|
||||||
locale. See also the :term:`filesystem encoding and error handler`.
|
locale. See also the :term:`filesystem encoding and error handler`.
|
||||||
|
|
||||||
.. versionchanged:: 3.7
|
.. versionchanged:: 3.7
|
||||||
The function now always returns ``UTF-8`` on Android or if the
|
The function now always returns ``"utf-8"`` on Android or if the
|
||||||
:ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
|
:ref:`Python UTF-8 Mode <utf8-mode>` is enabled.
|
||||||
|
|
||||||
|
|
||||||
|
.. function:: getencoding()
|
||||||
|
|
||||||
|
Get the current :term:`locale encoding`:
|
||||||
|
|
||||||
|
* On Android and VxWorks, return ``"utf-8"``.
|
||||||
|
* On Unix, return the encoding of the current :data:`LC_CTYPE` locale.
|
||||||
|
Return ``"utf-8"`` if ``nl_langinfo(CODESET)`` returns an empty string:
|
||||||
|
for example, if the current LC_CTYPE locale is not supported.
|
||||||
|
* On Windows, return the ANSI code page.
|
||||||
|
|
||||||
|
The :ref:`Python preinitialization <c-preinit>` configures the LC_CTYPE
|
||||||
|
locale. See also the :term:`filesystem encoding and error handler`.
|
||||||
|
|
||||||
|
This function is similar to
|
||||||
|
:func:`getpreferredencoding(False) <getpreferredencoding>` except this
|
||||||
|
function ignores the :ref:`Python UTF-8 Mode <utf8-mode>`.
|
||||||
|
|
||||||
|
.. versionadded:: 3.11
|
||||||
|
|
||||||
|
|
||||||
.. function:: normalize(localename)
|
.. function:: normalize(localename)
|
||||||
|
|
||||||
Returns a normalized locale code for the given locale name. The returned locale
|
Returns a normalized locale code for the given locale name. The returned locale
|
||||||
|
|
|
@ -285,6 +285,13 @@ inspect
|
||||||
* Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a
|
* Add :func:`inspect.ismethodwrapper` for checking if the type of an object is a
|
||||||
:class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.)
|
:class:`~types.MethodWrapperType`. (Contributed by Hakan Çelik in :issue:`29418`.)
|
||||||
|
|
||||||
|
locale
|
||||||
|
------
|
||||||
|
|
||||||
|
* Add :func:`locale.getencoding` to get the current locale encoding. It is similar to
|
||||||
|
``locale.getpreferredencoding(False)`` but ignores the
|
||||||
|
:ref:`Python UTF-8 Mode <utf8-mode>`.
|
||||||
|
|
||||||
math
|
math
|
||||||
----
|
----
|
||||||
|
|
||||||
|
|
|
@ -28,7 +28,7 @@ __all__ = ["getlocale", "getdefaultlocale", "getpreferredencoding", "Error",
|
||||||
"setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
|
"setlocale", "resetlocale", "localeconv", "strcoll", "strxfrm",
|
||||||
"str", "atof", "atoi", "format", "format_string", "currency",
|
"str", "atof", "atoi", "format", "format_string", "currency",
|
||||||
"normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
|
"normalize", "LC_CTYPE", "LC_COLLATE", "LC_TIME", "LC_MONETARY",
|
||||||
"LC_NUMERIC", "LC_ALL", "CHAR_MAX"]
|
"LC_NUMERIC", "LC_ALL", "CHAR_MAX", "getencoding"]
|
||||||
|
|
||||||
def _strcoll(a,b):
|
def _strcoll(a,b):
|
||||||
""" strcoll(string,string) -> int.
|
""" strcoll(string,string) -> int.
|
||||||
|
@ -637,19 +637,17 @@ def resetlocale(category=LC_ALL):
|
||||||
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from _locale import _get_locale_encoding
|
from _locale import getencoding
|
||||||
except ImportError:
|
except ImportError:
|
||||||
def _get_locale_encoding():
|
def getencoding():
|
||||||
if hasattr(sys, 'getandroidapilevel'):
|
if hasattr(sys, 'getandroidapilevel'):
|
||||||
# On Android langinfo.h and CODESET are missing, and UTF-8 is
|
# On Android langinfo.h and CODESET are missing, and UTF-8 is
|
||||||
# always used in mbstowcs() and wcstombs().
|
# always used in mbstowcs() and wcstombs().
|
||||||
return 'UTF-8'
|
return 'utf-8'
|
||||||
if sys.flags.utf8_mode:
|
|
||||||
return 'UTF-8'
|
|
||||||
encoding = getdefaultlocale()[1]
|
encoding = getdefaultlocale()[1]
|
||||||
if encoding is None:
|
if encoding is None:
|
||||||
# LANG not set, default conservatively to ASCII
|
# LANG not set, default to UTF-8
|
||||||
encoding = 'ascii'
|
encoding = 'utf-8'
|
||||||
return encoding
|
return encoding
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
@ -657,17 +655,19 @@ try:
|
||||||
except NameError:
|
except NameError:
|
||||||
def getpreferredencoding(do_setlocale=True):
|
def getpreferredencoding(do_setlocale=True):
|
||||||
"""Return the charset that the user is likely using."""
|
"""Return the charset that the user is likely using."""
|
||||||
return _get_locale_encoding()
|
if sys.flags.utf8_mode:
|
||||||
|
return 'utf-8'
|
||||||
|
return getencoding()
|
||||||
else:
|
else:
|
||||||
# On Unix, if CODESET is available, use that.
|
# On Unix, if CODESET is available, use that.
|
||||||
def getpreferredencoding(do_setlocale=True):
|
def getpreferredencoding(do_setlocale=True):
|
||||||
"""Return the charset that the user is likely using,
|
"""Return the charset that the user is likely using,
|
||||||
according to the system configuration."""
|
according to the system configuration."""
|
||||||
if sys.flags.utf8_mode:
|
if sys.flags.utf8_mode:
|
||||||
return 'UTF-8'
|
return 'utf-8'
|
||||||
|
|
||||||
if not do_setlocale:
|
if not do_setlocale:
|
||||||
return _get_locale_encoding()
|
return getencoding()
|
||||||
|
|
||||||
old_loc = setlocale(LC_CTYPE)
|
old_loc = setlocale(LC_CTYPE)
|
||||||
try:
|
try:
|
||||||
|
@ -675,7 +675,7 @@ else:
|
||||||
setlocale(LC_CTYPE, "")
|
setlocale(LC_CTYPE, "")
|
||||||
except Error:
|
except Error:
|
||||||
pass
|
pass
|
||||||
return _get_locale_encoding()
|
return getencoding()
|
||||||
finally:
|
finally:
|
||||||
setlocale(LC_CTYPE, old_loc)
|
setlocale(LC_CTYPE, old_loc)
|
||||||
|
|
||||||
|
|
|
@ -203,12 +203,12 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
def test_locale_getpreferredencoding(self):
|
def test_locale_getpreferredencoding(self):
|
||||||
code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
|
code = 'import locale; print(locale.getpreferredencoding(False), locale.getpreferredencoding(True))'
|
||||||
out = self.get_output('-X', 'utf8', '-c', code)
|
out = self.get_output('-X', 'utf8', '-c', code)
|
||||||
self.assertEqual(out, 'UTF-8 UTF-8')
|
self.assertEqual(out, 'utf-8 utf-8')
|
||||||
|
|
||||||
for loc in POSIX_LOCALES:
|
for loc in POSIX_LOCALES:
|
||||||
with self.subTest(LC_ALL=loc):
|
with self.subTest(LC_ALL=loc):
|
||||||
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
|
out = self.get_output('-X', 'utf8', '-c', code, LC_ALL=loc)
|
||||||
self.assertEqual(out, 'UTF-8 UTF-8')
|
self.assertEqual(out, 'utf-8 utf-8')
|
||||||
|
|
||||||
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
|
@unittest.skipIf(MS_WINDOWS, 'test specific to Unix')
|
||||||
def test_cmd_line(self):
|
def test_cmd_line(self):
|
||||||
|
@ -276,7 +276,7 @@ class UTF8ModeTests(unittest.TestCase):
|
||||||
# In UTF-8 Mode, device_encoding(fd) returns "UTF-8" if fd is a TTY
|
# In UTF-8 Mode, device_encoding(fd) returns "utf-8" if fd is a TTY
|
||||||
with open(filename, encoding="utf8") as fp:
|
with open(filename, encoding="utf8") as fp:
|
||||||
out = fp.read().rstrip()
|
out = fp.read().rstrip()
|
||||||
self.assertEqual(out, 'True UTF-8')
|
self.assertEqual(out, 'True utf-8')
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
|
|
|
@ -0,0 +1,3 @@
|
||||||
|
Add :func:`locale.getencoding` to get the current locale encoding.
|
||||||
|
It is similar to ``locale.getpreferredencoding(False)`` but ignores the
|
||||||
|
:ref:`Python UTF-8 Mode <utf8-mode>`.
|
|
@ -1145,7 +1145,13 @@ _io_TextIOWrapper___init___impl(textio *self, PyObject *buffer,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (encoding == NULL && self->encoding == NULL) {
|
if (encoding == NULL && self->encoding == NULL) {
|
||||||
|
if (_PyRuntime.preconfig.utf8_mode) {
|
||||||
|
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||||
|
self->encoding = Py_NewRef(&_Py_STR(utf_8));
|
||||||
|
}
|
||||||
|
else {
|
||||||
self->encoding = _Py_GetLocaleEncodingObject();
|
self->encoding = _Py_GetLocaleEncodingObject();
|
||||||
|
}
|
||||||
if (self->encoding == NULL) {
|
if (self->encoding == NULL) {
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
|
@ -773,14 +773,14 @@ _locale_bind_textdomain_codeset_impl(PyObject *module, const char *domain,
|
||||||
|
|
||||||
|
|
||||||
/*[clinic input]
|
/*[clinic input]
|
||||||
_locale._get_locale_encoding
|
_locale.getencoding
|
||||||
|
|
||||||
Get the current locale encoding.
|
Get the current locale encoding.
|
||||||
[clinic start generated code]*/
|
[clinic start generated code]*/
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_locale__get_locale_encoding_impl(PyObject *module)
|
_locale_getencoding_impl(PyObject *module)
|
||||||
/*[clinic end generated code: output=e8e2f6f6f184591a input=513d9961d2f45c76]*/
|
/*[clinic end generated code: output=86b326b971872e46 input=6503d11e5958b360]*/
|
||||||
{
|
{
|
||||||
return _Py_GetLocaleEncodingObject();
|
return _Py_GetLocaleEncodingObject();
|
||||||
}
|
}
|
||||||
|
@ -811,7 +811,7 @@ static struct PyMethodDef PyLocale_Methods[] = {
|
||||||
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
_LOCALE__GET_LOCALE_ENCODING_METHODDEF
|
_LOCALE_GETENCODING_METHODDEF
|
||||||
{NULL, NULL}
|
{NULL, NULL}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
16
Modules/clinic/_localemodule.c.h
generated
16
Modules/clinic/_localemodule.c.h
generated
|
@ -545,22 +545,22 @@ exit:
|
||||||
|
|
||||||
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
|
#endif /* defined(HAVE_LIBINTL_H) && defined(HAVE_BIND_TEXTDOMAIN_CODESET) */
|
||||||
|
|
||||||
PyDoc_STRVAR(_locale__get_locale_encoding__doc__,
|
PyDoc_STRVAR(_locale_getencoding__doc__,
|
||||||
"_get_locale_encoding($module, /)\n"
|
"getencoding($module, /)\n"
|
||||||
"--\n"
|
"--\n"
|
||||||
"\n"
|
"\n"
|
||||||
"Get the current locale encoding.");
|
"Get the current locale encoding.");
|
||||||
|
|
||||||
#define _LOCALE__GET_LOCALE_ENCODING_METHODDEF \
|
#define _LOCALE_GETENCODING_METHODDEF \
|
||||||
{"_get_locale_encoding", (PyCFunction)_locale__get_locale_encoding, METH_NOARGS, _locale__get_locale_encoding__doc__},
|
{"getencoding", (PyCFunction)_locale_getencoding, METH_NOARGS, _locale_getencoding__doc__},
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_locale__get_locale_encoding_impl(PyObject *module);
|
_locale_getencoding_impl(PyObject *module);
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
_locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
|
_locale_getencoding(PyObject *module, PyObject *Py_UNUSED(ignored))
|
||||||
{
|
{
|
||||||
return _locale__get_locale_encoding_impl(module);
|
return _locale_getencoding_impl(module);
|
||||||
}
|
}
|
||||||
|
|
||||||
#ifndef _LOCALE_STRCOLL_METHODDEF
|
#ifndef _LOCALE_STRCOLL_METHODDEF
|
||||||
|
@ -602,4 +602,4 @@ _locale__get_locale_encoding(PyObject *module, PyObject *Py_UNUSED(ignored))
|
||||||
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
#ifndef _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||||
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
#define _LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF
|
||||||
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
|
#endif /* !defined(_LOCALE_BIND_TEXTDOMAIN_CODESET_METHODDEF) */
|
||||||
/*[clinic end generated code: output=cd703c8a3a75fcf4 input=a9049054013a1b77]*/
|
/*[clinic end generated code: output=ea71e9b94bdaa47d input=a9049054013a1b77]*/
|
||||||
|
|
|
@ -93,6 +93,10 @@ _Py_device_encoding(int fd)
|
||||||
|
|
||||||
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
|
return PyUnicode_FromFormat("cp%u", (unsigned int)cp);
|
||||||
#else
|
#else
|
||||||
|
if (_PyRuntime.preconfig.utf8_mode) {
|
||||||
|
_Py_DECLARE_STR(utf_8, "utf-8");
|
||||||
|
return Py_NewRef(&_Py_STR(utf_8));
|
||||||
|
}
|
||||||
return _Py_GetLocaleEncodingObject();
|
return _Py_GetLocaleEncodingObject();
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
@ -873,10 +877,10 @@ _Py_EncodeLocaleEx(const wchar_t *text, char **str,
|
||||||
|
|
||||||
// Get the current locale encoding name:
|
// Get the current locale encoding name:
|
||||||
//
|
//
|
||||||
// - Return "UTF-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
|
// - Return "utf-8" if _Py_FORCE_UTF8_LOCALE macro is defined (ex: on Android)
|
||||||
// - Return "UTF-8" if the UTF-8 Mode is enabled
|
// - The UTF-8 Mode is ignored here: callers check preconfig.utf8_mode themselves
|
||||||
// - On Windows, return the ANSI code page (ex: "cp1250")
|
// - On Windows, return the ANSI code page (ex: "cp1250")
|
||||||
// - Return "UTF-8" if nl_langinfo(CODESET) returns an empty string.
|
// - Return "utf-8" if nl_langinfo(CODESET) returns an empty string.
|
||||||
// - Otherwise, return nl_langinfo(CODESET).
|
// - Otherwise, return nl_langinfo(CODESET).
|
||||||
//
|
//
|
||||||
// Return NULL on memory allocation failure.
|
// Return NULL on memory allocation failure.
|
||||||
|
@ -888,12 +892,8 @@ _Py_GetLocaleEncoding(void)
|
||||||
#ifdef _Py_FORCE_UTF8_LOCALE
|
#ifdef _Py_FORCE_UTF8_LOCALE
|
||||||
// On Android langinfo.h and CODESET are missing,
|
// On Android langinfo.h and CODESET are missing,
|
||||||
// and UTF-8 is always used in mbstowcs() and wcstombs().
|
// and UTF-8 is always used in mbstowcs() and wcstombs().
|
||||||
return _PyMem_RawWcsdup(L"UTF-8");
|
return _PyMem_RawWcsdup(L"utf-8");
|
||||||
#else
|
#else
|
||||||
const PyPreConfig *preconfig = &_PyRuntime.preconfig;
|
|
||||||
if (preconfig->utf8_mode) {
|
|
||||||
return _PyMem_RawWcsdup(L"UTF-8");
|
|
||||||
}
|
|
||||||
|
|
||||||
#ifdef MS_WINDOWS
|
#ifdef MS_WINDOWS
|
||||||
wchar_t encoding[23];
|
wchar_t encoding[23];
|
||||||
|
@ -906,7 +906,7 @@ _Py_GetLocaleEncoding(void)
|
||||||
if (!encoding || encoding[0] == '\0') {
|
if (!encoding || encoding[0] == '\0') {
|
||||||
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
|
// Use UTF-8 if nl_langinfo() returns an empty string. It can happen on
|
||||||
// macOS if the LC_CTYPE locale is not supported.
|
// macOS if the LC_CTYPE locale is not supported.
|
||||||
return _PyMem_RawWcsdup(L"UTF-8");
|
return _PyMem_RawWcsdup(L"utf-8");
|
||||||
}
|
}
|
||||||
|
|
||||||
wchar_t *wstr;
|
wchar_t *wstr;
|
||||||
|
|
|
@ -1779,7 +1779,13 @@ static PyStatus
|
||||||
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
|
config_get_locale_encoding(PyConfig *config, const PyPreConfig *preconfig,
|
||||||
wchar_t **locale_encoding)
|
wchar_t **locale_encoding)
|
||||||
{
|
{
|
||||||
wchar_t *encoding = _Py_GetLocaleEncoding();
|
wchar_t *encoding;
|
||||||
|
if (preconfig->utf8_mode) {
|
||||||
|
encoding = _PyMem_RawWcsdup(L"utf-8");
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
encoding = _Py_GetLocaleEncoding();
|
||||||
|
}
|
||||||
if (encoding == NULL) {
|
if (encoding == NULL) {
|
||||||
return _PyStatus_NO_MEMORY();
|
return _PyStatus_NO_MEMORY();
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue