Issue #11303: Added shortcuts for utf8 and latin1 encodings.

Documented the list of optimized encodings as a CPython implementation detail.
2025-12-09 10:37:17 +00:00 · 2011-02-25 19:19:57 +00:00 · 2011-02-25 19:19:57 +00:00 · 1d52146a25
commit 1d52146a25
parent eea22d2d66
2 changed files with 19 additions and 4 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
    char lower[11];  /* Enough for any encoding shortcut */

    if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_DecodeUTF8(s, size, errors);

    /* Shortcuts for common default encodings */
    if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
            return PyUnicode_DecodeUTF8(s, size, errors);
        else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                 (strcmp(lower, "iso-8859-1") == 0))
            return PyUnicode_DecodeLatin1(s, size, errors);
 #if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
    }

    if (encoding == NULL)
-        encoding = PyUnicode_GetDefaultEncoding();
+        return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
+                                    PyUnicode_GET_SIZE(unicode),
+                                    errors);

    /* Shortcuts for common default encodings */
    if (normalize_encoding(encoding, lower, sizeof(lower))) {
-        if (strcmp(lower, "utf-8") == 0)
+        if ((strcmp(lower, "utf-8") == 0) ||
+            (strcmp(lower, "utf8") == 0))
            return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
                                        PyUnicode_GET_SIZE(unicode),
                                        errors);
        else if ((strcmp(lower, "latin-1") == 0) ||
+                 (strcmp(lower, "latin1") == 0) ||
                 (strcmp(lower, "iso-8859-1") == 0))
            return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
                                          PyUnicode_GET_SIZE(unicode),