Issue #11303: Added shortcuts for utf8 and latin1 encodings.

Documented the list of optimized encodings as a CPython implementation
detail.
This commit is contained in:
Alexander Belopolsky 2011-02-25 19:19:57 +00:00
parent eea22d2d66
commit 1d52146a25
2 changed files with 19 additions and 4 deletions

View file

@@ -1462,13 +1462,15 @@ PyObject *PyUnicode_Decode(const char *s,
char lower[11]; /* Enough for any encoding shortcut */
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
return PyUnicode_DecodeUTF8(s, size, errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if (strcmp(lower, "utf-8") == 0)
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
return PyUnicode_DecodeUTF8(s, size, errors);
else if ((strcmp(lower, "latin-1") == 0) ||
(strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_DecodeLatin1(s, size, errors);
#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
@@ -1670,15 +1672,19 @@ PyObject *PyUnicode_AsEncodedString(PyObject *unicode,
}
if (encoding == NULL)
encoding = PyUnicode_GetDefaultEncoding();
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
/* Shortcuts for common default encodings */
if (normalize_encoding(encoding, lower, sizeof(lower))) {
if (strcmp(lower, "utf-8") == 0)
if ((strcmp(lower, "utf-8") == 0) ||
(strcmp(lower, "utf8") == 0))
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),
errors);
else if ((strcmp(lower, "latin-1") == 0) ||
(strcmp(lower, "latin1") == 0) ||
(strcmp(lower, "iso-8859-1") == 0))
return PyUnicode_EncodeLatin1(PyUnicode_AS_UNICODE(unicode),
PyUnicode_GET_SIZE(unicode),