mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Issue #8923: cache str.encode() result
When a string is encoded to UTF-8 in strict mode, the result is cached into the object. Examples: str.encode(), str.encode('utf-8'), PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8", NULL).
This commit is contained in:
parent
f3fd733f92
commit
a5c68c3cb7
2 changed files with 21 additions and 9 deletions
|
@@ -10,6 +10,11 @@ What's New in Python 3.3 Alpha 1?
|
||||||
Core and Builtins
|
Core and Builtins
|
||||||
-----------------
|
-----------------
|
||||||
|
|
||||||
|
- Issue #8923: When a string is encoded to UTF-8 in strict mode, the result is
|
||||||
|
cached into the object. Examples: str.encode(), str.encode('utf-8'),
|
||||||
|
PyUnicode_AsUTF8String() and PyUnicode_AsEncodedString(unicode, "utf-8",
|
||||||
|
NULL).
|
||||||
|
|
||||||
- Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.
|
- Issue #10831: PyUnicode_FromFormat() supports %li, %lli and %zi formats.
|
||||||
|
|
||||||
- Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
|
- Issue #10829: Refactor PyUnicode_FromFormat(), use the same function to parse
|
||||||
|
|
|
@@ -1710,17 +1710,21 @@ PyUnicode_AsEncodedString(PyObject *unicode,
|
||||||
}
|
}
|
||||||
|
|
||||||
if (encoding == NULL)
|
if (encoding == NULL)
|
||||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
return PyUnicode_AsUTF8String(unicode);
|
||||||
PyUnicode_GET_SIZE(unicode),
|
|
||||||
errors);
|
|
||||||
|
|
||||||
/* Shortcuts for common default encodings */
|
/* Shortcuts for common default encodings */
|
||||||
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
if (normalize_encoding(encoding, lower, sizeof(lower))) {
|
||||||
if ((strcmp(lower, "utf-8") == 0) ||
|
if ((strcmp(lower, "utf-8") == 0) ||
|
||||||
(strcmp(lower, "utf8") == 0))
|
(strcmp(lower, "utf8") == 0))
|
||||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
{
|
||||||
PyUnicode_GET_SIZE(unicode),
|
if (errors == NULL || strcmp(errors, "strict") == 0) {
|
||||||
errors);
|
return PyUnicode_AsUTF8String(unicode);
|
||||||
|
} else {
|
||||||
|
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
||||||
|
PyUnicode_GET_SIZE(unicode),
|
||||||
|
errors);
|
||||||
|
}
|
||||||
|
}
|
||||||
else if ((strcmp(lower, "latin-1") == 0) ||
|
else if ((strcmp(lower, "latin-1") == 0) ||
|
||||||
(strcmp(lower, "latin1") == 0) ||
|
(strcmp(lower, "latin1") == 0) ||
|
||||||
(strcmp(lower, "iso-8859-1") == 0))
|
(strcmp(lower, "iso-8859-1") == 0))
|
||||||
|
@@ -3077,13 +3081,16 @@ PyUnicode_EncodeUTF8(const Py_UNICODE *s,
|
||||||
PyObject *
|
PyObject *
|
||||||
PyUnicode_AsUTF8String(PyObject *unicode)
|
PyUnicode_AsUTF8String(PyObject *unicode)
|
||||||
{
|
{
|
||||||
|
PyObject *utf8;
|
||||||
if (!PyUnicode_Check(unicode)) {
|
if (!PyUnicode_Check(unicode)) {
|
||||||
PyErr_BadArgument();
|
PyErr_BadArgument();
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
return PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(unicode),
|
utf8 = _PyUnicode_AsDefaultEncodedString(unicode);
|
||||||
PyUnicode_GET_SIZE(unicode),
|
if (utf8 == NULL)
|
||||||
NULL);
|
return NULL;
|
||||||
|
Py_INCREF(utf8);
|
||||||
|
return utf8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* --- UTF-32 Codec ------------------------------------------------------- */
|
/* --- UTF-32 Codec ------------------------------------------------------- */
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue