mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
Fix a bug in the memory reallocation code of PyUnicode_TranslateCharmap().
charmaptranslate_makespace() allocated more memory than required for the next replacement but didn't remember that fact, so memory size was growing exponentially every time a replacement string was longer than one character. This fixes SF bug #828737.
This commit is contained in:
parent
6a5b027742
commit
4894c30626
2 changed files with 32 additions and 19 deletions
|
@ -690,6 +690,18 @@ class CodecCallbackTest(unittest.TestCase):
|
||||||
self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
|
self.assertRaises(TypeError, u"\xff".translate, {0xff: sys.maxunicode+1})
|
||||||
self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
|
self.assertRaises(TypeError, u"\xff".translate, {0xff: ()})
|
||||||
|
|
||||||
|
def test_bug828737(self):
|
||||||
|
charmap = {
|
||||||
|
ord("&"): u"&amp;",
|
||||||
|
ord("<"): u"&lt;",
|
||||||
|
ord(">"): u"&gt;",
|
||||||
|
ord('"'): u"&quot;",
|
||||||
|
}
|
||||||
|
|
||||||
|
for n in (1, 10, 100, 1000):
|
||||||
|
text = u'abc<def>ghi'*n
|
||||||
|
text.translate(charmap)
|
||||||
|
|
||||||
def test_main():
|
def test_main():
|
||||||
test.test_support.run_unittest(CodecCallbackTest)
|
test.test_support.run_unittest(CodecCallbackTest)
|
||||||
|
|
||||||
|
|
|
@ -3222,19 +3222,19 @@ int charmaptranslate_lookup(Py_UNICODE c, PyObject *mapping, PyObject **result)
|
||||||
if not reallocate and adjust various state variables.
|
if not reallocate and adjust various state variables.
|
||||||
Return 0 on success, -1 on error */
|
Return 0 on success, -1 on error */
|
||||||
static
|
static
|
||||||
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsize,
|
int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp,
|
||||||
int requiredsize)
|
int requiredsize)
|
||||||
{
|
{
|
||||||
if (requiredsize > *outsize) {
|
int oldsize = PyUnicode_GET_SIZE(*outobj);
|
||||||
|
if (requiredsize > oldsize) {
|
||||||
/* remember old output position */
|
/* remember old output position */
|
||||||
int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
|
int outpos = *outp-PyUnicode_AS_UNICODE(*outobj);
|
||||||
/* exponentially overallocate to minimize reallocations */
|
/* exponentially overallocate to minimize reallocations */
|
||||||
if (requiredsize < 2 * *outsize)
|
if (requiredsize < 2 * oldsize)
|
||||||
requiredsize = 2 * *outsize;
|
requiredsize = 2 * oldsize;
|
||||||
if (_PyUnicode_Resize(outobj, requiredsize) < 0)
|
if (_PyUnicode_Resize(outobj, requiredsize) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
|
*outp = PyUnicode_AS_UNICODE(*outobj) + outpos;
|
||||||
*outsize = requiredsize;
|
|
||||||
}
|
}
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
@ -3245,14 +3245,15 @@ int charmaptranslate_makespace(PyObject **outobj, Py_UNICODE **outp, int *outsiz
|
||||||
The called must decref result.
|
The called must decref result.
|
||||||
Return 0 on success, -1 on error. */
|
Return 0 on success, -1 on error. */
|
||||||
static
|
static
|
||||||
int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
|
int charmaptranslate_output(const Py_UNICODE *startinp, const Py_UNICODE *curinp,
|
||||||
PyObject **outobj, int *outsize, Py_UNICODE **outp, PyObject **res)
|
int insize, PyObject *mapping, PyObject **outobj, Py_UNICODE **outp,
|
||||||
|
PyObject **res)
|
||||||
{
|
{
|
||||||
if (charmaptranslate_lookup(c, mapping, res))
|
if (charmaptranslate_lookup(*curinp, mapping, res))
|
||||||
return -1;
|
return -1;
|
||||||
if (*res==NULL) {
|
if (*res==NULL) {
|
||||||
/* not found => default to 1:1 mapping */
|
/* not found => default to 1:1 mapping */
|
||||||
*(*outp)++ = (Py_UNICODE)c;
|
*(*outp)++ = *curinp;
|
||||||
}
|
}
|
||||||
else if (*res==Py_None)
|
else if (*res==Py_None)
|
||||||
;
|
;
|
||||||
|
@ -3268,8 +3269,10 @@ int charmaptranslate_output(Py_UNICODE c, PyObject *mapping,
|
||||||
}
|
}
|
||||||
else if (repsize!=0) {
|
else if (repsize!=0) {
|
||||||
/* more than one character */
|
/* more than one character */
|
||||||
int requiredsize = *outsize + repsize - 1;
|
int requiredsize = (*outp-PyUnicode_AS_UNICODE(*outobj)) +
|
||||||
if (charmaptranslate_makespace(outobj, outp, outsize, requiredsize))
|
(insize - (*curinp-*startinp)) +
|
||||||
|
repsize - 1;
|
||||||
|
if (charmaptranslate_makespace(outobj, outp, requiredsize))
|
||||||
return -1;
|
return -1;
|
||||||
memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
|
memcpy(*outp, PyUnicode_AS_UNICODE(*res), sizeof(Py_UNICODE)*repsize);
|
||||||
*outp += repsize;
|
*outp += repsize;
|
||||||
|
@ -3294,7 +3297,6 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
Py_UNICODE *str;
|
Py_UNICODE *str;
|
||||||
/* current output position */
|
/* current output position */
|
||||||
int respos = 0;
|
int respos = 0;
|
||||||
int ressize;
|
|
||||||
char *reason = "character maps to <undefined>";
|
char *reason = "character maps to <undefined>";
|
||||||
PyObject *errorHandler = NULL;
|
PyObject *errorHandler = NULL;
|
||||||
PyObject *exc = NULL;
|
PyObject *exc = NULL;
|
||||||
|
@ -3312,16 +3314,15 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
replacements, if we need more, we'll resize */
|
replacements, if we need more, we'll resize */
|
||||||
res = PyUnicode_FromUnicode(NULL, size);
|
res = PyUnicode_FromUnicode(NULL, size);
|
||||||
if (res == NULL)
|
if (res == NULL)
|
||||||
goto onError;
|
goto onError;
|
||||||
if (size == 0)
|
if (size == 0)
|
||||||
return res;
|
return res;
|
||||||
str = PyUnicode_AS_UNICODE(res);
|
str = PyUnicode_AS_UNICODE(res);
|
||||||
ressize = size;
|
|
||||||
|
|
||||||
while (p<endp) {
|
while (p<endp) {
|
||||||
/* try to encode it */
|
/* try to encode it */
|
||||||
PyObject *x = NULL;
|
PyObject *x = NULL;
|
||||||
if (charmaptranslate_output(*p, mapping, &res, &ressize, &str, &x)) {
|
if (charmaptranslate_output(startp, p, size, mapping, &res, &str, &x)) {
|
||||||
Py_XDECREF(x);
|
Py_XDECREF(x);
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
@ -3340,7 +3341,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
|
|
||||||
/* find all untranslatable characters */
|
/* find all untranslatable characters */
|
||||||
while (collend < endp) {
|
while (collend < endp) {
|
||||||
if (charmaptranslate_lookup(*collend, mapping, &x))
|
if (charmaptranslate_lookup(*collend, mapping, &x))
|
||||||
goto onError;
|
goto onError;
|
||||||
Py_XDECREF(x);
|
Py_XDECREF(x);
|
||||||
if (x!=Py_None)
|
if (x!=Py_None)
|
||||||
|
@ -3379,7 +3380,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
char buffer[2+29+1+1];
|
char buffer[2+29+1+1];
|
||||||
char *cp;
|
char *cp;
|
||||||
sprintf(buffer, "&#%d;", (int)*p);
|
sprintf(buffer, "&#%d;", (int)*p);
|
||||||
if (charmaptranslate_makespace(&res, &str, &ressize,
|
if (charmaptranslate_makespace(&res, &str,
|
||||||
(str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
|
(str-PyUnicode_AS_UNICODE(res))+strlen(buffer)+(endp-collend)))
|
||||||
goto onError;
|
goto onError;
|
||||||
for (cp = buffer; *cp; ++cp)
|
for (cp = buffer; *cp; ++cp)
|
||||||
|
@ -3395,7 +3396,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
goto onError;
|
goto onError;
|
||||||
/* generate replacement */
|
/* generate replacement */
|
||||||
repsize = PyUnicode_GET_SIZE(repunicode);
|
repsize = PyUnicode_GET_SIZE(repunicode);
|
||||||
if (charmaptranslate_makespace(&res, &str, &ressize,
|
if (charmaptranslate_makespace(&res, &str,
|
||||||
(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
|
(str-PyUnicode_AS_UNICODE(res))+repsize+(endp-collend))) {
|
||||||
Py_DECREF(repunicode);
|
Py_DECREF(repunicode);
|
||||||
goto onError;
|
goto onError;
|
||||||
|
@ -3409,7 +3410,7 @@ PyObject *PyUnicode_TranslateCharmap(const Py_UNICODE *p,
|
||||||
}
|
}
|
||||||
/* Resize if we allocated to much */
|
/* Resize if we allocated to much */
|
||||||
respos = str-PyUnicode_AS_UNICODE(res);
|
respos = str-PyUnicode_AS_UNICODE(res);
|
||||||
if (respos<ressize) {
|
if (respos<PyUnicode_GET_SIZE(res)) {
|
||||||
if (_PyUnicode_Resize(&res, respos) < 0)
|
if (_PyUnicode_Resize(&res, respos) < 0)
|
||||||
goto onError;
|
goto onError;
|
||||||
}
|
}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue