From bd534f03498c97273dc5bf00182e6405a3a92e01 Mon Sep 17 00:00:00 2001
From: Georg Brandl
Date: Sun, 1 Aug 2010 08:49:18 +0000
Subject: [PATCH] #8821: do not rely on Unicode strings being terminated with a
 \u0000, rather explicitly check range before looking for a second surrogate
 character.

---
 Objects/unicodeobject.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/Objects/unicodeobject.c b/Objects/unicodeobject.c
index f2d666de126..bfd19ebbbfe 100644
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@@ -3734,7 +3734,7 @@ PyObject *PyUnicode_EncodeUnicodeEscape(const Py_UNICODE *s,
 
             ch2 = *s++;
             size--;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF && size) {
                 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                 *p++ = '\\';
                 *p++ = 'U';
@@ -3976,7 +3976,7 @@ PyObject *PyUnicode_EncodeRawUnicodeEscape(const Py_UNICODE *s,
 
             ch2 = *s++;
             size--;
-            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
+            if (ch2 >= 0xDC00 && ch2 <= 0xDFFF && size) {
                 ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
                 *p++ = '\\';
                 *p++ = 'U';