[3.11] gh-101006: Improve error handling when reading marshal data (GH-101007) (GH-106227)

* EOFError no longer overrides other errors such as MemoryError or OSError at
  the start of the object.
* Raise a more relevant error when a NULL object occurs as a code object
  component.
* Minimize the overhead of calling PyErr_Occurred().
(cherry picked from commit 8bf6904b22)

Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
Miss Islington (bot) 2023-06-29 03:47:30 -07:00 committed by GitHub
parent ce091c96cf
commit b937ca0a5a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 72 additions and 61 deletions

View file

@ -750,23 +750,28 @@ r_string(Py_ssize_t n, RFILE *p)
static int
r_byte(RFILE *p)
{
int c = EOF;
if (p->ptr != NULL) {
if (p->ptr < p->end)
c = (unsigned char) *p->ptr++;
return c;
if (p->ptr < p->end) {
return (unsigned char) *p->ptr++;
}
}
if (!p->readable) {
else if (!p->readable) {
assert(p->fp);
c = getc(p->fp);
int c = getc(p->fp);
if (c != EOF) {
return c;
}
}
else {
const char *ptr = r_string(1, p);
if (ptr != NULL)
c = *(const unsigned char *) ptr;
if (ptr != NULL) {
return *(const unsigned char *) ptr;
}
return EOF;
}
return c;
PyErr_SetString(PyExc_EOFError,
"EOF read where not expected");
return EOF;
}
static int
@ -827,10 +832,11 @@ r_PyLong(RFILE *p)
digit d;
n = r_long(p);
if (PyErr_Occurred())
return NULL;
if (n == 0)
return (PyObject *)_PyLong_New(0);
if (n == -1 && PyErr_Occurred()) {
return NULL;
}
if (n < -SIZE32_MAX || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (long size out of range)");
@ -849,10 +855,6 @@ r_PyLong(RFILE *p)
d = 0;
for (j=0; j < PyLong_MARSHAL_RATIO; j++) {
md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit;
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
@ -863,10 +865,6 @@ r_PyLong(RFILE *p)
d = 0;
for (j=0; j < shorts_in_top_digit; j++) {
md = r_short(p);
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
if (md < 0 || md > PyLong_MARSHAL_BASE)
goto bad_digit;
/* topmost marshal digit should be nonzero */
@ -878,18 +876,17 @@ r_PyLong(RFILE *p)
}
d += (digit)md << j*PyLong_MARSHAL_SHIFT;
}
if (PyErr_Occurred()) {
Py_DECREF(ob);
return NULL;
}
assert(!PyErr_Occurred());
/* top digit should be nonzero, else the resulting PyLong won't be
normalized */
ob->ob_digit[size-1] = d;
return (PyObject *)ob;
bad_digit:
Py_DECREF(ob);
PyErr_SetString(PyExc_ValueError,
"bad marshal data (digit out of range in long)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (digit out of range in long)");
}
return NULL;
}
@ -912,8 +909,6 @@ r_float_str(RFILE *p)
const char *ptr;
n = r_byte(p);
if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
return -1;
}
ptr = r_string(n, p);
@ -992,8 +987,10 @@ r_object(RFILE *p)
PyObject *retval = NULL;
if (code == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
if (PyErr_ExceptionMatches(PyExc_EOFError)) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
}
return NULL;
}
@ -1045,7 +1042,10 @@ r_object(RFILE *p)
case TYPE_INT:
n = r_long(p);
retval = PyErr_Occurred() ? NULL : PyLong_FromLong(n);
if (n == -1 && PyErr_Occurred()) {
break;
}
retval = PyLong_FromLong(n);
R_REF(retval);
break;
@ -1111,10 +1111,11 @@ r_object(RFILE *p)
{
const char *ptr;
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (bytes object size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (bytes object size out of range)");
}
break;
}
v = PyBytes_FromStringAndSize((char *)NULL, n);
@ -1136,10 +1137,11 @@ r_object(RFILE *p)
/* fall through */
case TYPE_ASCII:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break;
}
goto _read_ascii;
@ -1150,8 +1152,6 @@ r_object(RFILE *p)
case TYPE_SHORT_ASCII:
n = r_byte(p);
if (n == EOF) {
PyErr_SetString(PyExc_EOFError,
"EOF read where object expected");
break;
}
_read_ascii:
@ -1178,10 +1178,11 @@ r_object(RFILE *p)
const char *buffer;
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (string size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (string size out of range)");
}
break;
}
if (n != 0) {
@ -1203,16 +1204,18 @@ r_object(RFILE *p)
}
case TYPE_SMALL_TUPLE:
n = (unsigned char) r_byte(p);
if (PyErr_Occurred())
n = r_byte(p);
if (n == EOF) {
break;
}
goto _read_tuple;
case TYPE_TUPLE:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (tuple size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (tuple size out of range)");
}
break;
}
_read_tuple:
@ -1238,10 +1241,11 @@ r_object(RFILE *p)
case TYPE_LIST:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (list size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (list size out of range)");
}
break;
}
v = PyList_New(n);
@ -1296,10 +1300,11 @@ r_object(RFILE *p)
case TYPE_SET:
case TYPE_FROZENSET:
n = r_long(p);
if (PyErr_Occurred())
break;
if (n < 0 || n > SIZE32_MAX) {
PyErr_SetString(PyExc_ValueError, "bad marshal data (set size out of range)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (set size out of range)");
}
break;
}
@ -1377,20 +1382,20 @@ r_object(RFILE *p)
/* XXX ignore long->int overflows for now */
argcount = (int)r_long(p);
if (PyErr_Occurred())
if (argcount == -1 && PyErr_Occurred())
goto code_error;
posonlyargcount = (int)r_long(p);
if (PyErr_Occurred()) {
if (posonlyargcount == -1 && PyErr_Occurred()) {
goto code_error;
}
kwonlyargcount = (int)r_long(p);
if (PyErr_Occurred())
if (kwonlyargcount == -1 && PyErr_Occurred())
goto code_error;
stacksize = (int)r_long(p);
if (PyErr_Occurred())
if (stacksize == -1 && PyErr_Occurred())
goto code_error;
flags = (int)r_long(p);
if (PyErr_Occurred())
if (flags == -1 && PyErr_Occurred())
goto code_error;
code = r_object(p);
if (code == NULL)
@ -1463,6 +1468,10 @@ r_object(RFILE *p)
v = r_ref_insert(v, idx, flag, p);
code_error:
if (v == NULL && !PyErr_Occurred()) {
PyErr_SetString(PyExc_TypeError,
"NULL object in marshal data for code object");
}
Py_XDECREF(code);
Py_XDECREF(consts);
Py_XDECREF(names);
@ -1480,9 +1489,10 @@ r_object(RFILE *p)
case TYPE_REF:
n = r_long(p);
if (n < 0 || n >= PyList_GET_SIZE(p->refs)) {
if (n == -1 && PyErr_Occurred())
break;
PyErr_SetString(PyExc_ValueError, "bad marshal data (invalid reference)");
if (!PyErr_Occurred()) {
PyErr_SetString(PyExc_ValueError,
"bad marshal data (invalid reference)");
}
break;
}
v = PyList_GET_ITEM(p->refs, n);