mirror of
https://github.com/python/cpython.git
synced 2025-11-03 11:23:31 +00:00
gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797)
ValueError is now raised if the same character is used in different roles.
This commit is contained in:
parent
2f2ddabd1a
commit
c8351a617b
3 changed files with 96 additions and 13 deletions
|
|
@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
|
|||
in TestDialectRegistry.
|
||||
"""
|
||||
def _test_arg_valid(self, ctor, arg):
|
||||
ctor(arg)
|
||||
self.assertRaises(TypeError, ctor)
|
||||
self.assertRaises(TypeError, ctor, None)
|
||||
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
|
||||
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter='')
|
||||
self.assertRaises(TypeError, ctor, arg, escapechar='')
|
||||
self.assertRaises(TypeError, ctor, arg, quotechar='')
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
|
||||
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
|
||||
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
|
||||
self.assertRaises(csv.Error, ctor, arg, 'foo')
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter=None)
|
||||
self.assertRaises(TypeError, ctor, arg, delimiter=1)
|
||||
self.assertRaises(TypeError, ctor, arg, escapechar=1)
|
||||
self.assertRaises(TypeError, ctor, arg, quotechar=1)
|
||||
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
|
||||
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
|
||||
|
|
@ -46,6 +52,40 @@ class Test_Csv(unittest.TestCase):
|
|||
quoting=csv.QUOTE_ALL, quotechar=None)
|
||||
self.assertRaises(TypeError, ctor, arg,
|
||||
quoting=csv.QUOTE_NONE, quotechar='')
|
||||
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
|
||||
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
|
||||
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
|
||||
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
|
||||
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
|
||||
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
|
||||
ctor(arg, delimiter=' ')
|
||||
ctor(arg, escapechar=' ')
|
||||
ctor(arg, quotechar=' ')
|
||||
ctor(arg, delimiter='\t', skipinitialspace=True)
|
||||
ctor(arg, escapechar='\t', skipinitialspace=True)
|
||||
ctor(arg, quotechar='\t', skipinitialspace=True)
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
delimiter=' ', skipinitialspace=True)
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
escapechar=' ', skipinitialspace=True)
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
quotechar=' ', skipinitialspace=True)
|
||||
ctor(arg, delimiter='^')
|
||||
ctor(arg, escapechar='^')
|
||||
ctor(arg, quotechar='^')
|
||||
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
|
||||
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
|
||||
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
|
||||
ctor(arg, delimiter='\x85')
|
||||
ctor(arg, escapechar='\x85')
|
||||
ctor(arg, quotechar='\x85')
|
||||
ctor(arg, lineterminator='\x85')
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
delimiter='\x85', lineterminator='\x85')
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
escapechar='\x85', lineterminator='\x85')
|
||||
self.assertRaises(ValueError, ctor, arg,
|
||||
quotechar='\x85', lineterminator='\x85')
|
||||
|
||||
def test_reader_arg_valid(self):
|
||||
self._test_arg_valid(csv.reader, [])
|
||||
|
|
@ -535,14 +575,6 @@ class TestDialectRegistry(unittest.TestCase):
|
|||
finally:
|
||||
csv.unregister_dialect('testC')
|
||||
|
||||
def test_bad_dialect(self):
|
||||
# Unknown parameter
|
||||
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
|
||||
# Bad values
|
||||
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
|
||||
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
|
||||
self.assertRaises(TypeError, csv.reader, [], quoting = 100)
|
||||
|
||||
def test_copy(self):
|
||||
for name in csv.list_dialects():
|
||||
dialect = csv.get_dialect(name)
|
||||
|
|
@ -1088,10 +1120,15 @@ class TestDialectValidity(unittest.TestCase):
|
|||
'"lineterminator" must be a string')
|
||||
|
||||
def test_invalid_chars(self):
|
||||
def create_invalid(field_name, value):
|
||||
def create_invalid(field_name, value, **kwargs):
|
||||
class mydialect(csv.Dialect):
|
||||
pass
|
||||
delimiter = ','
|
||||
quoting = csv.QUOTE_ALL
|
||||
quotechar = '"'
|
||||
lineterminator = '\r\n'
|
||||
setattr(mydialect, field_name, value)
|
||||
for field_name, value in kwargs.items():
|
||||
setattr(mydialect, field_name, value)
|
||||
d = mydialect()
|
||||
|
||||
for field_name in ("delimiter", "escapechar", "quotechar"):
|
||||
|
|
@ -1100,6 +1137,10 @@ class TestDialectValidity(unittest.TestCase):
|
|||
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
|
||||
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
|
||||
self.assertRaises(csv.Error, create_invalid, field_name, 5)
|
||||
self.assertRaises(ValueError, create_invalid, field_name, "\n")
|
||||
self.assertRaises(ValueError, create_invalid, field_name, "\r")
|
||||
self.assertRaises(ValueError, create_invalid, field_name, " ",
|
||||
skipinitialspace=True)
|
||||
|
||||
|
||||
class TestSniffer(unittest.TestCase):
|
||||
|
|
|
|||
|
|
@ -0,0 +1,3 @@
|
|||
Add more validation checks in the :class:`csv.Dialect` constructor.
|
||||
:exc:`ValueError` is now raised if the same character is used in different
|
||||
roles.
|
||||
|
|
@ -331,6 +331,33 @@ dialect_check_quoting(int quoting)
|
|||
return -1;
|
||||
}
|
||||
|
||||
static int
|
||||
dialect_check_char(const char *name, Py_UCS4 c, DialectObj *dialect)
|
||||
{
|
||||
if (c == '\r' || c == '\n' || (dialect->skipinitialspace && c == ' ')) {
|
||||
PyErr_Format(PyExc_ValueError, "bad %s value", name);
|
||||
return -1;
|
||||
}
|
||||
if (PyUnicode_FindChar(
|
||||
dialect->lineterminator, c, 0,
|
||||
PyUnicode_GET_LENGTH(dialect->lineterminator), 1) >= 0)
|
||||
{
|
||||
PyErr_Format(PyExc_ValueError, "bad %s or lineterminator value", name);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
dialect_check_chars(const char *name1, const char *name2, Py_UCS4 c1, Py_UCS4 c2)
|
||||
{
|
||||
if (c1 == c2 && c1 != NOT_SET) {
|
||||
PyErr_Format(PyExc_ValueError, "bad %s or %s value", name1, name2);
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
#define D_OFF(x) offsetof(DialectObj, x)
|
||||
|
||||
static struct PyMemberDef Dialect_memberlist[] = {
|
||||
|
|
@ -508,6 +535,18 @@ dialect_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
|||
PyErr_SetString(PyExc_TypeError, "lineterminator must be set");
|
||||
goto err;
|
||||
}
|
||||
if (dialect_check_char("delimiter", self->delimiter, self) ||
|
||||
dialect_check_char("escapechar", self->escapechar, self) ||
|
||||
dialect_check_char("quotechar", self->quotechar, self) ||
|
||||
dialect_check_chars("delimiter", "escapechar",
|
||||
self->delimiter, self->escapechar) ||
|
||||
dialect_check_chars("delimiter", "quotechar",
|
||||
self->delimiter, self->quotechar) ||
|
||||
dialect_check_chars("escapechar", "quotechar",
|
||||
self->escapechar, self->quotechar))
|
||||
{
|
||||
goto err;
|
||||
}
|
||||
|
||||
ret = Py_NewRef(self);
|
||||
err:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue