gh-113796: Add more validation checks in the csv.Dialect constructor (GH-113797)

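ValueError is now raised if the same character is used in different roles, for example as both the delimiter and the quotechar, or as both the escapechar and the lineterminator.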
This commit is contained in:
Serhiy Storchaka 2024-01-22 15:34:16 +02:00 committed by GitHub
parent 2f2ddabd1a
commit c8351a617b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 96 additions and 13 deletions

View file

@ -28,14 +28,20 @@ class Test_Csv(unittest.TestCase):
in TestDialectRegistry.
"""
def _test_arg_valid(self, ctor, arg):
ctor(arg)
self.assertRaises(TypeError, ctor)
self.assertRaises(TypeError, ctor, None)
self.assertRaises(TypeError, ctor, arg, bad_attr = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 0)
self.assertRaises(TypeError, ctor, arg, delimiter = 'XX')
self.assertRaises(TypeError, ctor, arg, bad_attr=0)
self.assertRaises(TypeError, ctor, arg, delimiter='')
self.assertRaises(TypeError, ctor, arg, escapechar='')
self.assertRaises(TypeError, ctor, arg, quotechar='')
self.assertRaises(TypeError, ctor, arg, delimiter='^^')
self.assertRaises(TypeError, ctor, arg, escapechar='^^')
self.assertRaises(TypeError, ctor, arg, quotechar='^^')
self.assertRaises(csv.Error, ctor, arg, 'foo')
self.assertRaises(TypeError, ctor, arg, delimiter=None)
self.assertRaises(TypeError, ctor, arg, delimiter=1)
self.assertRaises(TypeError, ctor, arg, escapechar=1)
self.assertRaises(TypeError, ctor, arg, quotechar=1)
self.assertRaises(TypeError, ctor, arg, lineterminator=None)
self.assertRaises(TypeError, ctor, arg, lineterminator=1)
@ -46,6 +52,40 @@ class Test_Csv(unittest.TestCase):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
self.assertRaises(ValueError, ctor, arg, delimiter='\n')
self.assertRaises(ValueError, ctor, arg, escapechar='\n')
self.assertRaises(ValueError, ctor, arg, quotechar='\n')
self.assertRaises(ValueError, ctor, arg, delimiter='\r')
self.assertRaises(ValueError, ctor, arg, escapechar='\r')
self.assertRaises(ValueError, ctor, arg, quotechar='\r')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
delimiter=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
escapechar=' ', skipinitialspace=True)
self.assertRaises(ValueError, ctor, arg,
quotechar=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', escapechar='^')
self.assertRaises(ValueError, ctor, arg, delimiter='^', quotechar='^')
self.assertRaises(ValueError, ctor, arg, escapechar='^', quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
delimiter='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
escapechar='\x85', lineterminator='\x85')
self.assertRaises(ValueError, ctor, arg,
quotechar='\x85', lineterminator='\x85')
def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
@ -535,14 +575,6 @@ class TestDialectRegistry(unittest.TestCase):
finally:
csv.unregister_dialect('testC')
def test_bad_dialect(self):
# Unknown parameter
self.assertRaises(TypeError, csv.reader, [], bad_attr = 0)
# Bad values
self.assertRaises(TypeError, csv.reader, [], delimiter = None)
self.assertRaises(TypeError, csv.reader, [], quoting = -1)
self.assertRaises(TypeError, csv.reader, [], quoting = 100)
def test_copy(self):
for name in csv.list_dialects():
dialect = csv.get_dialect(name)
@ -1088,10 +1120,15 @@ class TestDialectValidity(unittest.TestCase):
'"lineterminator" must be a string')
def test_invalid_chars(self):
def create_invalid(field_name, value):
def create_invalid(field_name, value, **kwargs):
class mydialect(csv.Dialect):
pass
delimiter = ','
quoting = csv.QUOTE_ALL
quotechar = '"'
lineterminator = '\r\n'
setattr(mydialect, field_name, value)
for field_name, value in kwargs.items():
setattr(mydialect, field_name, value)
d = mydialect()
for field_name in ("delimiter", "escapechar", "quotechar"):
@ -1100,6 +1137,10 @@ class TestDialectValidity(unittest.TestCase):
self.assertRaises(csv.Error, create_invalid, field_name, "abc")
self.assertRaises(csv.Error, create_invalid, field_name, b'x')
self.assertRaises(csv.Error, create_invalid, field_name, 5)
self.assertRaises(ValueError, create_invalid, field_name, "\n")
self.assertRaises(ValueError, create_invalid, field_name, "\r")
self.assertRaises(ValueError, create_invalid, field_name, " ",
skipinitialspace=True)
class TestSniffer(unittest.TestCase):