[3.12] gh-115712: Support CSV dialects with delimiter=' ' and skipinitialspace=True (GH-115721) (GH-115729)

csv.writer() now quotes empty fields if the delimiter is a space and
skipinitialspace is true, and raises an exception if quoting is not possible.
(cherry picked from commit 937d282150)
This commit is contained in:
Serhiy Storchaka 2024-02-20 19:53:29 +02:00 committed by GitHub
parent 20907ca844
commit 5ea86f496a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 92 additions and 8 deletions

View file

@ -46,6 +46,20 @@ class Test_Csv(unittest.TestCase):
quoting=csv.QUOTE_ALL, quotechar=None)
self.assertRaises(TypeError, ctor, arg,
quoting=csv.QUOTE_NONE, quotechar='')
ctor(arg, delimiter=' ')
ctor(arg, escapechar=' ')
ctor(arg, quotechar=' ')
ctor(arg, delimiter='\t', skipinitialspace=True)
ctor(arg, escapechar='\t', skipinitialspace=True)
ctor(arg, quotechar='\t', skipinitialspace=True)
ctor(arg, delimiter=' ', skipinitialspace=True)
ctor(arg, delimiter='^')
ctor(arg, escapechar='^')
ctor(arg, quotechar='^')
ctor(arg, delimiter='\x85')
ctor(arg, escapechar='\x85')
ctor(arg, quotechar='\x85')
ctor(arg, lineterminator='\x85')
def test_reader_arg_valid(self):
self._test_arg_valid(csv.reader, [])
@ -152,9 +166,6 @@ class Test_Csv(unittest.TestCase):
def test_write_arg_valid(self):
self._write_error_test(csv.Error, None)
self._write_test((), '')
self._write_test([None], '""')
self._write_error_test(csv.Error, [None], quoting = csv.QUOTE_NONE)
# Check that exceptions are passed up the chain
self._write_error_test(OSError, BadIterable())
class BadList:
@ -168,7 +179,6 @@ class Test_Csv(unittest.TestCase):
def __str__(self):
raise OSError
self._write_error_test(OSError, [BadItem()])
def test_write_bigfield(self):
# This exercises the buffer realloc functionality
bigstring = 'X' * 50000
@ -275,6 +285,49 @@ class Test_Csv(unittest.TestCase):
fileobj.seek(0)
self.assertEqual(fileobj.read(), 'a\r\n""\r\n')
def test_write_empty_fields(self):
    """Write rows made only of empty fields using the default delimiter.

    Covers an empty row, a single empty field given as ``''`` or as
    ``None`` under several quoting modes, and a pair of empty fields.
    Calls routed through ``_write_error_test`` expect ``csv.Error``.

    NOTE(review): ``_write_test`` and ``_write_error_test`` are defined
    outside this view; presumably they write one row and compare the
    output / expect the given exception -- confirm against the helpers.
    """
    # A row with no fields at all.
    self._write_test((), '')

    # A lone empty string.
    self._write_test([''], '""')
    self._write_error_test(csv.Error, [''], quoting=csv.QUOTE_NONE)
    for mode in (csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL):
        self._write_test([''], '""', quoting=mode)

    # A lone None: only the default quoting mode is paired with output.
    self._write_test([None], '""')
    for mode in (csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL):
        self._write_error_test(csv.Error, [None], quoting=mode)

    # Two empty fields are paired with a bare delimiter.
    for blank in ('', None):
        self._write_test([blank, blank], ',')
def test_write_empty_fields_space_delimiter(self):
    """Write empty fields with ``delimiter=' '``, toggling skipinitialspace.

    With ``skipinitialspace=False`` a pair of empty fields is paired with
    a single bare space.  With ``skipinitialspace=True`` the pair is
    paired with ``"" ""``, and the combinations routed through
    ``_write_error_test`` expect ``csv.Error``.

    NOTE(review): ``_write_test`` and ``_write_error_test`` are defined
    outside this view; presumably they write one row and compare the
    output / expect the given exception -- confirm against the helpers.
    """
    # The two dialect settings under comparison.
    keep = {'delimiter': ' ', 'skipinitialspace': False}
    skip = {'delimiter': ' ', 'skipinitialspace': True}

    # A lone empty field is paired with '""' in both settings.
    for blank in ('', None):
        self._write_test([blank], '""', **keep)
        self._write_test([blank], '""', **skip)

    # Two empty fields under the default quoting mode.
    for blank in ('', None):
        self._write_test([blank, blank], ' ', **keep)
        self._write_test([blank, blank], '"" ""', **skip)

    # Two empty strings with quoting disabled.
    self._write_test(['', ''], ' ', quoting=csv.QUOTE_NONE, **keep)
    self._write_error_test(csv.Error, ['', ''],
                           quoting=csv.QUOTE_NONE, **skip)

    # Two empty strings under the string-quoting modes.
    for mode in (csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL):
        self._write_test(['', ''], '"" ""', quoting=mode, **keep)
        self._write_test(['', ''], '"" ""', quoting=mode, **skip)

    # Two None values under the non-default quoting modes.
    for mode in (csv.QUOTE_NONE, csv.QUOTE_STRINGS, csv.QUOTE_NOTNULL):
        self._write_test([None, None], ' ', quoting=mode, **keep)
        self._write_error_test(csv.Error, [None, None],
                               quoting=mode, **skip)
def test_writerows_errors(self):
with TemporaryFile("w+", encoding="utf-8", newline='') as fileobj:
writer = csv.writer(fileobj)
@ -376,6 +429,14 @@ class Test_Csv(unittest.TestCase):
[['no space', 'space', 'spaces', '\ttab']],
skipinitialspace=True)
def test_read_space_delimiter(self):
    """Read space-delimited lines with and without ``skipinitialspace``.

    The same four input lines are passed twice with ``delimiter=' '``.
    With ``skipinitialspace=False`` the expected rows treat every space
    as a separator, so a run of N spaces gives N - 1 empty fields between
    its neighbours.  With ``skipinitialspace=True`` the expected rows
    have no empty field from spaces that precede a field.

    The runs of spaces inside the input literals are significant: the
    field counts in the first expectation require exactly three spaces
    in ``'a   b'``, two on each side of ``'  a  '``, and two in ``'  '``.

    NOTE(review): ``_read_test`` is defined outside this view; presumably
    it parses the lines with ``csv.reader`` and compares the rows --
    confirm against the helper.
    """
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', '', '', 'b'], ['', '', 'a', '', ''], ['', '', ''], []],
                    delimiter=' ', skipinitialspace=False)
    self._read_test(['a   b', '  a  ', '  ', ''],
                    [['a', 'b'], ['a', ''], [''], []],
                    delimiter=' ', skipinitialspace=True)
def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size
# limits.
@ -502,10 +563,10 @@ class TestDialectRegistry(unittest.TestCase):
escapechar = "\\"
with TemporaryFile("w+", encoding="utf-8") as fileobj:
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.write("abc def\nc1ccccc1 benzene\n")
fileobj.seek(0)
reader = csv.reader(fileobj, dialect=space())
self.assertEqual(next(reader), ["abc", "def"])
self.assertEqual(next(reader), ["abc", "", "", "def"])
self.assertEqual(next(reader), ["c1ccccc1", "benzene"])
def compare_dialect_123(self, expected, *writeargs, **kwwriteargs):