mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
#15927: Fix csv.reader parsing of escaped \r\n with quoting off.
This fix means that such values are correctly roundtripped, since csv.writer already does the correct escaping. Patch by Michael Johnson.
This commit is contained in:
parent
04cbe0c35b
commit
c7c42efb16
4 changed files with 25 additions and 1 deletions
|
@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
|
||||||
for i, row in enumerate(csv.reader(fileobj)):
|
for i, row in enumerate(csv.reader(fileobj)):
|
||||||
self.assertEqual(row, rows[i])
|
self.assertEqual(row, rows[i])
|
||||||
|
|
||||||
|
def test_roundtrip_escaped_unquoted_newlines(self):
|
||||||
|
with TemporaryFile("w+", newline='') as fileobj:
|
||||||
|
writer = csv.writer(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")
|
||||||
|
rows = [['a\nb','b'],['c','x\r\nd']]
|
||||||
|
writer.writerows(rows)
|
||||||
|
fileobj.seek(0)
|
||||||
|
for i, row in enumerate(csv.reader(fileobj,quoting=csv.QUOTE_NONE,escapechar="\\")):
|
||||||
|
self.assertEqual(row,rows[i])
|
||||||
|
|
||||||
class TestDialectRegistry(unittest.TestCase):
|
class TestDialectRegistry(unittest.TestCase):
|
||||||
def test_registry_badargs(self):
|
def test_registry_badargs(self):
|
||||||
self.assertRaises(TypeError, csv.list_dialects, None)
|
self.assertRaises(TypeError, csv.list_dialects, None)
|
||||||
|
|
|
@ -591,6 +591,7 @@ Orjan Johansen
|
||||||
Fredrik Johansson
|
Fredrik Johansson
|
||||||
Gregory K. Johnson
|
Gregory K. Johnson
|
||||||
Kent Johnson
|
Kent Johnson
|
||||||
|
Michael Johnson
|
||||||
Simon Johnston
|
Simon Johnston
|
||||||
Matt Joiner
|
Matt Joiner
|
||||||
Thomas Jollans
|
Thomas Jollans
|
||||||
|
|
|
@ -289,6 +289,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #15927: CSV now correctly parses escaped newlines and carriage returns
|
||||||
|
when parsing with quoting turned off.
|
||||||
|
|
||||||
- Issue #17467: add readline and readlines support to mock_open in
|
- Issue #17467: add readline and readlines support to mock_open in
|
||||||
unittest.mock.
|
unittest.mock.
|
||||||
|
|
||||||
|
|
|
@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
|
||||||
typedef enum {
|
typedef enum {
|
||||||
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
|
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
|
||||||
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
|
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
|
||||||
EAT_CRNL
|
EAT_CRNL,AFTER_ESCAPED_CRNL
|
||||||
} ParserState;
|
} ParserState;
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
||||||
break;
|
break;
|
||||||
|
|
||||||
case ESCAPED_CHAR:
|
case ESCAPED_CHAR:
|
||||||
|
if (c == '\n' | c=='\r') {
|
||||||
|
if (parse_add_char(self, c) < 0)
|
||||||
|
return -1;
|
||||||
|
self->state = AFTER_ESCAPED_CRNL;
|
||||||
|
break;
|
||||||
|
}
|
||||||
if (c == '\0')
|
if (c == '\0')
|
||||||
c = '\n';
|
c = '\n';
|
||||||
if (parse_add_char(self, c) < 0)
|
if (parse_add_char(self, c) < 0)
|
||||||
|
@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
|
||||||
self->state = IN_FIELD;
|
self->state = IN_FIELD;
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case AFTER_ESCAPED_CRNL:
|
||||||
|
if (c == '\0')
|
||||||
|
break;
|
||||||
|
/*fallthru*/
|
||||||
|
|
||||||
case IN_FIELD:
|
case IN_FIELD:
|
||||||
/* in unquoted field */
|
/* in unquoted field */
|
||||||
if (c == '\n' || c == '\r' || c == '\0') {
|
if (c == '\n' || c == '\r' || c == '\0') {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue