#15927: Fix csv.reader parsing of escaped \r\n with quoting off.

This fix means that such values are correctly roundtripped, since
csv.writer already does the correct escaping.

Patch by Michael Johnson.
This commit is contained in:
R David Murray 2013-03-19 22:41:47 -04:00
parent 04cbe0c35b
commit c7c42efb16
4 changed files with 25 additions and 1 deletions

View file

@ -308,6 +308,15 @@ class Test_Csv(unittest.TestCase):
for i, row in enumerate(csv.reader(fileobj)): for i, row in enumerate(csv.reader(fileobj)):
self.assertEqual(row, rows[i]) self.assertEqual(row, rows[i])
def test_roundtrip_escaped_unquoted_newlines(self):
    """Rows with embedded newlines survive a QUOTE_NONE write/read cycle.

    With quoting disabled, the writer escapes carriage returns and line
    feeds inside a field using the escape character; the reader must turn
    those escaped characters back into the original field text.
    """
    rows = [['a\nb', 'b'], ['c', 'x\r\nd']]
    # newline='' keeps the file object from translating line endings, so
    # the csv module sees exactly the characters it wrote.
    with TemporaryFile("w+", newline='') as fileobj:
        writer = csv.writer(fileobj, quoting=csv.QUOTE_NONE,
                            escapechar="\\")
        writer.writerows(rows)
        fileobj.seek(0)
        reader = csv.reader(fileobj, quoting=csv.QUOTE_NONE,
                            escapechar="\\")
        # Compare the complete result: checking row-by-row inside a loop
        # would pass silently if the reader produced too few (or no) rows.
        self.assertEqual(list(reader), rows)
class TestDialectRegistry(unittest.TestCase): class TestDialectRegistry(unittest.TestCase):
def test_registry_badargs(self): def test_registry_badargs(self):
self.assertRaises(TypeError, csv.list_dialects, None) self.assertRaises(TypeError, csv.list_dialects, None)

View file

@ -591,6 +591,7 @@ Orjan Johansen
Fredrik Johansson Fredrik Johansson
Gregory K. Johnson Gregory K. Johnson
Kent Johnson Kent Johnson
Michael Johnson
Simon Johnston Simon Johnston
Matt Joiner Matt Joiner
Thomas Jollans Thomas Jollans

View file

@ -289,6 +289,9 @@ Core and Builtins
Library Library
------- -------
- Issue #15927: csv now correctly parses escaped newlines and carriage
returns when parsing with quoting turned off.
- Issue #17467: add readline and readlines support to mock_open in - Issue #17467: add readline and readlines support to mock_open in
unittest.mock. unittest.mock.

View file

@ -51,7 +51,7 @@ static struct PyModuleDef _csvmodule;
typedef enum { typedef enum {
START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD, START_RECORD, START_FIELD, ESCAPED_CHAR, IN_FIELD,
IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD, IN_QUOTED_FIELD, ESCAPE_IN_QUOTED_FIELD, QUOTE_IN_QUOTED_FIELD,
EAT_CRNL EAT_CRNL,AFTER_ESCAPED_CRNL
} ParserState; } ParserState;
typedef enum { typedef enum {
@ -644,6 +644,12 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
break; break;
case ESCAPED_CHAR: case ESCAPED_CHAR:
if (c == '\n' | c=='\r') {
if (parse_add_char(self, c) < 0)
return -1;
self->state = AFTER_ESCAPED_CRNL;
break;
}
if (c == '\0') if (c == '\0')
c = '\n'; c = '\n';
if (parse_add_char(self, c) < 0) if (parse_add_char(self, c) < 0)
@ -651,6 +657,11 @@ parse_process_char(ReaderObj *self, Py_UCS4 c)
self->state = IN_FIELD; self->state = IN_FIELD;
break; break;
case AFTER_ESCAPED_CRNL:
if (c == '\0')
break;
/*fallthru*/
case IN_FIELD: case IN_FIELD:
/* in unquoted field */ /* in unquoted field */
if (c == '\n' || c == '\r' || c == '\0') { if (c == '\n' || c == '\r' || c == '\0') {