mirror of
https://github.com/python/cpython.git
synced 2025-09-29 19:56:59 +00:00
When quoting=QUOTE_NONNUMERIC, the reader now casts unquoted fields
to floats.
This commit is contained in:
parent
5d45a8dc22
commit
0f0599ddc1
3 changed files with 70 additions and 32 deletions
|
@ -241,6 +241,12 @@ class Test_Csv(unittest.TestCase):
|
||||||
quotechar=None, escapechar='\\')
|
quotechar=None, escapechar='\\')
|
||||||
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
|
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
|
||||||
quoting=csv.QUOTE_NONE, escapechar='\\')
|
quoting=csv.QUOTE_NONE, escapechar='\\')
|
||||||
|
# will this fail where locale uses comma for decimals?
|
||||||
|
self._read_test([',3,"5",7.3'], [['', 3, '5', 7.3]],
|
||||||
|
quoting=csv.QUOTE_NONNUMERIC)
|
||||||
|
self.assertRaises(ValueError, self._read_test,
|
||||||
|
['abc,3'], [[]],
|
||||||
|
quoting=csv.QUOTE_NONNUMERIC)
|
||||||
|
|
||||||
def test_read_bigfield(self):
|
def test_read_bigfield(self):
|
||||||
# This exercises the buffer realloc functionality and field size
|
# This exercises the buffer realloc functionality and field size
|
||||||
|
|
|
@ -45,8 +45,11 @@ Library
|
||||||
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
|
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
|
||||||
dictates.
|
dictates.
|
||||||
+ the parser now removes the escapechar prefix from escaped characters.
|
+ the parser now removes the escapechar prefix from escaped characters.
|
||||||
+ QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting
|
+ when quoting=QUOTE_NONNUMERIC, the writer now tests for numeric
|
||||||
to cast to float.
|
objects, rather than attempting to cast to float, and using the
|
||||||
|
success of that as the deciding factor.
|
||||||
|
+ when quoting=QUOTE_NONNUMERIC, the reader now casts unquoted fields
|
||||||
|
to floats.
|
||||||
+ writer doublequote handling improved.
|
+ writer doublequote handling improved.
|
||||||
+ Dialect classes passed to the module are no longer instantiated by
|
+ Dialect classes passed to the module are no longer instantiated by
|
||||||
the module before being parsed (the former validation scheme required
|
the module before being parsed (the former validation scheme required
|
||||||
|
|
|
@ -97,6 +97,7 @@ typedef struct {
|
||||||
int field_size; /* size of allocated buffer */
|
int field_size; /* size of allocated buffer */
|
||||||
int field_len; /* length of current field */
|
int field_len; /* length of current field */
|
||||||
int had_parse_error; /* did we have a parse error? */
|
int had_parse_error; /* did we have a parse error? */
|
||||||
|
int numeric_field; /* treat field as numeric */
|
||||||
} ReaderObj;
|
} ReaderObj;
|
||||||
|
|
||||||
staticforward PyTypeObject Reader_Type;
|
staticforward PyTypeObject Reader_Type;
|
||||||
|
@ -495,17 +496,30 @@ _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
|
||||||
return dialect;
|
return dialect;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static int
|
||||||
parse_save_field(ReaderObj *self)
|
parse_save_field(ReaderObj *self)
|
||||||
{
|
{
|
||||||
PyObject *field;
|
PyObject *field;
|
||||||
|
|
||||||
field = PyString_FromStringAndSize(self->field, self->field_len);
|
field = PyString_FromStringAndSize(self->field, self->field_len);
|
||||||
if (field != NULL) {
|
if (field == NULL)
|
||||||
PyList_Append(self->fields, field);
|
return -1;
|
||||||
Py_XDECREF(field);
|
|
||||||
}
|
|
||||||
self->field_len = 0;
|
self->field_len = 0;
|
||||||
|
if (self->numeric_field) {
|
||||||
|
PyObject *tmp;
|
||||||
|
|
||||||
|
self->numeric_field = 0;
|
||||||
|
tmp = PyNumber_Float(field);
|
||||||
|
if (tmp == NULL) {
|
||||||
|
Py_DECREF(field);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
Py_DECREF(field);
|
||||||
|
field = tmp;
|
||||||
|
}
|
||||||
|
PyList_Append(self->fields, field);
|
||||||
|
Py_DECREF(field);
|
||||||
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
|
@ -528,6 +542,22 @@ parse_grow_buff(ReaderObj *self)
|
||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
parse_reset(ReaderObj *self)
|
||||||
|
{
|
||||||
|
if (self->fields) {
|
||||||
|
Py_DECREF(self->fields);
|
||||||
|
}
|
||||||
|
self->fields = PyList_New(0);
|
||||||
|
if (self->fields == NULL)
|
||||||
|
return -1;
|
||||||
|
self->field_len = 0;
|
||||||
|
self->state = START_RECORD;
|
||||||
|
self->had_parse_error = 0;
|
||||||
|
self->numeric_field = 0;
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
parse_add_char(ReaderObj *self, char c)
|
parse_add_char(ReaderObj *self, char c)
|
||||||
{
|
{
|
||||||
|
@ -560,7 +590,8 @@ parse_process_char(ReaderObj *self, char c)
|
||||||
/* expecting field */
|
/* expecting field */
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
/* save empty field - return [fields] */
|
/* save empty field - return [fields] */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
self->state = START_RECORD;
|
self->state = START_RECORD;
|
||||||
}
|
}
|
||||||
else if (c == dialect->quotechar &&
|
else if (c == dialect->quotechar &&
|
||||||
|
@ -577,10 +608,13 @@ parse_process_char(ReaderObj *self, char c)
|
||||||
;
|
;
|
||||||
else if (c == dialect->delimiter) {
|
else if (c == dialect->delimiter) {
|
||||||
/* save empty field */
|
/* save empty field */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
/* begin new unquoted field */
|
/* begin new unquoted field */
|
||||||
|
if (dialect->quoting == QUOTE_NONNUMERIC)
|
||||||
|
self->numeric_field = 1;
|
||||||
if (parse_add_char(self, c) < 0)
|
if (parse_add_char(self, c) < 0)
|
||||||
return -1;
|
return -1;
|
||||||
self->state = IN_FIELD;
|
self->state = IN_FIELD;
|
||||||
|
@ -597,7 +631,8 @@ parse_process_char(ReaderObj *self, char c)
|
||||||
/* in unquoted field */
|
/* in unquoted field */
|
||||||
if (c == '\n') {
|
if (c == '\n') {
|
||||||
/* end of line - return [fields] */
|
/* end of line - return [fields] */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
self->state = START_RECORD;
|
self->state = START_RECORD;
|
||||||
}
|
}
|
||||||
else if (c == dialect->escapechar) {
|
else if (c == dialect->escapechar) {
|
||||||
|
@ -606,7 +641,8 @@ parse_process_char(ReaderObj *self, char c)
|
||||||
}
|
}
|
||||||
else if (c == dialect->delimiter) {
|
else if (c == dialect->delimiter) {
|
||||||
/* save field - wait for new field */
|
/* save field - wait for new field */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
self->state = START_FIELD;
|
self->state = START_FIELD;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
|
@ -662,12 +698,14 @@ parse_process_char(ReaderObj *self, char c)
|
||||||
}
|
}
|
||||||
else if (c == dialect->delimiter) {
|
else if (c == dialect->delimiter) {
|
||||||
/* save field - wait for new field */
|
/* save field - wait for new field */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
self->state = START_FIELD;
|
self->state = START_FIELD;
|
||||||
}
|
}
|
||||||
else if (c == '\n') {
|
else if (c == '\n') {
|
||||||
/* end of line - return [fields] */
|
/* end of line - return [fields] */
|
||||||
parse_save_field(self);
|
if (parse_save_field(self) < 0)
|
||||||
|
return -1;
|
||||||
self->state = START_RECORD;
|
self->state = START_RECORD;
|
||||||
}
|
}
|
||||||
else if (!dialect->strict) {
|
else if (!dialect->strict) {
|
||||||
|
@ -716,14 +754,10 @@ Reader_iternext(ReaderObj *self)
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (self->had_parse_error) {
|
if (self->had_parse_error)
|
||||||
if (self->fields) {
|
if (parse_reset(self) < 0) {
|
||||||
Py_XDECREF(self->fields);
|
Py_DECREF(lineobj);
|
||||||
}
|
return NULL;
|
||||||
self->fields = PyList_New(0);
|
|
||||||
self->field_len = 0;
|
|
||||||
self->state = START_RECORD;
|
|
||||||
self->had_parse_error = 0;
|
|
||||||
}
|
}
|
||||||
line = PyString_AsString(lineobj);
|
line = PyString_AsString(lineobj);
|
||||||
|
|
||||||
|
@ -886,15 +920,15 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|
||||||
self->dialect = NULL;
|
self->dialect = NULL;
|
||||||
self->input_iter = self->fields = NULL;
|
|
||||||
|
|
||||||
self->fields = NULL;
|
self->fields = NULL;
|
||||||
self->input_iter = NULL;
|
self->input_iter = NULL;
|
||||||
self->had_parse_error = 0;
|
|
||||||
self->field = NULL;
|
self->field = NULL;
|
||||||
self->field_size = 0;
|
self->field_size = 0;
|
||||||
self->field_len = 0;
|
|
||||||
self->state = START_RECORD;
|
if (parse_reset(self) < 0) {
|
||||||
|
Py_DECREF(self);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
|
if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
|
||||||
Py_DECREF(self);
|
Py_DECREF(self);
|
||||||
|
@ -912,11 +946,6 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
|
||||||
Py_DECREF(self);
|
Py_DECREF(self);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
self->fields = PyList_New(0);
|
|
||||||
if (self->fields == NULL) {
|
|
||||||
Py_DECREF(self);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
PyObject_GC_Track(self);
|
PyObject_GC_Track(self);
|
||||||
return (PyObject *)self;
|
return (PyObject *)self;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue