When quoting=QUOTE_NONNUMERIC, the reader now casts unquoted fields to floats.
This commit is contained in:
Andrew McNamara 2005-01-12 09:45:18 +00:00
parent 5d45a8dc22
commit 0f0599ddc1
3 changed files with 70 additions and 32 deletions

View file

@ -241,6 +241,12 @@ class Test_Csv(unittest.TestCase):
quotechar=None, escapechar='\\') quotechar=None, escapechar='\\')
self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']], self._read_test(['1,",3,",5'], [['1', '"', '3', '"', '5']],
quoting=csv.QUOTE_NONE, escapechar='\\') quoting=csv.QUOTE_NONE, escapechar='\\')
# will this fail where locale uses comma for decimals?
self._read_test([',3,"5",7.3'], [['', 3, '5', 7.3]],
quoting=csv.QUOTE_NONNUMERIC)
self.assertRaises(ValueError, self._read_test,
['abc,3'], [[]],
quoting=csv.QUOTE_NONNUMERIC)
def test_read_bigfield(self): def test_read_bigfield(self):
# This exercises the buffer realloc functionality and field size # This exercises the buffer realloc functionality and field size

View file

@ -45,8 +45,11 @@ Library
+ quotechar=None and quoting=QUOTE_NONE now work the way PEP 305 + quotechar=None and quoting=QUOTE_NONE now work the way PEP 305
dictates. dictates.
+ the parser now removes the escapechar prefix from escaped characters. + the parser now removes the escapechar prefix from escaped characters.
+ QUOTE_NONNUMERIC now tests for numeric objects, rather than attempting + when quoting=QUOTE_NONNUMERIC, the writer now tests for numeric
to cast to float. objects, rather than attempting to cast to float, and using the
success of that as the determinator.
+ when quoting=QUOTE_NONNUMERIC, the reader now casts unquoted fields
to floats.
+ writer doublequote handling improved. + writer doublequote handling improved.
+ Dialect classes passed to the module are no longer instantiated by + Dialect classes passed to the module are no longer instantiated by
the module before being parsed (the former validation scheme required the module before being parsed (the former validation scheme required

View file

@ -97,6 +97,7 @@ typedef struct {
int field_size; /* size of allocated buffer */ int field_size; /* size of allocated buffer */
int field_len; /* length of current field */ int field_len; /* length of current field */
int had_parse_error; /* did we have a parse error? */ int had_parse_error; /* did we have a parse error? */
int numeric_field; /* treat field as numeric */
} ReaderObj; } ReaderObj;
staticforward PyTypeObject Reader_Type; staticforward PyTypeObject Reader_Type;
@ -495,17 +496,30 @@ _call_dialect(PyObject *dialect_inst, PyObject *kwargs)
return dialect; return dialect;
} }
static void static int
parse_save_field(ReaderObj *self) parse_save_field(ReaderObj *self)
{ {
PyObject *field; PyObject *field;
field = PyString_FromStringAndSize(self->field, self->field_len); field = PyString_FromStringAndSize(self->field, self->field_len);
if (field != NULL) { if (field == NULL)
PyList_Append(self->fields, field); return -1;
Py_XDECREF(field);
}
self->field_len = 0; self->field_len = 0;
if (self->numeric_field) {
PyObject *tmp;
self->numeric_field = 0;
tmp = PyNumber_Float(field);
if (tmp == NULL) {
Py_DECREF(field);
return -1;
}
Py_DECREF(field);
field = tmp;
}
PyList_Append(self->fields, field);
Py_DECREF(field);
return 0;
} }
static int static int
@ -528,6 +542,22 @@ parse_grow_buff(ReaderObj *self)
return 1; return 1;
} }
/*
 * Put the reader back into its "start of record" parse state.
 *
 * Installs a new, empty list as self->fields, releases the list that was
 * there before (if any), and clears field_len, state, had_parse_error and
 * numeric_field.
 *
 * Returns 0 on success.  Returns -1 if the new list cannot be allocated;
 * in that case nothing in *self has been modified, so the previous fields
 * list is still owned by the reader.
 */
static int
parse_reset(ReaderObj *self)
{
    PyObject *fresh_fields;
    PyObject *stale_fields;

    /* Allocate the replacement before letting go of the old list, so that
     * self->fields never holds a pointer to an already-released object.
     * NOTE(review): PyList_New() allocates a GC-tracked object and may
     * therefore start a collection; if the reader's traverse function
     * (not visible here) visits self->fields, a stale pointer at that
     * moment would be a use-after-free -- confirm against Reader_traverse.
     */
    fresh_fields = PyList_New(0);
    if (fresh_fields == NULL)
        return -1;

    /* Swap first, release second: the struct is consistent at all times. */
    stale_fields = self->fields;
    self->fields = fresh_fields;
    if (stale_fields) {
        Py_DECREF(stale_fields);
    }

    self->field_len = 0;
    self->state = START_RECORD;
    self->had_parse_error = 0;
    self->numeric_field = 0;
    return 0;
}
static int static int
parse_add_char(ReaderObj *self, char c) parse_add_char(ReaderObj *self, char c)
{ {
@ -560,7 +590,8 @@ parse_process_char(ReaderObj *self, char c)
/* expecting field */ /* expecting field */
if (c == '\n') { if (c == '\n') {
/* save empty field - return [fields] */ /* save empty field - return [fields] */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
self->state = START_RECORD; self->state = START_RECORD;
} }
else if (c == dialect->quotechar && else if (c == dialect->quotechar &&
@ -577,10 +608,13 @@ parse_process_char(ReaderObj *self, char c)
; ;
else if (c == dialect->delimiter) { else if (c == dialect->delimiter) {
/* save empty field */ /* save empty field */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
} }
else { else {
/* begin new unquoted field */ /* begin new unquoted field */
if (dialect->quoting == QUOTE_NONNUMERIC)
self->numeric_field = 1;
if (parse_add_char(self, c) < 0) if (parse_add_char(self, c) < 0)
return -1; return -1;
self->state = IN_FIELD; self->state = IN_FIELD;
@ -597,7 +631,8 @@ parse_process_char(ReaderObj *self, char c)
/* in unquoted field */ /* in unquoted field */
if (c == '\n') { if (c == '\n') {
/* end of line - return [fields] */ /* end of line - return [fields] */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
self->state = START_RECORD; self->state = START_RECORD;
} }
else if (c == dialect->escapechar) { else if (c == dialect->escapechar) {
@ -606,7 +641,8 @@ parse_process_char(ReaderObj *self, char c)
} }
else if (c == dialect->delimiter) { else if (c == dialect->delimiter) {
/* save field - wait for new field */ /* save field - wait for new field */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
self->state = START_FIELD; self->state = START_FIELD;
} }
else { else {
@ -662,12 +698,14 @@ parse_process_char(ReaderObj *self, char c)
} }
else if (c == dialect->delimiter) { else if (c == dialect->delimiter) {
/* save field - wait for new field */ /* save field - wait for new field */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
self->state = START_FIELD; self->state = START_FIELD;
} }
else if (c == '\n') { else if (c == '\n') {
/* end of line - return [fields] */ /* end of line - return [fields] */
parse_save_field(self); if (parse_save_field(self) < 0)
return -1;
self->state = START_RECORD; self->state = START_RECORD;
} }
else if (!dialect->strict) { else if (!dialect->strict) {
@ -716,14 +754,10 @@ Reader_iternext(ReaderObj *self)
return NULL; return NULL;
} }
if (self->had_parse_error) { if (self->had_parse_error)
if (self->fields) { if (parse_reset(self) < 0) {
Py_XDECREF(self->fields); Py_DECREF(lineobj);
} return NULL;
self->fields = PyList_New(0);
self->field_len = 0;
self->state = START_RECORD;
self->had_parse_error = 0;
} }
line = PyString_AsString(lineobj); line = PyString_AsString(lineobj);
@ -886,15 +920,15 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
return NULL; return NULL;
self->dialect = NULL; self->dialect = NULL;
self->input_iter = self->fields = NULL;
self->fields = NULL; self->fields = NULL;
self->input_iter = NULL; self->input_iter = NULL;
self->had_parse_error = 0;
self->field = NULL; self->field = NULL;
self->field_size = 0; self->field_size = 0;
self->field_len = 0;
self->state = START_RECORD; if (parse_reset(self) < 0) {
Py_DECREF(self);
return NULL;
}
if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) { if (!PyArg_UnpackTuple(args, "", 1, 2, &iterator, &dialect)) {
Py_DECREF(self); Py_DECREF(self);
@ -912,11 +946,6 @@ csv_reader(PyObject *module, PyObject *args, PyObject *keyword_args)
Py_DECREF(self); Py_DECREF(self);
return NULL; return NULL;
} }
self->fields = PyList_New(0);
if (self->fields == NULL) {
Py_DECREF(self);
return NULL;
}
PyObject_GC_Track(self); PyObject_GC_Track(self);
return (PyObject *)self; return (PyObject *)self;