gh-113732: Fix support of QUOTE_NOTNULL and QUOTE_STRINGS in csv.reader (GH-113738)

This commit is contained in:
Serhiy Storchaka 2024-01-30 14:21:12 +02:00 committed by GitHub
parent 58f883b91b
commit ea30a28c3e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 57 additions and 18 deletions

View file

@@ -131,7 +131,7 @@ typedef struct {
Py_UCS4 *field; /* temporary buffer */
Py_ssize_t field_size; /* size of allocated buffer */
Py_ssize_t field_len; /* length of current field */
int numeric_field; /* treat field as numeric */
bool unquoted_field; /* true if no quotes around the current field */
unsigned long line_num; /* Source-file line number */
} ReaderObj;
@@ -644,22 +644,33 @@ _call_dialect(_csvstate *module_state, PyObject *dialect_inst, PyObject *kwargs)
static int
parse_save_field(ReaderObj *self)
{
int quoting = self->dialect->quoting;
PyObject *field;
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
(void *) self->field, self->field_len);
if (field == NULL)
return -1;
self->field_len = 0;
if (self->numeric_field) {
PyObject *tmp;
self->numeric_field = 0;
tmp = PyNumber_Float(field);
Py_DECREF(field);
if (tmp == NULL)
if (self->unquoted_field &&
self->field_len == 0 &&
(quoting == QUOTE_NOTNULL || quoting == QUOTE_STRINGS))
{
field = Py_NewRef(Py_None);
}
else {
field = PyUnicode_FromKindAndData(PyUnicode_4BYTE_KIND,
(void *) self->field, self->field_len);
if (field == NULL) {
return -1;
field = tmp;
}
if (self->unquoted_field &&
self->field_len != 0 &&
(quoting == QUOTE_NONNUMERIC || quoting == QUOTE_STRINGS))
{
PyObject *tmp = PyNumber_Float(field);
Py_DECREF(field);
if (tmp == NULL) {
return -1;
}
field = tmp;
}
self->field_len = 0;
}
if (PyList_Append(self->fields, field) < 0) {
Py_DECREF(field);
@@ -721,6 +732,7 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
/* fallthru */
case START_FIELD:
/* expecting field */
self->unquoted_field = true;
if (c == '\n' || c == '\r' || c == EOL) {
/* save empty field - return [fields] */
if (parse_save_field(self) < 0)
@@ -730,10 +742,12 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
else if (c == dialect->quotechar &&
dialect->quoting != QUOTE_NONE) {
/* start quoted field */
self->unquoted_field = false;
self->state = IN_QUOTED_FIELD;
}
else if (c == dialect->escapechar) {
/* possible escaped character */
self->unquoted_field = false;
self->state = ESCAPED_CHAR;
}
else if (c == ' ' && dialect->skipinitialspace)
@@ -746,8 +760,6 @@ parse_process_char(ReaderObj *self, _csvstate *module_state, Py_UCS4 c)
}
else {
/* begin new unquoted field */
if (dialect->quoting == QUOTE_NONNUMERIC)
self->numeric_field = 1;
if (parse_add_char(self, module_state, c) < 0)
return -1;
self->state = IN_FIELD;
@@ -892,7 +904,7 @@ parse_reset(ReaderObj *self)
return -1;
self->field_len = 0;
self->state = START_RECORD;
self->numeric_field = 0;
self->unquoted_field = false;
return 0;
}