rewrite the parsing of field names to be more consistent wrt recursive expansion

2025-09-26 10:19:53 +00:00 · 2013-05-17 18:22:31 -05:00 · 2013-05-17 18:22:31 -05:00 · 4d94474ba3
commit 4d94474ba3
parent 48953632df
2 changed files with 62 additions and 63 deletions
--- a/Lib/test/test_unicode.py
+++ b/Lib/test/test_unicode.py
@ -892,7 +892,7 @@ class UnicodeTest(string_tests.CommonTest,
        self.assertRaises(ValueError, "{0".format)
        self.assertRaises(IndexError, "{0.}".format)
        self.assertRaises(ValueError, "{0.}".format, 0)
-        self.assertRaises(IndexError, "{0[}".format)
+        self.assertRaises(ValueError, "{0[}".format)
        self.assertRaises(ValueError, "{0[}".format, [])
        self.assertRaises(KeyError,   "{0]}".format)
        self.assertRaises(ValueError, "{0.[]}".format, 0)
@ -944,6 +944,14 @@ class UnicodeTest(string_tests.CommonTest,
                         '')
        self.assertEqual("{[{}]}".format({"{}": 5}), "5")
        self.assertEqual("{[{}]}".format({"{}" : "a"}), "a")
        self.assertEqual("{[{]}".format({"{" : "a"}), "a")
        self.assertEqual("{[}]}".format({"}" : "a"}), "a")
        self.assertEqual("{[[]}".format({"[" : "a"}), "a")
        self.assertEqual("{[!]}".format({"!" : "a"}), "a")
        self.assertRaises(ValueError, "{a{}b}".format, 42)
        self.assertRaises(ValueError, "{a{b}".format, 42)
        self.assertRaises(ValueError, "{[}".format, 42)
    def test_format_map(self):
        self.assertEqual(''.format_map({}), '')
--- a/Objects/stringlib/unicode_format.h
+++ b/Objects/stringlib/unicode_format.h
@ -543,7 +543,7 @@ done:
 static int
 parse_field(SubString *str, SubString *field_name, SubString *format_spec,
-            Py_UCS4 *conversion)
+            int *format_spec_needs_expanding, Py_UCS4 *conversion)
 {
    /* Note this function works if the field name is zero length,
       which is good.  Zero length field names are handled later, in
@ -561,6 +561,15 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
    field_name->start = str->start;
    while (str->start < str->end) {
        switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
        case '{':
            PyErr_SetString(PyExc_ValueError, "unexpected '{' in field name");
            return 0;
        case '[':
            for (; str->start < str->end; str->start++)
                if (PyUnicode_READ_CHAR(str->str, str->start) == ']')
                    break;
            continue;
        case '}':
        case ':':
        case '!':
            break;
@ -570,41 +579,62 @@ parse_field(SubString *str, SubString *field_name, SubString *format_spec,
        break;
    }
    field_name->end = str->start - 1;
    if (c == '!' || c == ':') {
        Py_ssize_t count;
        /* we have a format specifier and/or a conversion */
        /* don't include the last character */
        field_name->end = str->start-1;
        /* the format specifier is the rest of the string */
        format_spec->str = str->str;
        format_spec->start = str->start;
        format_spec->end = str->end;
        /* see if there's a conversion specifier */
        if (c == '!') {
            /* there must be another character present */
-            if (format_spec->start >= format_spec->end) {
+            if (str->start >= str->end) {
                PyErr_SetString(PyExc_ValueError,
-                                "end of format while looking for conversion "
+                                "end of string while looking for conversion "
                                "specifier");
                return 0;
            }
-            *conversion = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+            *conversion = PyUnicode_READ_CHAR(str->str, str->start++);
-            /* if there is another character, it must be a colon */
+            if (str->start < str->end) {
-            if (format_spec->start < format_spec->end) {
+                c = PyUnicode_READ_CHAR(str->str, str->start++);
-                c = PyUnicode_READ_CHAR(format_spec->str, format_spec->start++);
+                if (c == '}')
                    return 1;
                if (c != ':') {
                    PyErr_SetString(PyExc_ValueError,
-                                    "expected ':' after format specifier");
+                                    "expected ':' after conversion specifier");
                    return 0;
                }
            }
        }
        format_spec->str = str->str;
        format_spec->start = str->start;
        count = 1;
        while (str->start < str->end) {
            switch ((c = PyUnicode_READ_CHAR(str->str, str->start++))) {
            case '{':
                *format_spec_needs_expanding = 1;
                count++;
                break;
            case '}':
                count--;
                if (count == 0) {
                    format_spec->end = str->start - 1;
                    return 1;
                }
                break;
            default:
                break;
            }
        }
        PyErr_SetString(PyExc_ValueError, "unmatched '{' in format spec");
        return 0;
    }
    else if (c != '}') {
        PyErr_SetString(PyExc_ValueError, "expected '}' before end of string");
        return 0;
    }
    else
        /* end of string, there's no format_spec or conversion */
        field_name->end = str->start;
    return 1;
 }
@ -638,10 +668,9 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    SubString *format_spec, Py_UCS4 *conversion,
                    int *format_spec_needs_expanding)
 {
-    int at_end, hit_format_spec;
+    int at_end;
    Py_UCS4 c = 0;
    Py_ssize_t start;
    int count;
    Py_ssize_t len;
    int markup_follows = 0;
@ -713,50 +742,12 @@ MarkupIterator_next(MarkupIterator *self, SubString *literal,
    if (!markup_follows)
        return 2;
-    /* this is markup, find the end of the string by counting nested
+    /* this is markup; parse the field */
       braces.  note that this prohibits escaped braces, so that
       format_specs cannot have braces in them. */
    *field_present = 1;
-    count = 1;
+    if (!parse_field(&self->str, field_name, format_spec,
-
+                     format_spec_needs_expanding, conversion))
    start = self->str.start;
    /* we know we can't have a zero length string, so don't worry
       about that case */
    hit_format_spec = 0;
    while (self->str.start < self->str.end) {
        switch (c = PyUnicode_READ_CHAR(self->str.str, self->str.start++)) {
        case ':':
            hit_format_spec = 1;
            count = 1;
            break;
        case '{':
            /* the format spec needs to be recursively expanded.
               this is an optimization, and not strictly needed */
            if (hit_format_spec)
                *format_spec_needs_expanding = 1;
            count++;
            break;
        case '}':
            count--;
            if (count <= 0) {
                /* we're done.  parse and get out */
                SubString s;
                SubString_init(&s, self->str.str, start, self->str.start - 1);
                if (parse_field(&s, field_name, format_spec, conversion) == 0)
        return 0;
                /* success */
    return 2;
            }
            break;
        }
    }
    /* end of string while searching for matching '}' */
    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
    return 0;
 }