Patch #2477: Added from __future__ import unicode_literals

The new PyParser_*Ex() functions are based on Neal's suggestion and initial patch. The new __future__ feature makes all '' and r'' unicode strings. b'' and br'' stay (byte) strings.
2025-07-28 13:44:43 +00:00 · 2008-03-26 22:01:37 +00:00 · 2008-03-26 22:01:37 +00:00 · 3c60833e1e
commit 3c60833e1e
parent 0cb3e86c47
12 changed files with 107 additions and 33 deletions
--- a/Python/ast.c
+++ b/Python/ast.c
@ -18,6 +18,7 @@
 /* Data structure used internally */
 struct compiling {
    char *c_encoding; /* source encoding */
+    int c_future_unicode; /* __future__ unicode literals flag */
    PyArena *c_arena; /* arena for allocating memeory */
    const char *c_filename; /* filename */
 };
@ -36,7 +37,7 @@ static expr_ty ast_for_testlist_gexp(struct compiling *, const node *);
 static expr_ty ast_for_call(struct compiling *, const node *, expr_ty);

 static PyObject *parsenumber(const char *);
-static PyObject *parsestr(const char *s, const char *encoding);
+static PyObject *parsestr(struct compiling *, const char *);
 static PyObject *parsestrplus(struct compiling *, const node *n);

 #ifndef LINENO
@ -198,6 +199,7 @@ PyAST_FromNode(const node *n, PyCompilerFlags *flags, const char *filename,
    } else {
        c.c_encoding = NULL;
    }
+    c.c_future_unicode = flags && flags->cf_flags & CO_FUTURE_UNICODE_LITERALS;
    c.c_arena = arena;
    c.c_filename = filename;

@ -3247,13 +3249,13 @@ decode_unicode(const char *s, size_t len, int rawmode, const char *encoding)
 * parsestr parses it, and returns the decoded Python string object.
 */
 static PyObject *
-parsestr(const char *s, const char *encoding)
+parsestr(struct compiling *c, const char *s)
 {
        size_t len;
        int quote = Py_CHARMASK(*s);
        int rawmode = 0;
        int need_encoding;
-        int unicode = 0;
+        int unicode = c->c_future_unicode;

        if (isalpha(quote) || quote == '_') {
                if (quote == 'u' || quote == 'U') {
@ -3262,6 +3264,7 @@ parsestr(const char *s, const char *encoding)
                }
                if (quote == 'b' || quote == 'B') {
                        quote = *++s;
+                        unicode = 0;
                }
                if (quote == 'r' || quote == 'R') {
                        quote = *++s;
@ -3293,12 +3296,12 @@ parsestr(const char *s, const char *encoding)
        }
 #ifdef Py_USING_UNICODE
        if (unicode || Py_UnicodeFlag) {
-                return decode_unicode(s, len, rawmode, encoding);
+                return decode_unicode(s, len, rawmode, c->c_encoding);
        }
 #endif
-        need_encoding = (encoding != NULL &&
-                         strcmp(encoding, "utf-8") != 0 &&
-                         strcmp(encoding, "iso-8859-1") != 0);
+        need_encoding = (c->c_encoding != NULL &&
+                         strcmp(c->c_encoding, "utf-8") != 0 &&
+                         strcmp(c->c_encoding, "iso-8859-1") != 0);
        if (rawmode || strchr(s, '\\') == NULL) {
                if (need_encoding) {
 #ifndef Py_USING_UNICODE
@ -3310,7 +3313,7 @@ parsestr(const char *s, const char *encoding)
                        PyObject *v, *u = PyUnicode_DecodeUTF8(s, len, NULL);
                        if (u == NULL)
                                return NULL;
-                        v = PyUnicode_AsEncodedString(u, encoding, NULL);
+                        v = PyUnicode_AsEncodedString(u, c->c_encoding, NULL);
                        Py_DECREF(u);
                        return v;
 #endif
@ -3320,7 +3323,7 @@ parsestr(const char *s, const char *encoding)
        }

        return PyString_DecodeEscape(s, len, NULL, unicode,
-                                     need_encoding ? encoding : NULL);
+                                     need_encoding ? c->c_encoding : NULL);
 }

 /* Build a Python string object out of a STRING atom.  This takes care of
@ -3333,11 +3336,11 @@ parsestrplus(struct compiling *c, const node *n)
        PyObject *v;
        int i;
        REQ(CHILD(n, 0), STRING);
-        if ((v = parsestr(STR(CHILD(n, 0)), c->c_encoding)) != NULL) {
+        if ((v = parsestr(c, STR(CHILD(n, 0)))) != NULL) {
                /* String literal concatenation */
                for (i = 1; i < NCH(n); i++) {
                        PyObject *s;
-                        s = parsestr(STR(CHILD(n, i)), c->c_encoding);
+                        s = parsestr(c, STR(CHILD(n, i)));
                        if (s == NULL)
                                goto onError;
                        if (PyString_Check(v) && PyString_Check(s)) {