Remove meaning of -ttt, but still accept -t option on cmdline for compatibility.

2025-07-17 08:15:19 +00:00 · 2008-06-04 11:41:32 +00:00 · 2008-06-04 11:41:32 +00:00 · f954c4b9fb
commit f954c4b9fb
parent e5d68aceb5
37 changed files with 1184 additions and 1024 deletions
--- a/Objects/unicodeobject.c
+++ b/Objects/unicodeobject.c
@ -7149,6 +7149,36 @@ unicode_isidentifier(PyObject *self)
    return PyBool_FromLong(PyUnicode_IsIdentifier(self));
 }

+PyDoc_STRVAR(isprintable__doc__,
+"S.isprintable() -> bool\n\
+\n\
+Return True if all characters in S are considered\n\
+printable in repr() and there is at least one character\n\
+in S, False otherwise.");
+
+static PyObject*
+unicode_isprintable(PyObject *self)
+{
+    register const Py_UNICODE *p = PyUnicode_AS_UNICODE(self);
+    register const Py_UNICODE *e;
+
+    /* Shortcut for single character strings */
+    if (PyUnicode_GET_SIZE(self) == 1 &&
+	Py_UNICODE_ISPRINTABLE(*p))
+	return PyBool_FromLong(1);
+
+    /* Special case for empty strings */
+    if (PyUnicode_GET_SIZE(self) == 0)
+	return PyBool_FromLong(0);
+
+    e = p + PyUnicode_GET_SIZE(self);
+    for (; p < e; p++) {
+	if (!Py_UNICODE_ISPRINTABLE(*p))
+	    return PyBool_FromLong(0);
+    }
+    return PyBool_FromLong(1);
+}
+
 PyDoc_STRVAR(join__doc__,
 "S.join(sequence) -> str\n\
 \n\
@ -7526,61 +7556,8 @@ PyObject *unicode_repr(PyObject *unicode)
            continue;
        }

-#ifdef Py_UNICODE_WIDE
-        /* Map 21-bit characters to '\U00xxxxxx' */
-        else if (ch >= 0x10000) {
-            *p++ = '\\';
-            *p++ = 'U';
-            *p++ = hexdigits[(ch >> 28) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 24) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 20) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 16) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 12) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 8) & 0x0000000F];
-            *p++ = hexdigits[(ch >> 4) & 0x0000000F];
-            *p++ = hexdigits[ch & 0x0000000F];
-	    continue;
-        }
-#else
-	/* Map UTF-16 surrogate pairs to '\U00xxxxxx' */
-	else if (ch >= 0xD800 && ch < 0xDC00) {
-	    Py_UNICODE ch2;
-	    Py_UCS4 ucs;
-
-	    ch2 = *s++;
-	    size--;
-	    if (ch2 >= 0xDC00 && ch2 <= 0xDFFF) {
-		ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) + 0x00010000;
-		*p++ = '\\';
-		*p++ = 'U';
-		*p++ = hexdigits[(ucs >> 28) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 24) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 20) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 16) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 12) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 8) & 0x0000000F];
-		*p++ = hexdigits[(ucs >> 4) & 0x0000000F];
-		*p++ = hexdigits[ucs & 0x0000000F];
-		continue;
-	    }
-	    /* Fall through: isolated surrogates are copied as-is */
-	    s--;
-	    size++;
-	}
-#endif
-
-        /* Map 16-bit characters to '\uxxxx' */
-        if (ch >= 256) {
-            *p++ = '\\';
-            *p++ = 'u';
-            *p++ = hexdigits[(ch >> 12) & 0x000F];
-            *p++ = hexdigits[(ch >> 8) & 0x000F];
-            *p++ = hexdigits[(ch >> 4) & 0x000F];
-            *p++ = hexdigits[ch & 0x000F];
-        }
-
-        /* Map special whitespace to '\t', \n', '\r' */
-        else if (ch == '\t') {
+	/* Map special whitespace to '\t', \n', '\r' */
+        if (ch == '\t') {
            *p++ = '\\';
            *p++ = 't';
        }
@ -7594,16 +7571,79 @@ PyObject *unicode_repr(PyObject *unicode)
        }

        /* Map non-printable US ASCII to '\xhh' */
-        else if (ch < ' ' || ch >= 0x7F) {
+        else if (ch < ' ' || ch == 0x7F) {
            *p++ = '\\';
            *p++ = 'x';
            *p++ = hexdigits[(ch >> 4) & 0x000F];
            *p++ = hexdigits[ch & 0x000F];
        }

-        /* Copy everything else as-is */
-        else
-            *p++ = (char) ch;
+        /* Copy ASCII characters as-is */
+        else if (ch < 0x7F) {
+            *p++ = ch;
+        }
+
+	/* Non-ASCII characters */
+        else {
+            Py_UCS4 ucs = ch;
+
+#ifndef Py_UNICODE_WIDE
+            Py_UNICODE ch2 = 0;
+            /* Get code point from surrogate pair */
+            if (size > 0) {
+                ch2 = *s;
+                if (ch >= 0xD800 && ch < 0xDC00 && ch2 >= 0xDC00
+                            && ch2 <= 0xDFFF) {
+                    ucs = (((ch & 0x03FF) << 10) | (ch2 & 0x03FF)) 
+                            + 0x00010000;
+                    s++; 
+                    size--;
+                }
+            }
+#endif
+            /* Map Unicode whitespace and control characters 
+               (categories Z* and C* except ASCII space)
+            */
+            if (!Py_UNICODE_ISPRINTABLE(ucs)) {
+                /* Map 8-bit characters to '\xhh' */
+                if (ucs <= 0xff) {
+                    *p++ = '\\';
+                    *p++ = 'x';
+                    *p++ = hexdigits[(ch >> 4) & 0x000F];
+                    *p++ = hexdigits[ch & 0x000F];
+                }
+                /* Map 21-bit characters to '\U00xxxxxx' */
+                else if (ucs >= 0x10000) {
+                    *p++ = '\\';
+                    *p++ = 'U';
+                    *p++ = hexdigits[(ucs >> 28) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 24) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 20) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 16) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 12) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 8) & 0x0000000F];
+                    *p++ = hexdigits[(ucs >> 4) & 0x0000000F];
+                    *p++ = hexdigits[ucs & 0x0000000F];
+                }
+                /* Map 16-bit characters to '\uxxxx' */
+                else {
+                    *p++ = '\\';
+                    *p++ = 'u';
+                    *p++ = hexdigits[(ucs >> 12) & 0x000F];
+                    *p++ = hexdigits[(ucs >> 8) & 0x000F];
+                    *p++ = hexdigits[(ucs >> 4) & 0x000F];
+                    *p++ = hexdigits[ucs & 0x000F];
+                }
+            }
+            /* Copy characters as-is */
+            else {
+                *p++ = ch;
+#ifndef Py_UNICODE_WIDE
+                if (ucs >= 0x10000)
+                    *p++ = ch2;
+#endif
+            }
+        }
    }
    /* Add quote */
    *p++ = PyUnicode_AS_UNICODE(repr)[0];
@ -8268,6 +8308,7 @@ static PyMethodDef unicode_methods[] = {
    {"isalpha", (PyCFunction) unicode_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction) unicode_isalnum, METH_NOARGS, isalnum__doc__},
    {"isidentifier", (PyCFunction) unicode_isidentifier, METH_NOARGS, isidentifier__doc__},
+    {"isprintable", (PyCFunction) unicode_isprintable, METH_NOARGS, isprintable__doc__},
    {"zfill", (PyCFunction) unicode_zfill, METH_VARARGS, zfill__doc__},
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) unicode__format__, METH_VARARGS, p_format__doc__},
@ -8853,6 +8894,7 @@ PyObject *PyUnicode_Format(PyObject *format,

 	    case 's':
 	    case 'r':
+	    case 'a':
 		if (PyUnicode_Check(v) && c == 's') {
 		    temp = v;
 		    Py_INCREF(temp);
@ -8872,6 +8914,22 @@ PyObject *PyUnicode_Format(PyObject *format,
 					"%s argument has non-string str()");
 			goto onError;
 		    }
+		    if (c == 'a') {
+		        PyObject *ascii = PyUnicode_EncodeASCII(
+				PyUnicode_AS_UNICODE(temp),
+				PyUnicode_GET_SIZE(temp),
+				"backslashreplace");
+
+			Py_DECREF(temp);
+			if (ascii == NULL) 
+			    goto onError;
+
+			temp = PyUnicode_FromEncodedObject(ascii, 
+				    "ASCII", NULL);
+			Py_DECREF(ascii);
+			if (temp == NULL)
+			    goto onError;
+		    }
 		}
 		pbuf = PyUnicode_AS_UNICODE(temp);
 		len = PyUnicode_GET_SIZE(temp);