mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	Use stringlib to specialize unicode_repr() for each string kind
(UCS1, UCS2, UCS4).
Benchmark:
+-------------------------------------+---------+----------------------+
| Benchmark                           | ref     | change2              |
+=====================================+=========+======================+
| repr('abc')                         | 100 ns  | 103 ns: 1.02x slower |
+-------------------------------------+---------+----------------------+
| repr('a' * 100)                     | 369 ns  | 369 ns: 1.00x slower |
+-------------------------------------+---------+----------------------+
| repr(('a' + squote) * 100)          | 1.21 us | 946 ns: 1.27x faster |
+-------------------------------------+---------+----------------------+
| repr(('a' + nl) * 100)              | 1.23 us | 907 ns: 1.36x faster |
+-------------------------------------+---------+----------------------+
| repr(dquote + ('a' + squote) * 100) | 1.08 us | 858 ns: 1.25x faster |
+-------------------------------------+---------+----------------------+
| Geometric mean                      | (ref)   | 1.16x faster         |
+-------------------------------------+---------+----------------------+
		
	
			
		
			
				
	
	
		
			95 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
			
		
		
	
	
			95 lines
		
	
	
	
		
			3.1 KiB
		
	
	
	
		
			C
		
	
	
	
	
	
/* stringlib: repr() implementation */
 | 
						|
 | 
						|
#ifndef STRINGLIB_FASTSEARCH_H
 | 
						|
#error must include "stringlib/fastsearch.h" before including this module
 | 
						|
#endif
 | 
						|
 | 
						|
 | 
						|
static void
 | 
						|
STRINGLIB(repr)(PyObject *unicode, Py_UCS4 quote,
 | 
						|
                STRINGLIB_CHAR *odata)
 | 
						|
{
 | 
						|
    Py_ssize_t isize = PyUnicode_GET_LENGTH(unicode);
 | 
						|
    const void *idata = PyUnicode_DATA(unicode);
 | 
						|
    int ikind = PyUnicode_KIND(unicode);
 | 
						|
 | 
						|
    *odata++ = quote;
 | 
						|
    for (Py_ssize_t i = 0; i < isize; i++) {
 | 
						|
        Py_UCS4 ch = PyUnicode_READ(ikind, idata, i);
 | 
						|
 | 
						|
        /* Escape quotes and backslashes */
 | 
						|
        if ((ch == quote) || (ch == '\\')) {
 | 
						|
            *odata++ = '\\';
 | 
						|
            *odata++ = ch;
 | 
						|
            continue;
 | 
						|
        }
 | 
						|
 | 
						|
        /* Map special whitespace to '\t', \n', '\r' */
 | 
						|
        if (ch == '\t') {
 | 
						|
            *odata++ = '\\';
 | 
						|
            *odata++ = 't';
 | 
						|
        }
 | 
						|
        else if (ch == '\n') {
 | 
						|
            *odata++ = '\\';
 | 
						|
            *odata++ = 'n';
 | 
						|
        }
 | 
						|
        else if (ch == '\r') {
 | 
						|
            *odata++ = '\\';
 | 
						|
            *odata++ = 'r';
 | 
						|
        }
 | 
						|
 | 
						|
        /* Map non-printable US ASCII to '\xhh' */
 | 
						|
        else if (ch < ' ' || ch == 0x7F) {
 | 
						|
            *odata++ = '\\';
 | 
						|
            *odata++ = 'x';
 | 
						|
            *odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
 | 
						|
            *odata++ = Py_hexdigits[ch & 0x000F];
 | 
						|
        }
 | 
						|
 | 
						|
        /* Copy ASCII characters as-is */
 | 
						|
        else if (ch < 0x7F) {
 | 
						|
            *odata++ = ch;
 | 
						|
        }
 | 
						|
 | 
						|
        /* Non-ASCII characters */
 | 
						|
        else {
 | 
						|
            /* Map Unicode whitespace and control characters
 | 
						|
               (categories Z* and C* except ASCII space)
 | 
						|
            */
 | 
						|
            if (!Py_UNICODE_ISPRINTABLE(ch)) {
 | 
						|
                *odata++ = '\\';
 | 
						|
                /* Map 8-bit characters to '\xhh' */
 | 
						|
                if (ch <= 0xff) {
 | 
						|
                    *odata++ = 'x';
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 4) & 0x000F];
 | 
						|
                    *odata++ = Py_hexdigits[ch & 0x000F];
 | 
						|
                }
 | 
						|
                /* Map 16-bit characters to '\uxxxx' */
 | 
						|
                else if (ch <= 0xffff) {
 | 
						|
                    *odata++ = 'u';
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 12) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 8) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 4) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[ch & 0xF];
 | 
						|
                }
 | 
						|
                /* Map 21-bit characters to '\U00xxxxxx' */
 | 
						|
                else {
 | 
						|
                    *odata++ = 'U';
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 28) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 24) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 20) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 16) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 12) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 8) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[(ch >> 4) & 0xF];
 | 
						|
                    *odata++ = Py_hexdigits[ch & 0xF];
 | 
						|
                }
 | 
						|
            }
 | 
						|
            /* Copy characters as-is */
 | 
						|
            else {
 | 
						|
                *odata++ = ch;
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
    *odata = quote;
 | 
						|
}
 |