#2630: Implement PEP 3138.

The repr() of a string now contains printable Unicode characters unescaped.
The new ascii() builtin can be used to get a repr() with only ASCII characters in it.

PEP and patch were written by Atsuo Ishimoto.
This commit is contained in:
Georg Brandl 2008-06-11 18:37:52 +00:00
parent ea6d58d9d3
commit 559e5d7f4d
25 changed files with 1271 additions and 974 deletions

View file

@ -21,6 +21,7 @@
/* Flag masks; each value below has exactly one bit set, so the masks can
   be combined and tested independently.
   NOTE(review): presumably UPPER_MASK, XID_START_MASK and XID_CONTINUE_MASK
   are tested against the same type-record `flags` field as
   NONPRINTABLE_MASK -- confirm against the users of each mask. */
#define UPPER_MASK 0x80
#define XID_START_MASK 0x100
#define XID_CONTINUE_MASK 0x200
/* Tested against a type record's `flags` by _PyUnicode_IsPrintable():
   a character counts as printable when this bit is clear. */
#define NONPRINTABLE_MASK 0x400
typedef struct {
const Py_UNICODE upper;
@ -675,6 +676,26 @@ int _PyUnicode_IsNumeric(Py_UNICODE ch)
return _PyUnicode_ToNumeric(ch) != -1.0;
}
/* Tell whether a Unicode character is printable, i.e. whether repr() may
   emit it unescaped.

   Returns 1 if ch is printable, 0 if it is not (a non-printable character
   is one that has to be hex-escaped when repr()ed).

   A character is non-printable when the Unicode character database places
   it in one of the following categories:
      * Cc (Other, Control)
      * Cf (Other, Format)
      * Cs (Other, Surrogate)
      * Co (Other, Private Use)
      * Cn (Other, Not Assigned)
      * Zl (Separator, Line), i.e. '\u2028', LINE SEPARATOR
      * Zp (Separator, Paragraph), i.e. '\u2029', PARAGRAPH SEPARATOR
      * Zs (Separator, Space), except for the ASCII space ('\x20')
   Every other character is considered printable.

   The answer is read from the character's type record: the character is
   printable exactly when NONPRINTABLE_MASK is clear in the record's flags.
*/
int _PyUnicode_IsPrintable(Py_UNICODE ch)
{
    /* A set NONPRINTABLE bit is the only thing that disqualifies ch. */
    if (gettyperecord(ch)->flags & NONPRINTABLE_MASK) {
        return 0;
    }
    return 1;
}
#ifndef WANT_WCTYPE_FUNCTIONS
/* Returns 1 for Unicode characters having the bidirectional type