mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Merged revisions 79494,79496 via svnmerge from
svn+ssh://pythondev@svn.python.org/python/trunk
........
r79494 | florent.xicluna | 2010-03-30 10:24:06 +0200 (mar, 30 mar 2010) | 2 lines
#7643: Unicode codepoints VT (0x0B) and FF (0x0C) are linebreaks according to Unicode Standard Annex #14.
........
r79496 | florent.xicluna | 2010-03-30 18:29:03 +0200 (mar, 30 mar 2010) | 2 lines
Highlight the change of behavior related to r79494. Now VT and FF are linebreaks.
........
This commit is contained in:
parent
364129ef5a
commit
806d8cf0e8
5 changed files with 52 additions and 12 deletions
|
@ -126,9 +126,9 @@ static const char unicode_default_encoding[] = "utf-8";
|
|||
/* Fast detection of the most frequent whitespace characters */
|
||||
const unsigned char _Py_ascii_whitespace[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* case 0x0009: * HORIZONTAL TABULATION */
|
||||
/* case 0x0009: * CHARACTER TABULATION */
|
||||
/* case 0x000A: * LINE FEED */
|
||||
/* case 0x000B: * VERTICAL TABULATION */
|
||||
/* case 0x000B: * LINE TABULATION */
|
||||
/* case 0x000C: * FORM FEED */
|
||||
/* case 0x000D: * CARRIAGE RETURN */
|
||||
0, 1, 1, 1, 1, 1, 0, 0,
|
||||
|
@ -163,8 +163,10 @@ static PyObject *unicode_encode_call_errorhandler(const char *errors,
|
|||
static unsigned char ascii_linebreak[] = {
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 0x000A, * LINE FEED */
|
||||
/* 0x000B, * LINE TABULATION */
|
||||
/* 0x000C, * FORM FEED */
|
||||
/* 0x000D, * CARRIAGE RETURN */
|
||||
0, 0, 1, 0, 0, 1, 0, 0,
|
||||
0, 0, 1, 1, 1, 1, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0,
|
||||
/* 0x001C, * FILE SEPARATOR */
|
||||
/* 0x001D, * GROUP SEPARATOR */
|
||||
|
|
|
@ -694,7 +694,7 @@ static unsigned char index1[] = {
|
|||
};
|
||||
|
||||
static unsigned char index2[] = {
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 2, 2, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, 3, 3, 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
|
||||
1, 1, 1, 1, 3, 3, 3, 2, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
|
||||
6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 5, 5, 5, 5, 5, 5, 5, 16, 16, 16, 16,
|
||||
16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
|
||||
|
@ -3395,13 +3395,16 @@ int _PyUnicode_IsWhitespace(register const Py_UNICODE ch)
|
|||
#endif
|
||||
}
|
||||
|
||||
/* Returns 1 for Unicode characters having the category 'Zl',
|
||||
* 'Zp' or type 'B', 0 otherwise.
|
||||
/* Returns 1 for Unicode characters having the line break
|
||||
* property 'BK', 'CR', 'LF' or 'NL' or having bidirectional
|
||||
* type 'B', 0 otherwise.
|
||||
*/
|
||||
int _PyUnicode_IsLinebreak(register const Py_UNICODE ch)
|
||||
{
|
||||
switch (ch) {
|
||||
case 0x000A:
|
||||
case 0x000B:
|
||||
case 0x000C:
|
||||
case 0x000D:
|
||||
case 0x001C:
|
||||
case 0x001D:
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue