mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	Patch #626485: Support Unicode normalization.
This commit is contained in:
		
							parent
							
								
									74a530d42d
								
							
						
					
					
						commit
						677bde2dd1
					
				
					 6 changed files with 1053 additions and 23 deletions
				
			
		| 
						 | 
					@ -5,7 +5,7 @@
 | 
				
			||||||
\modulesynopsis{Access the Unicode Database.}
 | 
					\modulesynopsis{Access the Unicode Database.}
 | 
				
			||||||
\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
 | 
					\moduleauthor{Marc-Andre Lemburg}{mal@lemburg.com}
 | 
				
			||||||
\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
 | 
					\sectionauthor{Marc-Andre Lemburg}{mal@lemburg.com}
 | 
				
			||||||
 | 
					\sectionauthor{Martin v. L\"owis}{martin@v.loewis.de}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
\index{Unicode}
 | 
					\index{Unicode}
 | 
				
			||||||
\index{character}
 | 
					\index{character}
 | 
				
			||||||
| 
						 | 
					@ -14,10 +14,10 @@
 | 
				
			||||||
This module provides access to the Unicode Character Database which
 | 
					This module provides access to the Unicode Character Database which
 | 
				
			||||||
defines character properties for all Unicode characters. The data in
 | 
					defines character properties for all Unicode characters. The data in
 | 
				
			||||||
this database is based on the \file{UnicodeData.txt} file version
 | 
					this database is based on the \file{UnicodeData.txt} file version
 | 
				
			||||||
3.0.0 which is publically available from \url{ftp://ftp.unicode.org/}.
 | 
					3.2.0 which is publically available from \url{ftp://ftp.unicode.org/}.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
The module uses the same names and symbols as defined by the
 | 
					The module uses the same names and symbols as defined by the
 | 
				
			||||||
UnicodeData File Format 3.0.0 (see
 | 
					UnicodeData File Format 3.2.0 (see
 | 
				
			||||||
\url{http://www.unicode.org/Public/UNIDATA/UnicodeData.html}).  It
 | 
					\url{http://www.unicode.org/Public/UNIDATA/UnicodeData.html}).  It
 | 
				
			||||||
defines the following functions:
 | 
					defines the following functions:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -83,3 +83,37 @@ defines the following functions:
 | 
				
			||||||
  character \var{unichr} as string. An empty string is returned in case
 | 
					  character \var{unichr} as string. An empty string is returned in case
 | 
				
			||||||
  no such mapping is defined.
 | 
					  no such mapping is defined.
 | 
				
			||||||
\end{funcdesc}
 | 
					\end{funcdesc}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					\begin{funcdesc}{normalize}{form, unistr}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					Return the normal form \var{form} for the Unicode string \var{unistr}.
 | 
				
			||||||
 | 
					Valid values for \var{form} are 'NFC', 'NFKC', 'NFD', and 'NFKD'.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The Unicode standard defines various normalization forms of a Unicode
 | 
				
			||||||
 | 
					string, based on the definition of canonical equivalence and
 | 
				
			||||||
 | 
					compatibility equivalence. In Unicode, several characters can be
 | 
				
			||||||
 | 
					expressed in various ways. For example, the character U+00C7 (LATIN
 | 
				
			||||||
 | 
					CAPITAL LETTER C WITH CEDILLA) can also be expressed as the sequence
 | 
				
			||||||
 | 
					U+0043 (LATIN CAPITAL LETTER C) U+0327 (COMBINING CEDILLA).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					For each character, there are two normal forms: normal form C and
 | 
				
			||||||
 | 
					normal form D. Normal form D (NFD) is also known as canonical
 | 
				
			||||||
 | 
					decomposition, and translates each character into its decomposed form.
 | 
				
			||||||
 | 
					Normal form C (NFC) first applies a canonical decomposition, then
 | 
				
			||||||
 | 
					composes pre-combined characters again.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					In addition to these two forms, there are two additional normal forms
 | 
				
			||||||
 | 
					based on compatibility equivalence. In Unicode, certain characters are
 | 
				
			||||||
 | 
					supported which normally would be unified with other characters. For
 | 
				
			||||||
 | 
					example, U+2160 (ROMAN NUMERAL ONE) is really the same thing as U+0049
 | 
				
			||||||
 | 
					(LATIN CAPITAL LETTER I). However, it is supported in Unicode for
 | 
				
			||||||
 | 
					compatibility with existing character sets (e.g. gb2312).
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					The normal form KD (NFKD) will apply the compatibility decomposition,
 | 
				
			||||||
 | 
					i.e. replace all compatibility characters with their equivalents. The
 | 
				
			||||||
 | 
					normal form KC (NFKC) first applies the compatibility decomposition,
 | 
				
			||||||
 | 
					followed by the canonical composition.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					\versionadded{2.3}
 | 
				
			||||||
 | 
					\end{funcdesc}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
							
								
								
									
										68
									
								
								Lib/test/test_normalization.py
									
										
									
									
									
										Normal file
									
								
							
							
						
						
									
										68
									
								
								Lib/test/test_normalization.py
									
										
									
									
									
										Normal file
									
								
							| 
						 | 
					@ -0,0 +1,68 @@
 | 
				
			||||||
 | 
					from test.test_support import verbose, TestFailed, TestSkipped, verify
 | 
				
			||||||
 | 
					import sys
 | 
				
			||||||
 | 
					from unicodedata import normalize
 | 
				
			||||||
 | 
					try:
 | 
				
			||||||
 | 
					    data = open("NormalizationTest.txt","r").readlines()
 | 
				
			||||||
 | 
					except IOError:
 | 
				
			||||||
 | 
					    raise TestSkipped("NormalizationTest.txt not found, download from http://www.unicode.org/Public/UNIDATA/NormalizationTest.txt")
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class RangeError:
 | 
				
			||||||
 | 
					    pass
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def NFC(str):
 | 
				
			||||||
 | 
					    return normalize("NFC", str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def NFKC(str):
 | 
				
			||||||
 | 
					    return normalize("NFKC", str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def NFD(str):
 | 
				
			||||||
 | 
					    return normalize("NFD", str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def NFKD(str):
 | 
				
			||||||
 | 
					    return normalize("NFKD", str)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					def unistr(data):
 | 
				
			||||||
 | 
					    data = [int(x, 16) for x in data.split(" ")]
 | 
				
			||||||
 | 
					    for x in data:
 | 
				
			||||||
 | 
					        if x > sys.maxunicode:
 | 
				
			||||||
 | 
					            raise RangeError
 | 
				
			||||||
 | 
					    return u"".join([unichr(x) for x in data])
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					part1_data = {}
 | 
				
			||||||
 | 
					for line in data:
 | 
				
			||||||
 | 
					    if '#' in line:
 | 
				
			||||||
 | 
					        line = line.split('#')[0]
 | 
				
			||||||
 | 
					    line = line.strip()
 | 
				
			||||||
 | 
					    if not line:
 | 
				
			||||||
 | 
					        continue
 | 
				
			||||||
 | 
					    if line.startswith("@Part"):
 | 
				
			||||||
 | 
					        part = line
 | 
				
			||||||
 | 
					        continue
 | 
				
			||||||
 | 
					    try:
 | 
				
			||||||
 | 
					        c1,c2,c3,c4,c5 = [unistr(x) for x in line.split(';')[:-1]]
 | 
				
			||||||
 | 
					    except RangeError:
 | 
				
			||||||
 | 
					        # Skip unsupported characters
 | 
				
			||||||
 | 
					        continue
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    if verbose:
 | 
				
			||||||
 | 
					        print line
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
 | 
					    # Perform tests
 | 
				
			||||||
 | 
					    verify(c2 ==  NFC(c1) ==  NFC(c2) ==  NFC(c3), line)
 | 
				
			||||||
 | 
					    verify(c4 ==  NFC(c4) ==  NFC(c5), line)
 | 
				
			||||||
 | 
					    verify(c3 ==  NFD(c1) ==  NFD(c2) ==  NFD(c3), line)
 | 
				
			||||||
 | 
					    verify(c5 ==  NFD(c4) ==  NFD(c5), line)
 | 
				
			||||||
 | 
					    verify(c4 == NFKC(c1) == NFKC(c2) == NFKC(c3) == NFKC(c4) == NFKC(c5), line)
 | 
				
			||||||
 | 
					    verify(c5 == NFKD(c1) == NFKD(c2) == NFKD(c3) == NFKD(c4) == NFKD(c5), line)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    # Record part 1 data
 | 
				
			||||||
 | 
					    if part == "@Part1":
 | 
				
			||||||
 | 
					        part1_data[c1] = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					# Perform tests for all other data
 | 
				
			||||||
 | 
					for c in range(sys.maxunicode+1):
 | 
				
			||||||
 | 
					    X = unichr(c)
 | 
				
			||||||
 | 
					    if X in part1_data:
 | 
				
			||||||
 | 
					        continue
 | 
				
			||||||
 | 
					    assert X == NFC(X) == NFD(X) == NFKC(X) == NFKD(X), c
 | 
				
			||||||
 | 
					    
 | 
				
			||||||
| 
						 | 
					@ -317,8 +317,8 @@ Extension modules
 | 
				
			||||||
  available in source code, but not built automatically anymore, and
 | 
					  available in source code, but not built automatically anymore, and
 | 
				
			||||||
  is now named bsddb185.
 | 
					  is now named bsddb185.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- unicodedata was updated to Unicode 3.2. In now also supports names
 | 
					- unicodedata was updated to Unicode 3.2. It supports normalization
 | 
				
			||||||
  for Hangul syllables and CJK unified ideographs.
 | 
					  and names for Hangul syllables and CJK unified ideographs.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
- resource.getrlimit() now returns longs instead of ints.
 | 
					- resource.getrlimit() now returns longs instead of ints.
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -30,13 +30,9 @@ typedef struct {
 | 
				
			||||||
#include "unicodedata_db.h"
 | 
					#include "unicodedata_db.h"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static const _PyUnicode_DatabaseRecord*
 | 
					static const _PyUnicode_DatabaseRecord*
 | 
				
			||||||
_getrecord(PyUnicodeObject* v)
 | 
					_getrecord_ex(Py_UCS4 code)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    int code;
 | 
					 | 
				
			||||||
    int index;
 | 
					    int index;
 | 
				
			||||||
 | 
					 | 
				
			||||||
    code = (int) *PyUnicode_AS_UNICODE(v);
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
    if (code < 0 || code >= 0x110000)
 | 
					    if (code < 0 || code >= 0x110000)
 | 
				
			||||||
        index = 0;
 | 
					        index = 0;
 | 
				
			||||||
    else {
 | 
					    else {
 | 
				
			||||||
| 
						 | 
					@ -47,6 +43,12 @@ _getrecord(PyUnicodeObject* v)
 | 
				
			||||||
    return &_PyUnicode_Database_Records[index];
 | 
					    return &_PyUnicode_Database_Records[index];
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static const _PyUnicode_DatabaseRecord*
 | 
				
			||||||
 | 
					_getrecord(PyUnicodeObject* v)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    return _getrecord_ex(*PyUnicode_AS_UNICODE(v));
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* --- Module API --------------------------------------------------------- */
 | 
					/* --- Module API --------------------------------------------------------- */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
static PyObject *
 | 
					static PyObject *
 | 
				
			||||||
| 
						 | 
					@ -253,6 +255,276 @@ unicodedata_decomposition(PyObject *self, PyObject *args)
 | 
				
			||||||
    return PyString_FromString(decomp);
 | 
					    return PyString_FromString(decomp);
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					void
 | 
				
			||||||
 | 
					get_decomp_record(Py_UCS4 code, int *index, int *prefix, int *count)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    if (code < 0 || code >= 0x110000) {
 | 
				
			||||||
 | 
					        *index = 0;
 | 
				
			||||||
 | 
					    } 
 | 
				
			||||||
 | 
					    else {
 | 
				
			||||||
 | 
					        *index = decomp_index1[(code>>DECOMP_SHIFT)];
 | 
				
			||||||
 | 
					        *index = decomp_index2[(*index<<DECOMP_SHIFT)+
 | 
				
			||||||
 | 
					                               (code&((1<<DECOMP_SHIFT)-1))];
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					    /* high byte is number of hex bytes (usually one or two), low byte
 | 
				
			||||||
 | 
					       is prefix code (from*/
 | 
				
			||||||
 | 
					    *count = decomp_data[*index] >> 8;
 | 
				
			||||||
 | 
					    *prefix = decomp_data[*index] & 255;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    (*index)++;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					#define SBase   0xAC00
 | 
				
			||||||
 | 
					#define LBase   0x1100
 | 
				
			||||||
 | 
					#define VBase   0x1161
 | 
				
			||||||
 | 
					#define TBase   0x11A7
 | 
				
			||||||
 | 
					#define LCount  19
 | 
				
			||||||
 | 
					#define VCount  21
 | 
				
			||||||
 | 
					#define TCount  28
 | 
				
			||||||
 | 
					#define NCount  (VCount*TCount)
 | 
				
			||||||
 | 
					#define SCount  (LCount*NCount)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static PyObject*
 | 
				
			||||||
 | 
					nfd_nfkd(PyObject *input, int k)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    PyObject *result;
 | 
				
			||||||
 | 
					    Py_UNICODE *i, *end, *o;
 | 
				
			||||||
 | 
					    /* Longest decomposition in Unicode 3.2: U+FDFA */
 | 
				
			||||||
 | 
					    Py_UNICODE stack[20]; 
 | 
				
			||||||
 | 
					    int space, stackptr, isize;
 | 
				
			||||||
 | 
					    int index, prefix, count;
 | 
				
			||||||
 | 
					    unsigned char prev, cur;
 | 
				
			||||||
 | 
						
 | 
				
			||||||
 | 
					    stackptr = 0;
 | 
				
			||||||
 | 
					    isize = PyUnicode_GET_SIZE(input);
 | 
				
			||||||
 | 
					    /* Overallocate atmost 10 characters. */
 | 
				
			||||||
 | 
					    space = (isize > 10 ? 10 : isize) + isize;
 | 
				
			||||||
 | 
					    result = PyUnicode_FromUnicode(NULL, space);
 | 
				
			||||||
 | 
					    if (!result)
 | 
				
			||||||
 | 
					        return NULL;
 | 
				
			||||||
 | 
					    i = PyUnicode_AS_UNICODE(input);
 | 
				
			||||||
 | 
					    end = i + isize;
 | 
				
			||||||
 | 
					    o = PyUnicode_AS_UNICODE(result);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    while (i < end) {
 | 
				
			||||||
 | 
					        stack[stackptr++] = *i++;
 | 
				
			||||||
 | 
					        while(stackptr) {
 | 
				
			||||||
 | 
					            Py_UNICODE code = stack[--stackptr];
 | 
				
			||||||
 | 
					            if (!space) {
 | 
				
			||||||
 | 
					                space = PyString_GET_SIZE(result) + 10;
 | 
				
			||||||
 | 
					                if (PyUnicode_Resize(&result, space) == -1)
 | 
				
			||||||
 | 
					                    return NULL;
 | 
				
			||||||
 | 
					                o = PyUnicode_AS_UNICODE(result) + space - 10;
 | 
				
			||||||
 | 
					                space = 10;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            /* Hangul Decomposition. */
 | 
				
			||||||
 | 
					            if (SBase <= code && code < (SBase+SCount)) {
 | 
				
			||||||
 | 
					                int SIndex = code - SBase;
 | 
				
			||||||
 | 
					                int L = LBase + SIndex / NCount;
 | 
				
			||||||
 | 
					                int V = VBase + (SIndex % NCount) / TCount;
 | 
				
			||||||
 | 
					                int T = TBase + SIndex % TCount;
 | 
				
			||||||
 | 
					                *o++ = L;
 | 
				
			||||||
 | 
					                *o++ = V;
 | 
				
			||||||
 | 
					                space -= 2;
 | 
				
			||||||
 | 
					                if (T != TBase) {
 | 
				
			||||||
 | 
					                    *o++ = T;
 | 
				
			||||||
 | 
					                    space --;
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            /* Other decompoistions. */
 | 
				
			||||||
 | 
					            get_decomp_record(code, &index, &prefix, &count);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					            /* Copy character if it is not decomposable, or has a
 | 
				
			||||||
 | 
					               compatibility decomposition, but we do NFD. */
 | 
				
			||||||
 | 
					            if (!count || (prefix && !k)) {
 | 
				
			||||||
 | 
					                *o++ = code;
 | 
				
			||||||
 | 
					                space--;
 | 
				
			||||||
 | 
					                continue;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					            /* Copy decomposition onto the stack, in reverse
 | 
				
			||||||
 | 
					               order.  */
 | 
				
			||||||
 | 
					            while(count) {
 | 
				
			||||||
 | 
					                code = decomp_data[index + (--count)];
 | 
				
			||||||
 | 
					                stack[stackptr++] = code;
 | 
				
			||||||
 | 
					            }
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Drop overallocation. Cannot fail. */
 | 
				
			||||||
 | 
					    PyUnicode_Resize(&result, PyUnicode_GET_SIZE(result) - space);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    /* Sort canonically. */
 | 
				
			||||||
 | 
					    i = PyUnicode_AS_UNICODE(result);
 | 
				
			||||||
 | 
					    prev = _getrecord_ex(*i)->combining;
 | 
				
			||||||
 | 
					    end = i + PyUnicode_GET_SIZE(result);
 | 
				
			||||||
 | 
					    for (i++; i < end; i++) {
 | 
				
			||||||
 | 
					        cur = _getrecord_ex(*i)->combining;
 | 
				
			||||||
 | 
					        if (prev == 0 || cur == 0 || prev <= cur) {
 | 
				
			||||||
 | 
					            prev = cur;
 | 
				
			||||||
 | 
					            continue;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        /* Non-canonical order. Need to switch *i with previous. */
 | 
				
			||||||
 | 
					        o = i - 1;
 | 
				
			||||||
 | 
					        while (1) {
 | 
				
			||||||
 | 
					            Py_UNICODE tmp = o[1];
 | 
				
			||||||
 | 
					            o[1] = o[0];
 | 
				
			||||||
 | 
					            o[0] = tmp;
 | 
				
			||||||
 | 
					            o--;
 | 
				
			||||||
 | 
					            if (o < PyUnicode_AS_UNICODE(result))
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					            prev = _getrecord_ex(*o)->combining;
 | 
				
			||||||
 | 
					            if (prev == 0 || prev <= cur)
 | 
				
			||||||
 | 
					                break;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					        prev = _getrecord_ex(*i)->combining;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return result;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static int
 | 
				
			||||||
 | 
					find_nfc_index(struct reindex* nfc, Py_UNICODE code)
 | 
				
			||||||
 | 
					{
 | 
				
			||||||
 | 
					    int index;
 | 
				
			||||||
 | 
					    for (index = 0; nfc[index].start; index++) {
 | 
				
			||||||
 | 
					        int start = nfc[index].start;
 | 
				
			||||||
 | 
					        if (code < start)
 | 
				
			||||||
 | 
					            return -1;
 | 
				
			||||||
 | 
					        if (code <= start + nfc[index].count) {
 | 
				
			||||||
 | 
					            int delta = code - start;
 | 
				
			||||||
 | 
					            return nfc[index].index + delta;
 | 
				
			||||||
 | 
					        }
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					    return -1;
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Return a new unicode object holding the composed form of input.

					   input is first decomposed with nfd_nfkd(input, k) (canonically if k
					   is zero, with compatibility decompositions otherwise) and the result
					   is then composed in place, giving NFC (k == 0) or NFKC (k != 0).

					   Returns NULL with an exception set on failure. */
					static PyObject*
					nfc_nfkc(PyObject *input, int k)
					{
					    PyObject *result;
					    Py_UNICODE *i, *i1, *o, *end;
					    int f,l,index,index1,comb;
					    Py_UNICODE code;
					    /* Positions ahead of i whose character has already been merged
					       into an earlier character and must not be copied again. */
					    Py_UNICODE *skipped[20];
					    int cskipped = 0;

					    result = nfd_nfkd(input, k);
					    if (!result)
					        return NULL;

					    /* We are going to modify result in-place.
					       If nfd_nfkd is changed to sometimes return the input,
					       this code needs to be reviewed. */
					    assert(result != input);

					    i = PyUnicode_AS_UNICODE(result);
					    end = i + PyUnicode_GET_SIZE(result);
					    o = PyUnicode_AS_UNICODE(result);

					  again:
					    while (i < end) {
					        for (index = 0; index < cskipped; index++) {
					            if (skipped[index] == i) {
					                /* *i character is skipped.
					                   Remove from list. */
					                skipped[index] = skipped[cskipped-1];
					                cskipped--;
					                i++;
					                goto again; /* continue while */
					            }
					        }
					        /* Hangul Composition. We don't need to check for <LV,T>
					           pairs, since we always have decomposed data.
					           Both upper bounds are exclusive: LBase+LCount and
					           VBase+VCount are one past the last composable character. */
					        if (LBase <= *i && *i < (LBase+LCount) &&
					            i + 1 < end &&
					            VBase <= i[1] && i[1] < (VBase+VCount)) {
					            int LIndex, VIndex;
					            LIndex = i[0] - LBase;
					            VIndex = i[1] - VBase;
					            code = SBase + (LIndex*VCount+VIndex)*TCount;
					            i+=2;
					            /* TBase itself stands for "no trailing part" and must
					               not be consumed, hence the strict lower bound. */
					            if (i < end &&
					                TBase < *i && *i < (TBase+TCount)) {
					                code += *i-TBase;
					                i++;
					            }
					            *o++ = code;
					            continue;
					        }

					        f = find_nfc_index(nfc_first, *i);
					        if (f == -1) {
					            /* *i never starts a composition; copy it unchanged. */
					            *o++ = *i++;
					            continue;
					        }
					        /* Find next unblocked character. */
					        i1 = i+1;
					        /* Combining class of the last character between i and i1
					           that was left in place (0 if there is none). */
					        comb = 0;
					        while (i1 < end) {
					            int comb1 = _getrecord_ex(*i1)->combining;
					            if (comb) {
					                /* A starter that follows an uncombined combining
					                   character is blocked, and nothing after it can
					                   combine with *i either. */
					                if (comb1 == 0)
					                    break;
					                if (comb >= comb1) {
					                    /* Character is blocked. */
					                    i1++;
					                    continue;
					                }
					            }
					            l = find_nfc_index(nfc_last, *i1);
					            /* *i1 cannot be combined with *i. If *i1
					               is a starter, we don't need to look further.
					               Otherwise, record the combining class. */
					            if (l == -1) {
					              not_combinable:
					                if (comb1 == 0)
					                    break;
					                comb = comb1;
					                i1++;
					                continue;
					            }
					            index = f*TOTAL_LAST + l;
					            index1 = comp_index[index >> COMP_SHIFT];
					            code = comp_data[(index1<<COMP_SHIFT)+
					                             (index&((1<<COMP_SHIFT)-1))];
					            /* No composite exists for this pair.  The pair is also
					               left uncombined when the skip list is full, so that
					               skipped[] is never written out of bounds. */
					            if (code == 0 ||
					                cskipped >= (int)(sizeof(skipped)/sizeof(skipped[0])))
					                goto not_combinable;

					            /* Replace the original character. */
					            *i = code;
					            /* Mark the second character unused. */
					            skipped[cskipped++] = i1;
					            i1++;
					            /* The composite may itself start a further composition. */
					            f = find_nfc_index(nfc_first, *i);
					            if (f == -1)
					                break;
					        }
					        *o++ = *i++;
					    }
					    /* Drop the tail left over after characters were merged. */
					    if (o != end &&
					        PyUnicode_Resize(&result, o - PyUnicode_AS_UNICODE(result)) == -1) {
					        Py_DECREF(result);
					        return NULL;
					    }
					    return result;
					}
 | 
				
			||||||
 | 
							
 | 
				
			||||||
 | 
					/* normalize(form, unistr) -> unicode

					   Return the normal form `form` of the unicode string `unistr`.
					   form must be one of "NFC", "NFKC", "NFD" or "NFKD"; any other value
					   raises ValueError. */
					static PyObject*
					unicodedata_normalize(PyObject *self, PyObject *args)
					{
					    char *form;
					    PyObject *input;
					    int compose;   /* nonzero for NFC / NFKC */
					    int compat;    /* nonzero for NFKC / NFKD */

					    /* The text after ':' is the function name used in error
					       messages, so it has to match the exported name. */
					    if(!PyArg_ParseTuple(args, "sO!:normalize",
					                         &form, &PyUnicode_Type, &input))
					        return NULL;

					    if (strcmp(form, "NFC") == 0) {
					        compose = 1;
					        compat = 0;
					    }
					    else if (strcmp(form, "NFKC") == 0) {
					        compose = 1;
					        compat = 1;
					    }
					    else if (strcmp(form, "NFD") == 0) {
					        compose = 0;
					        compat = 0;
					    }
					    else if (strcmp(form, "NFKD") == 0) {
					        compose = 0;
					        compat = 1;
					    }
					    else {
					        PyErr_SetString(PyExc_ValueError, "invalid normalization form");
					        return NULL;
					    }

					    /* An empty string is already normalized in every form.  Returning
					       it directly avoids allocating and resizing an empty result. */
					    if (PyUnicode_GET_SIZE(input) == 0) {
					        Py_INCREF(input);
					        return input;
					    }

					    if (compose)
					        return nfc_nfkc(input, compat);
					    return nfd_nfkd(input, compat);
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* -------------------------------------------------------------------- */
 | 
					/* -------------------------------------------------------------------- */
 | 
				
			||||||
/* unicode character name tables */
 | 
					/* unicode character name tables */
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -277,16 +549,6 @@ _gethash(const char *s, int len, int scale)
 | 
				
			||||||
    return h;
 | 
					    return h;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
#define SBase   0xAC00
 | 
					 | 
				
			||||||
#define LBase   0x1100
 | 
					 | 
				
			||||||
#define VBase   0x1161
 | 
					 | 
				
			||||||
#define TBase   0x11A7
 | 
					 | 
				
			||||||
#define LCount  19
 | 
					 | 
				
			||||||
#define VCount  21
 | 
					 | 
				
			||||||
#define TCount  28
 | 
					 | 
				
			||||||
#define NCount  (VCount*TCount)
 | 
					 | 
				
			||||||
#define SCount  (LCount*NCount)
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
static char *hangul_syllables[][3] = {
 | 
					static char *hangul_syllables[][3] = {
 | 
				
			||||||
    { "G",  "A",   ""   },
 | 
					    { "G",  "A",   ""   },
 | 
				
			||||||
    { "GG", "AE",  "G"  },
 | 
					    { "GG", "AE",  "G"  },
 | 
				
			||||||
| 
						 | 
					@ -594,6 +856,7 @@ static PyMethodDef unicodedata_functions[] = {
 | 
				
			||||||
    {"decomposition",unicodedata_decomposition, METH_VARARGS},
 | 
					    {"decomposition",unicodedata_decomposition, METH_VARARGS},
 | 
				
			||||||
    {"name", unicodedata_name, METH_VARARGS},
 | 
					    {"name", unicodedata_name, METH_VARARGS},
 | 
				
			||||||
    {"lookup", unicodedata_lookup, METH_VARARGS},
 | 
					    {"lookup", unicodedata_lookup, METH_VARARGS},
 | 
				
			||||||
 | 
					    {"normalize", unicodedata_normalize, METH_VARARGS},
 | 
				
			||||||
    {NULL, NULL}		/* sentinel */
 | 
					    {NULL, NULL}		/* sentinel */
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -618,5 +881,6 @@ initunicodedata(void)
 | 
				
			||||||
/* 
 | 
					/* 
 | 
				
			||||||
Local variables:
 | 
					Local variables:
 | 
				
			||||||
c-basic-offset: 4
 | 
					c-basic-offset: 4
 | 
				
			||||||
 | 
					indent-tabs-mode: nil
 | 
				
			||||||
End:
 | 
					End:
 | 
				
			||||||
*/
 | 
					*/
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -127,6 +127,241 @@ const _PyUnicode_DatabaseRecord _PyUnicode_Database_Records[] = {
 | 
				
			||||||
    {27, 0, 1, 0},
 | 
					    {27, 0, 1, 0},
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* Reindexing of NFC first characters. */
 | 
				
			||||||
 | 
					#define TOTAL_FIRST 356
 | 
				
			||||||
 | 
					#define TOTAL_LAST 53
 | 
				
			||||||
 | 
					struct reindex{int start;short count,index;};
 | 
				
			||||||
 | 
					struct reindex nfc_first[] = {
 | 
				
			||||||
 | 
					  { 60, 2, 0},
 | 
				
			||||||
 | 
					  { 65, 15, 3},
 | 
				
			||||||
 | 
					  { 82, 8, 19},
 | 
				
			||||||
 | 
					  { 97, 15, 28},
 | 
				
			||||||
 | 
					  { 114, 8, 44},
 | 
				
			||||||
 | 
					  { 168, 0, 53},
 | 
				
			||||||
 | 
					  { 194, 0, 54},
 | 
				
			||||||
 | 
					  { 196, 3, 55},
 | 
				
			||||||
 | 
					  { 202, 0, 59},
 | 
				
			||||||
 | 
					  { 207, 0, 60},
 | 
				
			||||||
 | 
					  { 212, 2, 61},
 | 
				
			||||||
 | 
					  { 216, 0, 64},
 | 
				
			||||||
 | 
					  { 220, 0, 65},
 | 
				
			||||||
 | 
					  { 226, 0, 66},
 | 
				
			||||||
 | 
					  { 228, 3, 67},
 | 
				
			||||||
 | 
					  { 234, 0, 71},
 | 
				
			||||||
 | 
					  { 239, 0, 72},
 | 
				
			||||||
 | 
					  { 244, 2, 73},
 | 
				
			||||||
 | 
					  { 248, 0, 76},
 | 
				
			||||||
 | 
					  { 252, 0, 77},
 | 
				
			||||||
 | 
					  { 258, 1, 78},
 | 
				
			||||||
 | 
					  { 274, 1, 80},
 | 
				
			||||||
 | 
					  { 332, 1, 82},
 | 
				
			||||||
 | 
					  { 346, 1, 84},
 | 
				
			||||||
 | 
					  { 352, 1, 86},
 | 
				
			||||||
 | 
					  { 360, 3, 88},
 | 
				
			||||||
 | 
					  { 383, 0, 92},
 | 
				
			||||||
 | 
					  { 416, 1, 93},
 | 
				
			||||||
 | 
					  { 431, 1, 95},
 | 
				
			||||||
 | 
					  { 439, 0, 97},
 | 
				
			||||||
 | 
					  { 490, 1, 98},
 | 
				
			||||||
 | 
					  { 550, 3, 100},
 | 
				
			||||||
 | 
					  { 558, 1, 104},
 | 
				
			||||||
 | 
					  { 658, 0, 106},
 | 
				
			||||||
 | 
					  { 913, 0, 107},
 | 
				
			||||||
 | 
					  { 917, 0, 108},
 | 
				
			||||||
 | 
					  { 919, 0, 109},
 | 
				
			||||||
 | 
					  { 921, 0, 110},
 | 
				
			||||||
 | 
					  { 927, 0, 111},
 | 
				
			||||||
 | 
					  { 929, 0, 112},
 | 
				
			||||||
 | 
					  { 933, 0, 113},
 | 
				
			||||||
 | 
					  { 937, 0, 114},
 | 
				
			||||||
 | 
					  { 940, 0, 115},
 | 
				
			||||||
 | 
					  { 942, 0, 116},
 | 
				
			||||||
 | 
					  { 945, 0, 117},
 | 
				
			||||||
 | 
					  { 949, 0, 118},
 | 
				
			||||||
 | 
					  { 951, 0, 119},
 | 
				
			||||||
 | 
					  { 953, 0, 120},
 | 
				
			||||||
 | 
					  { 959, 0, 121},
 | 
				
			||||||
 | 
					  { 961, 0, 122},
 | 
				
			||||||
 | 
					  { 965, 0, 123},
 | 
				
			||||||
 | 
					  { 969, 2, 124},
 | 
				
			||||||
 | 
					  { 974, 0, 127},
 | 
				
			||||||
 | 
					  { 978, 0, 128},
 | 
				
			||||||
 | 
					  { 1030, 0, 129},
 | 
				
			||||||
 | 
					  { 1040, 0, 130},
 | 
				
			||||||
 | 
					  { 1043, 0, 131},
 | 
				
			||||||
 | 
					  { 1045, 3, 132},
 | 
				
			||||||
 | 
					  { 1050, 0, 136},
 | 
				
			||||||
 | 
					  { 1054, 0, 137},
 | 
				
			||||||
 | 
					  { 1059, 0, 138},
 | 
				
			||||||
 | 
					  { 1063, 0, 139},
 | 
				
			||||||
 | 
					  { 1067, 0, 140},
 | 
				
			||||||
 | 
					  { 1069, 0, 141},
 | 
				
			||||||
 | 
					  { 1072, 0, 142},
 | 
				
			||||||
 | 
					  { 1075, 0, 143},
 | 
				
			||||||
 | 
					  { 1077, 3, 144},
 | 
				
			||||||
 | 
					  { 1082, 0, 148},
 | 
				
			||||||
 | 
					  { 1086, 0, 149},
 | 
				
			||||||
 | 
					  { 1091, 0, 150},
 | 
				
			||||||
 | 
					  { 1095, 0, 151},
 | 
				
			||||||
 | 
					  { 1099, 0, 152},
 | 
				
			||||||
 | 
					  { 1101, 0, 153},
 | 
				
			||||||
 | 
					  { 1110, 0, 154},
 | 
				
			||||||
 | 
					  { 1140, 1, 155},
 | 
				
			||||||
 | 
					  { 1240, 1, 157},
 | 
				
			||||||
 | 
					  { 1256, 1, 159},
 | 
				
			||||||
 | 
					  { 1575, 0, 161},
 | 
				
			||||||
 | 
					  { 1608, 0, 162},
 | 
				
			||||||
 | 
					  { 1610, 0, 163},
 | 
				
			||||||
 | 
					  { 1729, 0, 164},
 | 
				
			||||||
 | 
					  { 1746, 0, 165},
 | 
				
			||||||
 | 
					  { 1749, 0, 166},
 | 
				
			||||||
 | 
					  { 2344, 0, 167},
 | 
				
			||||||
 | 
					  { 2352, 0, 168},
 | 
				
			||||||
 | 
					  { 2355, 0, 169},
 | 
				
			||||||
 | 
					  { 2503, 0, 170},
 | 
				
			||||||
 | 
					  { 2887, 0, 171},
 | 
				
			||||||
 | 
					  { 2962, 0, 172},
 | 
				
			||||||
 | 
					  { 3014, 1, 173},
 | 
				
			||||||
 | 
					  { 3142, 0, 175},
 | 
				
			||||||
 | 
					  { 3263, 0, 176},
 | 
				
			||||||
 | 
					  { 3270, 0, 177},
 | 
				
			||||||
 | 
					  { 3274, 0, 178},
 | 
				
			||||||
 | 
					  { 3398, 1, 179},
 | 
				
			||||||
 | 
					  { 3545, 0, 181},
 | 
				
			||||||
 | 
					  { 3548, 0, 182},
 | 
				
			||||||
 | 
					  { 4133, 0, 183},
 | 
				
			||||||
 | 
					  { 7734, 1, 184},
 | 
				
			||||||
 | 
					  { 7770, 1, 186},
 | 
				
			||||||
 | 
					  { 7778, 1, 188},
 | 
				
			||||||
 | 
					  { 7840, 1, 190},
 | 
				
			||||||
 | 
					  { 7864, 1, 192},
 | 
				
			||||||
 | 
					  { 7884, 1, 194},
 | 
				
			||||||
 | 
					  { 7936, 17, 196},
 | 
				
			||||||
 | 
					  { 7960, 1, 214},
 | 
				
			||||||
 | 
					  { 7968, 17, 216},
 | 
				
			||||||
 | 
					  { 7992, 1, 234},
 | 
				
			||||||
 | 
					  { 8000, 1, 236},
 | 
				
			||||||
 | 
					  { 8008, 1, 238},
 | 
				
			||||||
 | 
					  { 8016, 1, 240},
 | 
				
			||||||
 | 
					  { 8025, 0, 242},
 | 
				
			||||||
 | 
					  { 8032, 16, 243},
 | 
				
			||||||
 | 
					  { 8052, 0, 260},
 | 
				
			||||||
 | 
					  { 8060, 0, 261},
 | 
				
			||||||
 | 
					  { 8118, 0, 262},
 | 
				
			||||||
 | 
					  { 8127, 0, 263},
 | 
				
			||||||
 | 
					  { 8134, 0, 264},
 | 
				
			||||||
 | 
					  { 8182, 0, 265},
 | 
				
			||||||
 | 
					  { 8190, 0, 266},
 | 
				
			||||||
 | 
					  { 8592, 0, 267},
 | 
				
			||||||
 | 
					  { 8594, 0, 268},
 | 
				
			||||||
 | 
					  { 8596, 0, 269},
 | 
				
			||||||
 | 
					  { 8656, 0, 270},
 | 
				
			||||||
 | 
					  { 8658, 0, 271},
 | 
				
			||||||
 | 
					  { 8660, 0, 272},
 | 
				
			||||||
 | 
					  { 8707, 0, 273},
 | 
				
			||||||
 | 
					  { 8712, 0, 274},
 | 
				
			||||||
 | 
					  { 8715, 0, 275},
 | 
				
			||||||
 | 
					  { 8739, 0, 276},
 | 
				
			||||||
 | 
					  { 8741, 0, 277},
 | 
				
			||||||
 | 
					  { 8764, 0, 278},
 | 
				
			||||||
 | 
					  { 8771, 0, 279},
 | 
				
			||||||
 | 
					  { 8773, 0, 280},
 | 
				
			||||||
 | 
					  { 8776, 0, 281},
 | 
				
			||||||
 | 
					  { 8781, 0, 282},
 | 
				
			||||||
 | 
					  { 8801, 0, 283},
 | 
				
			||||||
 | 
					  { 8804, 1, 284},
 | 
				
			||||||
 | 
					  { 8818, 1, 286},
 | 
				
			||||||
 | 
					  { 8822, 1, 288},
 | 
				
			||||||
 | 
					  { 8826, 3, 290},
 | 
				
			||||||
 | 
					  { 8834, 1, 294},
 | 
				
			||||||
 | 
					  { 8838, 1, 296},
 | 
				
			||||||
 | 
					  { 8849, 1, 298},
 | 
				
			||||||
 | 
					  { 8866, 0, 300},
 | 
				
			||||||
 | 
					  { 8872, 1, 301},
 | 
				
			||||||
 | 
					  { 8875, 0, 303},
 | 
				
			||||||
 | 
					  { 8882, 3, 304},
 | 
				
			||||||
 | 
					  { 12358, 0, 308},
 | 
				
			||||||
 | 
					  { 12363, 0, 309},
 | 
				
			||||||
 | 
					  { 12365, 0, 310},
 | 
				
			||||||
 | 
					  { 12367, 0, 311},
 | 
				
			||||||
 | 
					  { 12369, 0, 312},
 | 
				
			||||||
 | 
					  { 12371, 0, 313},
 | 
				
			||||||
 | 
					  { 12373, 0, 314},
 | 
				
			||||||
 | 
					  { 12375, 0, 315},
 | 
				
			||||||
 | 
					  { 12377, 0, 316},
 | 
				
			||||||
 | 
					  { 12379, 0, 317},
 | 
				
			||||||
 | 
					  { 12381, 0, 318},
 | 
				
			||||||
 | 
					  { 12383, 0, 319},
 | 
				
			||||||
 | 
					  { 12385, 0, 320},
 | 
				
			||||||
 | 
					  { 12388, 0, 321},
 | 
				
			||||||
 | 
					  { 12390, 0, 322},
 | 
				
			||||||
 | 
					  { 12392, 0, 323},
 | 
				
			||||||
 | 
					  { 12399, 0, 324},
 | 
				
			||||||
 | 
					  { 12402, 0, 325},
 | 
				
			||||||
 | 
					  { 12405, 0, 326},
 | 
				
			||||||
 | 
					  { 12408, 0, 327},
 | 
				
			||||||
 | 
					  { 12411, 0, 328},
 | 
				
			||||||
 | 
					  { 12445, 0, 329},
 | 
				
			||||||
 | 
					  { 12454, 0, 330},
 | 
				
			||||||
 | 
					  { 12459, 0, 331},
 | 
				
			||||||
 | 
					  { 12461, 0, 332},
 | 
				
			||||||
 | 
					  { 12463, 0, 333},
 | 
				
			||||||
 | 
					  { 12465, 0, 334},
 | 
				
			||||||
 | 
					  { 12467, 0, 335},
 | 
				
			||||||
 | 
					  { 12469, 0, 336},
 | 
				
			||||||
 | 
					  { 12471, 0, 337},
 | 
				
			||||||
 | 
					  { 12473, 0, 338},
 | 
				
			||||||
 | 
					  { 12475, 0, 339},
 | 
				
			||||||
 | 
					  { 12477, 0, 340},
 | 
				
			||||||
 | 
					  { 12479, 0, 341},
 | 
				
			||||||
 | 
					  { 12481, 0, 342},
 | 
				
			||||||
 | 
					  { 12484, 0, 343},
 | 
				
			||||||
 | 
					  { 12486, 0, 344},
 | 
				
			||||||
 | 
					  { 12488, 0, 345},
 | 
				
			||||||
 | 
					  { 12495, 0, 346},
 | 
				
			||||||
 | 
					  { 12498, 0, 347},
 | 
				
			||||||
 | 
					  { 12501, 0, 348},
 | 
				
			||||||
 | 
					  { 12504, 0, 349},
 | 
				
			||||||
 | 
					  { 12507, 0, 350},
 | 
				
			||||||
 | 
					  { 12527, 3, 351},
 | 
				
			||||||
 | 
					  { 12541, 0, 355},
 | 
				
			||||||
 | 
					  {0,0,0}
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct reindex nfc_last[] = {
 | 
				
			||||||
 | 
					  { 768, 4, 0},
 | 
				
			||||||
 | 
					  { 774, 6, 5},
 | 
				
			||||||
 | 
					  { 783, 0, 12},
 | 
				
			||||||
 | 
					  { 785, 0, 13},
 | 
				
			||||||
 | 
					  { 787, 1, 14},
 | 
				
			||||||
 | 
					  { 795, 0, 16},
 | 
				
			||||||
 | 
					  { 803, 5, 17},
 | 
				
			||||||
 | 
					  { 813, 1, 23},
 | 
				
			||||||
 | 
					  { 816, 1, 25},
 | 
				
			||||||
 | 
					  { 824, 0, 27},
 | 
				
			||||||
 | 
					  { 834, 0, 28},
 | 
				
			||||||
 | 
					  { 837, 0, 29},
 | 
				
			||||||
 | 
					  { 1619, 2, 30},
 | 
				
			||||||
 | 
					  { 2364, 0, 33},
 | 
				
			||||||
 | 
					  { 2494, 0, 34},
 | 
				
			||||||
 | 
					  { 2519, 0, 35},
 | 
				
			||||||
 | 
					  { 2878, 0, 36},
 | 
				
			||||||
 | 
					  { 2902, 1, 37},
 | 
				
			||||||
 | 
					  { 3006, 0, 39},
 | 
				
			||||||
 | 
					  { 3031, 0, 40},
 | 
				
			||||||
 | 
					  { 3158, 0, 41},
 | 
				
			||||||
 | 
					  { 3266, 0, 42},
 | 
				
			||||||
 | 
					  { 3285, 1, 43},
 | 
				
			||||||
 | 
					  { 3390, 0, 45},
 | 
				
			||||||
 | 
					  { 3415, 0, 46},
 | 
				
			||||||
 | 
					  { 3530, 0, 47},
 | 
				
			||||||
 | 
					  { 3535, 0, 48},
 | 
				
			||||||
 | 
					  { 3551, 0, 49},
 | 
				
			||||||
 | 
					  { 4142, 0, 50},
 | 
				
			||||||
 | 
					  { 12441, 1, 51},
 | 
				
			||||||
 | 
					  {0,0,0}
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
/* string literals */
 | 
					/* string literals */
 | 
				
			||||||
const char *_PyUnicode_CategoryNames[] = {
 | 
					const char *_PyUnicode_CategoryNames[] = {
 | 
				
			||||||
    "Cn",
 | 
					    "Cn",
 | 
				
			||||||
| 
						 | 
					@ -3209,3 +3444,345 @@ static unsigned short decomp_index2[] = {
 | 
				
			||||||
    0, 0, 
 | 
					    0, 0, 
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					/* NFC pairs */
 | 
				
			||||||
 | 
					#define COMP_SHIFT 3
 | 
				
			||||||
 | 
					static unsigned short comp_index[] = {
 | 
				
			||||||
 | 
					    0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 3, 0, 0, 4, 5, 6, 7, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 8, 9, 10, 0, 0, 0, 11, 12, 13, 0, 0, 0, 0, 14, 15, 16, 17, 0, 0, 
 | 
				
			||||||
 | 
					    18, 19, 20, 21, 0, 0, 0, 22, 0, 0, 0, 0, 0, 23, 24, 25, 26, 0, 0, 0, 27, 
 | 
				
			||||||
 | 
					    28, 29, 30, 0, 0, 31, 32, 33, 34, 35, 0, 0, 36, 0, 0, 0, 0, 0, 0, 37, 38, 
 | 
				
			||||||
 | 
					    39, 40, 0, 0, 41, 0, 42, 43, 44, 0, 0, 45, 46, 47, 0, 0, 0, 0, 48, 49, 
 | 
				
			||||||
 | 
					    50, 51, 0, 0, 52, 53, 54, 55, 0, 0, 0, 56, 57, 0, 0, 0, 0, 0, 58, 59, 60, 
 | 
				
			||||||
 | 
					    61, 0, 0, 62, 63, 64, 65, 0, 0, 0, 66, 67, 68, 69, 0, 0, 70, 71, 72, 73, 
 | 
				
			||||||
 | 
					    0, 0, 0, 74, 0, 75, 0, 0, 0, 0, 76, 0, 77, 0, 0, 0, 0, 78, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    79, 80, 81, 0, 0, 0, 0, 82, 83, 84, 85, 0, 0, 86, 87, 88, 89, 0, 0, 0, 
 | 
				
			||||||
 | 
					    90, 0, 91, 92, 0, 0, 93, 94, 95, 96, 0, 0, 0, 0, 97, 98, 99, 0, 0, 0, 
 | 
				
			||||||
 | 
					    100, 101, 102, 103, 0, 0, 0, 104, 0, 0, 0, 0, 0, 105, 106, 107, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 108, 109, 110, 111, 0, 0, 112, 113, 114, 115, 0, 0, 0, 116, 117, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 118, 0, 119, 120, 121, 0, 0, 122, 123, 124, 125, 0, 0, 0, 126, 0, 
 | 
				
			||||||
 | 
					    127, 0, 0, 0, 128, 129, 130, 131, 0, 0, 0, 132, 133, 134, 135, 0, 0, 0, 
 | 
				
			||||||
 | 
					    136, 0, 0, 0, 0, 0, 137, 138, 139, 140, 0, 0, 0, 141, 142, 143, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 144, 145, 146, 147, 0, 0, 148, 149, 150, 151, 0, 0, 0, 152, 0, 153, 0, 
 | 
				
			||||||
 | 
					    0, 0, 154, 155, 156, 0, 0, 0, 0, 0, 157, 0, 0, 0, 0, 158, 159, 160, 161, 
 | 
				
			||||||
 | 
					    0, 0, 0, 162, 163, 164, 165, 0, 0, 0, 166, 0, 0, 167, 0, 0, 168, 169, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 170, 0, 0, 0, 0, 0, 0, 171, 0, 0, 0, 0, 0, 172, 173, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 174, 0, 0, 0, 0, 0, 175, 176, 0, 0, 0, 0, 0, 177, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    178, 179, 0, 0, 0, 0, 180, 181, 0, 0, 0, 0, 0, 182, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    183, 0, 0, 0, 0, 0, 184, 185, 186, 0, 0, 0, 0, 187, 188, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    189, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 191, 0, 0, 0, 0, 0, 192, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 193, 194, 0, 0, 0, 0, 0, 195, 0, 0, 0, 0, 0, 196, 197, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 198, 199, 0, 0, 0, 0, 0, 200, 0, 0, 0, 0, 0, 201, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 202, 203, 0, 0, 0, 0, 204, 205, 0, 0, 0, 0, 0, 206, 207, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 208, 0, 0, 0, 0, 0, 209, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 211, 
 | 
				
			||||||
 | 
					    212, 0, 0, 0, 0, 0, 0, 213, 0, 0, 0, 0, 0, 214, 0, 0, 0, 0, 0, 0, 215, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 216, 0, 0, 0, 0, 0, 217, 0, 0, 0, 0, 0, 218, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 219, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 221, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    222, 223, 224, 0, 0, 0, 225, 226, 227, 0, 0, 0, 0, 228, 229, 230, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 231, 232, 233, 0, 0, 0, 0, 0, 234, 0, 0, 0, 0, 235, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 236, 0, 0, 0, 0, 0, 0, 237, 0, 0, 0, 0, 0, 238, 0, 0, 0, 0, 0, 0, 239, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 240, 0, 0, 0, 0, 0, 241, 0, 0, 0, 0, 0, 0, 242, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 243, 0, 0, 0, 0, 244, 245, 246, 0, 247, 0, 0, 248, 0, 249, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 250, 251, 252, 253, 0, 0, 254, 255, 256, 0, 0, 0, 0, 257, 0, 
 | 
				
			||||||
 | 
					    258, 0, 0, 0, 0, 0, 259, 0, 0, 0, 0, 260, 261, 262, 0, 0, 0, 0, 263, 0, 
 | 
				
			||||||
 | 
					    264, 265, 0, 0, 0, 0, 0, 0, 266, 0, 0, 0, 0, 0, 0, 267, 0, 0, 268, 269, 
 | 
				
			||||||
 | 
					    270, 271, 0, 0, 272, 0, 273, 0, 0, 0, 0, 274, 0, 275, 276, 277, 0, 0, 
 | 
				
			||||||
 | 
					    278, 279, 0, 280, 0, 0, 281, 0, 282, 0, 0, 0, 0, 0, 0, 283, 0, 0, 0, 284, 
 | 
				
			||||||
 | 
					    285, 286, 0, 287, 0, 0, 288, 0, 289, 0, 290, 0, 0, 291, 0, 0, 292, 0, 0, 
 | 
				
			||||||
 | 
					    293, 0, 0, 0, 294, 0, 0, 0, 0, 0, 0, 295, 0, 0, 296, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    297, 0, 0, 0, 0, 0, 298, 299, 0, 0, 0, 0, 0, 300, 0, 0, 0, 0, 0, 301, 
 | 
				
			||||||
 | 
					    302, 0, 0, 0, 0, 0, 303, 304, 0, 0, 0, 0, 0, 305, 0, 0, 0, 0, 0, 306, 
 | 
				
			||||||
 | 
					    307, 0, 0, 0, 0, 0, 308, 0, 0, 0, 0, 0, 0, 309, 0, 0, 0, 0, 0, 310, 311, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 312, 0, 0, 0, 0, 0, 0, 313, 0, 0, 0, 0, 0, 0, 314, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 315, 0, 0, 0, 0, 0, 316, 0, 0, 0, 0, 0, 0, 317, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 318, 0, 0, 0, 0, 0, 0, 319, 0, 0, 0, 0, 320, 321, 0, 0, 0, 0, 0, 322, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 323, 0, 0, 0, 0, 0, 324, 325, 0, 0, 0, 0, 0, 326, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 327, 0, 0, 0, 0, 0, 0, 328, 0, 0, 0, 0, 0, 0, 329, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 330, 0, 0, 0, 0, 0, 0, 331, 0, 0, 0, 0, 0, 332, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    333, 0, 0, 0, 0, 0, 0, 334, 0, 0, 0, 0, 0, 335, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 336, 0, 0, 0, 0, 0, 0, 337, 0, 0, 0, 0, 0, 338, 0, 0, 0, 0, 0, 0, 339, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 340, 0, 0, 0, 0, 0, 341, 0, 0, 0, 0, 0, 0, 342, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 343, 0, 0, 0, 0, 0, 344, 0, 0, 0, 0, 0, 0, 345, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 346, 0, 0, 0, 0, 0, 0, 347, 0, 0, 0, 0, 0, 0, 348, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    349, 0, 0, 0, 0, 0, 0, 350, 0, 0, 0, 0, 0, 0, 351, 0, 0, 0, 0, 0, 352, 
 | 
				
			||||||
 | 
					    353, 0, 0, 0, 0, 0, 354, 0, 0, 0, 0, 0, 0, 355, 0, 0, 0, 0, 0, 0, 356, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 357, 0, 0, 0, 0, 0, 358, 0, 0, 0, 0, 0, 0, 359, 360, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 361, 0, 0, 0, 0, 0, 362, 0, 0, 0, 0, 0, 0, 363, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 364, 0, 0, 0, 0, 0, 365, 0, 0, 0, 0, 0, 0, 366, 0, 0, 0, 0, 0, 367, 
 | 
				
			||||||
 | 
					    368, 0, 0, 0, 0, 0, 369, 0, 0, 0, 0, 0, 370, 0, 0, 0, 0, 0, 0, 371, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 372, 0, 0, 0, 0, 0, 373, 0, 0, 0, 374, 0, 0, 375, 0, 0, 376, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 377, 0, 0, 0, 0, 0, 0, 378, 0, 0, 0, 0, 0, 379, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 380, 0, 0, 0, 0, 0, 381, 0, 0, 0, 0, 0, 0, 382, 0, 0, 383, 0, 
 | 
				
			||||||
 | 
					    0, 0, 384, 0, 0, 385, 0, 0, 386, 0, 0, 0, 0, 0, 0, 387, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    388, 0, 0, 0, 0, 0, 389, 0, 0, 0, 0, 0, 0, 390, 0, 0, 0, 0, 0, 391, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 392, 0, 0, 393, 0, 0, 0, 0, 0, 0, 394, 0, 0, 0, 0, 0, 395, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 396, 0, 0, 0, 0, 0, 0, 397, 0, 0, 398, 0, 0, 399, 0, 0, 0, 
 | 
				
			||||||
 | 
					    400, 0, 0, 0, 0, 0, 401, 0, 0, 0, 0, 0, 0, 402, 0, 0, 0, 0, 0, 0, 403, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 404, 0, 0, 0, 0, 0, 0, 405, 0, 0, 0, 0, 0, 0, 406, 0, 0, 407, 
 | 
				
			||||||
 | 
					    0, 0, 408, 0, 0, 409, 0, 0, 0, 410, 0, 0, 0, 0, 0, 411, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    412, 0, 0, 0, 0, 0, 0, 413, 0, 0, 0, 0, 0, 414, 0, 0, 0, 0, 0, 0, 415, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 416, 0, 0, 417, 0, 0, 418, 0, 0, 419, 0, 0, 0, 420, 0, 0, 
 | 
				
			||||||
 | 
					    421, 0, 0, 422, 0, 0, 423, 424, 0, 0, 425, 0, 0, 426, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    427, 0, 0, 0, 0, 0, 428, 0, 0, 0, 0, 0, 0, 429, 0, 0, 0, 0, 0, 0, 430, 0, 
 | 
				
			||||||
 | 
					    0, 431, 0, 0, 432, 0, 0, 0, 433, 0, 0, 434, 0, 0, 435, 0, 0, 436, 437, 0, 
 | 
				
			||||||
 | 
					    0, 438, 0, 0, 439, 0, 0, 0, 440, 0, 0, 0, 0, 0, 441, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    442, 0, 0, 0, 0, 0, 0, 443, 0, 0, 0, 0, 0, 444, 0, 0, 0, 0, 0, 0, 445, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 446, 0, 0, 447, 448, 0, 0, 449, 0, 0, 450, 0, 0, 0, 451, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 452, 0, 0, 0, 0, 0, 0, 453, 0, 0, 0, 0, 0, 0, 454, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 455, 0, 0, 0, 0, 0, 0, 456, 0, 0, 0, 0, 0, 457, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    458, 0, 0, 0, 0, 0, 0, 459, 0, 0, 0, 0, 0, 460, 0, 0, 0, 0, 0, 0, 461, 0, 
 | 
				
			||||||
 | 
					    0, 462, 0, 0, 463, 0, 0, 0, 0, 0, 0, 464, 0, 0, 0, 0, 0, 0, 465, 0, 0, 
 | 
				
			||||||
 | 
					    466, 0, 0, 467, 0, 0, 0, 0, 0, 0, 468, 0, 0, 0, 0, 0, 469, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 470, 0, 0, 0, 0, 0, 0, 471, 0, 0, 0, 0, 0, 472, 0, 0, 0, 0, 0, 0, 473, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 474, 0, 0, 0, 0, 0, 475, 0, 0, 0, 0, 0, 0, 476, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 477, 0, 0, 0, 0, 0, 0, 478, 0, 0, 0, 0, 0, 0, 479, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 480, 0, 0, 0, 0, 0, 0, 481, 0, 0, 0, 0, 0, 0, 482, 0, 0, 0, 0, 0, 483, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 484, 0, 0, 0, 0, 0, 485, 0, 0, 0, 0, 0, 0, 486, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 487, 0, 0, 0, 0, 0, 488, 0, 0, 0, 0, 0, 0, 489, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 490, 0, 0, 0, 0, 0, 491, 0, 0, 0, 0, 0, 0, 492, 0, 0, 0, 0, 0, 493, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 494, 0, 0, 0, 0, 0, 0, 495, 0, 0, 0, 0, 0, 496, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 497, 0, 0, 0, 0, 0, 0, 498, 0, 0, 0, 0, 0, 499, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 500, 0, 0, 0, 0, 0, 501, 0, 0, 0, 0, 0, 0, 502, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    503, 0, 0, 0, 0, 0, 504, 0, 0, 0, 0, 0, 0, 505, 0, 0, 0, 0, 0, 0, 506, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 507, 0, 0, 0, 0, 0, 0, 508, 0, 0, 0, 0, 0, 0, 0, 0, 509, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 510, 0, 0, 0, 0, 0, 0, 511, 0, 0, 0, 0, 0, 512, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 513, 0, 0, 0, 0, 0, 0, 514, 0, 0, 0, 0, 0, 515, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 516, 0, 0, 0, 0, 0, 517, 0, 0, 0, 0, 0, 0, 518, 0, 0, 0, 0, 0, 0, 519, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 520, 0, 0, 0, 0, 0, 0, 521, 0, 0, 0, 0, 0, 0, 522, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 523, 0, 0, 0, 0, 0, 0, 524, 0, 0, 0, 0, 0, 525, 526, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 527, 0, 0, 0, 0, 0, 0, 528, 0, 0, 0, 0, 0, 529, 0, 0, 0, 0, 0, 0, 530, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 531, 0, 0, 0, 0, 0, 532, 0, 0, 0, 0, 0, 0, 533, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 534, 0, 0, 0, 0, 0, 0, 535, 0, 0, 0, 0, 0, 0, 536, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 537, 0, 0, 0, 0, 0, 0, 538, 0, 0, 0, 0, 0, 0, 539, 0, 0, 0, 0, 0, 540, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 541, 0, 0, 0, 0, 0, 542, 0, 0, 0, 0, 0, 0, 543, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 544, 0, 0, 0, 0, 0, 545, 0, 0, 0, 0, 0, 0, 546, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 547, 0, 0, 0, 0, 0, 548, 0, 0, 0, 0, 0, 0, 549, 0, 0, 0, 0, 0, 550, 
 | 
				
			||||||
 | 
					    551, 0, 0, 0, 0, 0, 552, 0, 0, 0, 0, 0, 0, 553, 0, 0, 0, 0, 0, 554, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 555, 0, 0, 0, 0, 0, 0, 556, 0, 0, 0, 0, 0, 557, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 558, 
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					static unsigned short comp_data[] = {
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8814, 0, 0, 0, 0, 8800, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 8815, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192, 193, 194, 195, 
 | 
				
			||||||
 | 
					    256, 258, 550, 196, 7842, 197, 0, 461, 512, 514, 0, 0, 0, 7840, 0, 7680, 
 | 
				
			||||||
 | 
					    0, 0, 260, 0, 0, 0, 0, 7682, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7684, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 7686, 0, 0, 0, 262, 264, 0, 0, 0, 266, 0, 0, 0, 0, 268, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 199, 0, 0, 0, 0, 0, 7690, 0, 0, 0, 0, 270, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7692, 0, 0, 0, 7696, 0, 7698, 0, 0, 7694, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 200, 201, 202, 7868, 274, 276, 278, 203, 7866, 0, 0, 282, 516, 
 | 
				
			||||||
 | 
					    518, 0, 0, 0, 7864, 0, 0, 0, 552, 280, 7704, 0, 7706, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7710, 0, 0, 0, 0, 0, 0, 0, 500, 284, 0, 7712, 286, 288, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 486, 0, 0, 0, 0, 0, 0, 0, 0, 0, 290, 0, 0, 0, 0, 0, 0, 0, 0, 0, 292, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7714, 7718, 0, 0, 0, 542, 0, 0, 0, 0, 0, 7716, 0, 0, 0, 7720, 0, 
 | 
				
			||||||
 | 
					    0, 7722, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204, 205, 206, 296, 298, 
 | 
				
			||||||
 | 
					    300, 304, 207, 7880, 0, 0, 463, 520, 522, 0, 0, 0, 7882, 0, 0, 0, 0, 302, 
 | 
				
			||||||
 | 
					    0, 0, 7724, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 308, 0, 0, 0, 7728, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 488, 0, 0, 0, 0, 0, 7730, 0, 0, 0, 310, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 7732, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 313, 0, 317, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7734, 0, 0, 0, 315, 0, 7740, 0, 0, 7738, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7742, 0, 0, 0, 0, 7744, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7746, 0, 0, 0, 504, 
 | 
				
			||||||
 | 
					    323, 0, 209, 0, 0, 7748, 0, 0, 0, 0, 327, 0, 0, 0, 0, 0, 7750, 0, 0, 0, 
 | 
				
			||||||
 | 
					    325, 0, 7754, 0, 0, 7752, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210, 211, 212, 
 | 
				
			||||||
 | 
					    213, 332, 334, 558, 214, 7886, 0, 336, 465, 524, 526, 0, 0, 416, 7884, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 490, 0, 0, 0, 0, 0, 0, 0, 7764, 0, 0, 0, 0, 7766, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 340, 0, 0, 0, 0, 7768, 0, 0, 0, 0, 344, 528, 530, 0, 0, 0, 7770, 0, 
 | 
				
			||||||
 | 
					    0, 0, 342, 0, 0, 0, 0, 7774, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 346, 348, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7776, 0, 0, 0, 0, 352, 0, 0, 0, 0, 0, 7778, 0, 0, 536, 350, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7786, 0, 0, 0, 0, 356, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7788, 0, 0, 538, 354, 0, 7792, 0, 0, 7790, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    217, 218, 219, 360, 362, 364, 0, 220, 7910, 366, 368, 467, 532, 534, 0, 
 | 
				
			||||||
 | 
					    0, 431, 7908, 7794, 0, 0, 0, 370, 7798, 0, 7796, 0, 0, 0, 0, 0, 0, 7804, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 7806, 0, 0, 0, 7808, 7810, 372, 0, 0, 0, 7814, 7812, 0, 
 | 
				
			||||||
 | 
					    7816, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7818, 7820, 0, 0, 0, 0, 0, 7922, 221, 
 | 
				
			||||||
 | 
					    374, 7928, 562, 0, 7822, 376, 7926, 0, 0, 0, 0, 0, 0, 0, 0, 7924, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 377, 7824, 0, 0, 0, 379, 0, 0, 0, 0, 381, 0, 0, 0, 0, 0, 7826, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 7828, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 224, 225, 226, 
 | 
				
			||||||
 | 
					    227, 257, 259, 551, 228, 7843, 229, 0, 462, 513, 515, 0, 0, 0, 7841, 0, 
 | 
				
			||||||
 | 
					    7681, 0, 0, 261, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7683, 0, 0, 7685, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 7687, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 263, 265, 0, 
 | 
				
			||||||
 | 
					    0, 0, 267, 0, 0, 0, 0, 269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 231, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 7691, 0, 0, 0, 0, 271, 0, 0, 0, 0, 0, 7693, 0, 0, 0, 7697, 0, 7699, 0, 
 | 
				
			||||||
 | 
					    0, 7695, 0, 0, 232, 233, 234, 7869, 275, 277, 279, 235, 7867, 0, 0, 283, 
 | 
				
			||||||
 | 
					    517, 519, 0, 0, 0, 7865, 0, 0, 0, 553, 281, 7705, 0, 7707, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 7711, 0, 0, 0, 0, 0, 0, 0, 501, 285, 0, 7713, 287, 289, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 487, 0, 0, 0, 0, 0, 0, 0, 0, 0, 291, 0, 293, 0, 0, 0, 7715, 7719, 
 | 
				
			||||||
 | 
					    0, 0, 0, 543, 0, 0, 0, 0, 0, 7717, 0, 0, 0, 7721, 0, 0, 7723, 0, 7830, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 236, 237, 238, 297, 299, 301, 0, 239, 7881, 0, 
 | 
				
			||||||
 | 
					    0, 464, 521, 523, 0, 0, 0, 7883, 0, 0, 0, 0, 303, 0, 0, 7725, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 309, 0, 0, 0, 0, 0, 0, 0, 0, 496, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7729, 
 | 
				
			||||||
 | 
					    0, 489, 0, 0, 0, 0, 0, 7731, 0, 0, 0, 311, 0, 0, 0, 0, 7733, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 314, 0, 0, 0, 0, 0, 0, 0, 0, 0, 318, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7735, 0, 0, 0, 316, 0, 7741, 0, 0, 7739, 0, 0, 0, 7743, 0, 0, 0, 0, 7745, 
 | 
				
			||||||
 | 
					    0, 0, 7747, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 505, 324, 0, 241, 0, 0, 
 | 
				
			||||||
 | 
					    7749, 0, 0, 0, 0, 328, 0, 0, 0, 0, 0, 7751, 0, 0, 0, 326, 0, 7755, 0, 0, 
 | 
				
			||||||
 | 
					    7753, 0, 0, 242, 243, 244, 245, 333, 335, 559, 246, 7887, 0, 337, 466, 
 | 
				
			||||||
 | 
					    525, 527, 0, 0, 417, 7885, 0, 0, 0, 0, 491, 0, 0, 0, 0, 0, 0, 0, 7765, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7767, 0, 0, 0, 0, 0, 0, 0, 341, 0, 0, 0, 0, 7769, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    345, 529, 531, 0, 0, 0, 7771, 0, 0, 0, 343, 0, 0, 0, 0, 7775, 0, 0, 0, 
 | 
				
			||||||
 | 
					    347, 349, 0, 0, 0, 7777, 0, 0, 0, 0, 353, 0, 0, 0, 0, 0, 7779, 0, 0, 537, 
 | 
				
			||||||
 | 
					    351, 0, 0, 0, 0, 0, 7787, 7831, 0, 0, 0, 357, 0, 0, 0, 0, 0, 7789, 0, 0, 
 | 
				
			||||||
 | 
					    539, 355, 0, 7793, 0, 0, 7791, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249, 250, 
 | 
				
			||||||
 | 
					    251, 361, 363, 365, 0, 252, 7911, 367, 369, 468, 533, 535, 0, 0, 432, 
 | 
				
			||||||
 | 
					    7909, 7795, 0, 0, 0, 371, 7799, 0, 7797, 0, 0, 0, 0, 0, 0, 7805, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 7807, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7809, 7811, 373, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7815, 7813, 0, 7832, 0, 0, 0, 0, 0, 0, 0, 7817, 0, 7819, 7821, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7923, 253, 375, 7929, 563, 0, 7823, 255, 
 | 
				
			||||||
 | 
					    7927, 7833, 0, 0, 0, 0, 0, 0, 0, 7925, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 378, 7825, 0, 0, 0, 380, 0, 0, 0, 0, 382, 0, 0, 0, 0, 0, 7827, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 7829, 0, 0, 8173, 901, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8129, 0, 0, 0, 0, 0, 0, 0, 0, 7846, 7844, 0, 7850, 0, 0, 0, 0, 7848, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 478, 0, 506, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    508, 0, 0, 482, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7688, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7872, 7870, 0, 7876, 0, 0, 0, 0, 7874, 0, 0, 0, 0, 0, 7726, 0, 
 | 
				
			||||||
 | 
					    0, 0, 7890, 7888, 0, 7894, 0, 0, 0, 0, 7892, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 7756, 0, 0, 556, 0, 0, 7758, 0, 0, 0, 0, 0, 0, 0, 0, 0, 554, 
 | 
				
			||||||
 | 
					    0, 510, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 475, 471, 0, 0, 469, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 473, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7847, 7845, 0, 7851, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7849, 0, 0, 0, 0, 0, 0, 0, 0, 479, 0, 0, 0, 0, 0, 0, 0, 0, 0, 507, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 509, 0, 0, 483, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7689, 0, 0, 0, 7873, 
 | 
				
			||||||
 | 
					    7871, 0, 7877, 0, 0, 0, 0, 7875, 0, 0, 0, 0, 0, 7727, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 7891, 7889, 0, 7895, 0, 0, 0, 0, 7893, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7757, 0, 0, 557, 0, 0, 7759, 0, 0, 0, 0, 0, 0, 0, 0, 0, 555, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 511, 0, 0, 0, 476, 472, 0, 0, 470, 0, 0, 0, 0, 0, 0, 474, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 7856, 7854, 0, 7860, 0, 0, 0, 0, 7858, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 7857, 7855, 0, 7861, 0, 0, 0, 0, 7859, 0, 0, 0, 0, 7700, 7702, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7701, 7703, 0, 0, 0, 7760, 7762, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 7761, 7763, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7780, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7781, 0, 0, 0, 0, 7782, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7783, 0, 0, 0, 0, 0, 0, 0, 7800, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7801, 0, 0, 7802, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7803, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7835, 0, 0, 0, 0, 0, 0, 7900, 7898, 0, 7904, 0, 0, 0, 0, 7902, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 7906, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7901, 7899, 0, 
 | 
				
			||||||
 | 
					    7905, 0, 0, 0, 0, 7903, 0, 0, 0, 0, 0, 0, 0, 0, 7907, 0, 0, 0, 7914, 
 | 
				
			||||||
 | 
					    7912, 0, 7918, 0, 0, 0, 0, 7916, 0, 0, 0, 0, 0, 0, 0, 0, 7920, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7915, 7913, 0, 7919, 0, 0, 0, 0, 7917, 0, 0, 0, 0, 0, 0, 0, 0, 7921, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 494, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 492, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 493, 0, 0, 0, 0, 480, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 481, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7708, 0, 0, 0, 0, 7709, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 560, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 561, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 495, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8122, 902, 0, 0, 8121, 8120, 0, 0, 0, 0, 0, 0, 0, 0, 7944, 7945, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8124, 0, 0, 0, 0, 0, 0, 0, 8136, 904, 0, 0, 0, 0, 7960, 7961, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 8138, 905, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7976, 7977, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8140, 0, 0, 0, 0, 0, 0, 0, 8154, 
 | 
				
			||||||
 | 
					    906, 0, 0, 8153, 8152, 0, 938, 0, 0, 0, 0, 0, 0, 7992, 7993, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 8184, 908, 0, 0, 0, 0, 8008, 8009, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8172, 0, 0, 0, 0, 0, 8170, 910, 0, 0, 8169, 8168, 0, 939, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8025, 0, 0, 0, 0, 0, 8186, 911, 0, 0, 0, 0, 8040, 8041, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 8188, 0, 0, 0, 0, 8116, 0, 0, 0, 0, 8132, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 8048, 940, 0, 0, 8113, 8112, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7936, 7937, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8118, 8115, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 8050, 941, 0, 0, 0, 0, 7952, 7953, 0, 0, 0, 0, 0, 8052, 942, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7968, 7969, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8134, 8131, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 8054, 943, 0, 0, 8145, 8144, 0, 970, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7984, 7985, 0, 0, 0, 0, 8150, 0, 0, 0, 0, 0, 0, 0, 0, 8056, 972, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 8000, 8001, 0, 0, 0, 8164, 8165, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 8058, 973, 0, 0, 8161, 8160, 0, 971, 0, 0, 0, 0, 0, 0, 8016, 8017, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 8166, 0, 0, 0, 0, 0, 0, 0, 0, 8060, 974, 0, 0, 0, 0, 8032, 8033, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8182, 8179, 0, 0, 0, 0, 0, 0, 0, 8146, 912, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8151, 0, 0, 0, 0, 0, 0, 0, 0, 8162, 944, 0, 0, 8167, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8180, 0, 0, 0, 0, 0, 0, 0, 0, 979, 0, 0, 0, 0, 0, 980, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1031, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1232, 0, 1234, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1027, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1024, 0, 0, 0, 0, 1238, 0, 1025, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1217, 0, 1244, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 1246, 0, 0, 0, 0, 0, 1037, 0, 0, 0, 1250, 1049, 0, 1252, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 1036, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1254, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 1262, 1038, 0, 1264, 0, 0, 1266, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1268, 0, 0, 0, 0, 1272, 0, 0, 0, 0, 1260, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1233, 0, 1235, 0, 0, 0, 0, 0, 0, 1107, 0, 0, 0, 1104, 0, 0, 0, 0, 1239, 
 | 
				
			||||||
 | 
					    0, 1105, 0, 0, 1218, 0, 1245, 0, 0, 0, 0, 1247, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 1117, 0, 0, 0, 1251, 1081, 0, 1253, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1116, 0, 0, 1255, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1263, 1118, 0, 1265, 0, 0, 
 | 
				
			||||||
 | 
					    1267, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1269, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 1273, 0, 0, 0, 0, 1261, 0, 0, 0, 0, 1111, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    1142, 0, 0, 0, 0, 1143, 0, 0, 0, 0, 0, 0, 0, 1242, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 1243, 0, 0, 0, 0, 1258, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 1259, 0, 0, 0, 1570, 1571, 1573, 0, 0, 0, 1572, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 1574, 0, 0, 0, 0, 1730, 0, 0, 0, 0, 1747, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 1728, 0, 0, 0, 0, 0, 0, 2345, 0, 0, 0, 0, 2353, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2356, 0, 0, 0, 0, 0, 2507, 2508, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 2891, 2888, 2892, 0, 0, 0, 0, 0, 0, 2964, 0, 0, 0, 3018, 3020, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3019, 0, 0, 0, 0, 0, 0, 3144, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 3264, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 3274, 3271, 3272, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 3275, 0, 0, 0, 0, 0, 0, 3402, 3404, 0, 0, 0, 
 | 
				
			||||||
 | 
					    3403, 0, 0, 0, 0, 0, 0, 3546, 3548, 3550, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    3549, 0, 0, 0, 0, 0, 0, 0, 4134, 0, 0, 0, 0, 0, 0, 7736, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7737, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7772, 0, 0, 0, 0, 7773, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 7784, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7785, 7852, 0, 0, 
 | 
				
			||||||
 | 
					    7862, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7853, 0, 0, 7863, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 7878, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7879, 0, 0, 0, 0, 7896, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 7897, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7938, 7940, 0, 0, 7942, 
 | 
				
			||||||
 | 
					    8064, 0, 0, 0, 0, 0, 0, 0, 7939, 7941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7943, 8065, 0, 0, 0, 0, 8066, 0, 0, 0, 0, 8067, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8068, 0, 0, 0, 0, 8069, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8070, 0, 0, 0, 0, 8071, 0, 0, 0, 0, 0, 0, 0, 7946, 7948, 0, 0, 7950, 
 | 
				
			||||||
 | 
					    8072, 0, 0, 0, 0, 0, 0, 0, 7947, 7949, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7951, 8073, 0, 0, 0, 0, 8074, 0, 0, 0, 0, 8075, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8076, 0, 0, 0, 0, 8077, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8078, 0, 0, 0, 0, 8079, 0, 0, 0, 0, 0, 0, 0, 7954, 7956, 0, 0, 0, 7955, 
 | 
				
			||||||
 | 
					    7957, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7962, 7964, 0, 0, 0, 7963, 7965, 
 | 
				
			||||||
 | 
					    0, 0, 0, 7970, 7972, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7974, 8080, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 7971, 7973, 0, 0, 7975, 8081, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8082, 0, 0, 0, 0, 8083, 0, 0, 0, 0, 8084, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8085, 0, 0, 0, 0, 8086, 0, 0, 0, 0, 8087, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 7978, 7980, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7982, 8088, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 7979, 7981, 0, 0, 7983, 8089, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8090, 0, 0, 0, 0, 8091, 0, 0, 0, 0, 8092, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8093, 0, 0, 0, 0, 8094, 0, 0, 0, 0, 8095, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7986, 7988, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 7990, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    7987, 7989, 0, 0, 7991, 0, 0, 0, 0, 0, 0, 0, 0, 7994, 7996, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 7998, 0, 0, 0, 0, 0, 0, 0, 0, 7995, 7997, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 7999, 0, 0, 0, 0, 0, 0, 0, 0, 8002, 8004, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8003, 8005, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8010, 8012, 0, 0, 0, 8011, 
 | 
				
			||||||
 | 
					    8013, 0, 0, 0, 8018, 8020, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8022, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 8019, 8021, 0, 0, 8023, 0, 0, 0, 0, 0, 0, 0, 0, 8027, 
 | 
				
			||||||
 | 
					    8029, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8031, 0, 0, 0, 0, 0, 0, 0, 0, 8034, 
 | 
				
			||||||
 | 
					    8036, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8038, 8096, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8035, 8037, 0, 0, 8039, 8097, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8098, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8099, 0, 0, 0, 0, 8100, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8101, 0, 0, 0, 0, 8102, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8103, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 8042, 8044, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8046, 8104, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 8043, 8045, 0, 0, 8047, 8105, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8106, 0, 0, 0, 0, 8107, 0, 0, 0, 0, 8108, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 8109, 0, 0, 0, 0, 8110, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8111, 0, 0, 0, 0, 8114, 0, 0, 0, 0, 8130, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8178, 0, 0, 0, 0, 8119, 0, 0, 0, 0, 0, 0, 0, 8141, 8142, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 0, 0, 8143, 0, 0, 0, 0, 0, 8135, 0, 0, 0, 0, 8183, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 0, 0, 0, 8157, 8158, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8159, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8602, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8603, 0, 0, 0, 0, 8622, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8653, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8655, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8654, 0, 0, 0, 0, 8708, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8713, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8716, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8740, 0, 0, 0, 0, 8742, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8769, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8772, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8775, 0, 0, 0, 0, 8777, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8813, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8802, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8816, 0, 0, 0, 0, 8817, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8820, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8821, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8824, 0, 0, 0, 0, 8825, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8832, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8833, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8928, 0, 0, 0, 0, 8929, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8836, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8837, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8840, 0, 0, 0, 0, 8841, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8930, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8931, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8876, 0, 0, 0, 0, 8877, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 8878, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8879, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    8938, 0, 0, 0, 0, 8939, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 8940, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 8941, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12436, 0, 0, 0, 0, 12364, 
 | 
				
			||||||
 | 
					    0, 0, 0, 0, 12366, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12368, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12370, 0, 0, 0, 0, 12372, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12374, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12376, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12378, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12380, 0, 0, 0, 0, 12382, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12384, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12386, 0, 0, 0, 0, 12389, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12391, 0, 0, 0, 0, 12393, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12400, 
 | 
				
			||||||
 | 
					    12401, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12403, 12404, 0, 0, 0, 12406, 
 | 
				
			||||||
 | 
					    12407, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12409, 12410, 0, 0, 0, 12412, 
 | 
				
			||||||
 | 
					    12413, 0, 0, 0, 12446, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12532, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 12460, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12462, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12464, 0, 0, 0, 0, 12466, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12468, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12470, 0, 0, 0, 0, 12472, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12474, 0, 0, 0, 0, 12476, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12478, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12480, 0, 0, 0, 0, 12482, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12485, 0, 0, 0, 0, 12487, 0, 0, 0, 0, 12489, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12496, 12497, 0, 0, 0, 12499, 12500, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    0, 0, 12502, 12503, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12505, 12506, 0, 0, 
 | 
				
			||||||
 | 
					    0, 12508, 12509, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12535, 0, 0, 0, 0, 
 | 
				
			||||||
 | 
					    12536, 0, 0, 0, 0, 12537, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12538, 0, 
 | 
				
			||||||
 | 
					    0, 0, 0, 12542, 0, 
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -13,6 +13,9 @@
 | 
				
			||||||
# 2000-11-03 fl   expand first/last ranges
 | 
					# 2000-11-03 fl   expand first/last ranges
 | 
				
			||||||
# 2001-01-19 fl   added character name tables (2.1)
 | 
					# 2001-01-19 fl   added character name tables (2.1)
 | 
				
			||||||
# 2001-01-21 fl   added decomp compression; dynamic phrasebook threshold
 | 
					# 2001-01-21 fl   added decomp compression; dynamic phrasebook threshold
 | 
				
			||||||
 | 
					# 2002-09-11 wd   use string methods
 | 
				
			||||||
 | 
					# 2002-10-18 mvl  update to Unicode 3.2
 | 
				
			||||||
 | 
					# 2002-10-22 mvl  generate NFC tables
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
# written by Fredrik Lundh (fredrik@pythonware.com)
 | 
					# written by Fredrik Lundh (fredrik@pythonware.com)
 | 
				
			||||||
#
 | 
					#
 | 
				
			||||||
| 
						 | 
					@ -22,7 +25,8 @@ import sys
 | 
				
			||||||
SCRIPT = sys.argv[0]
 | 
					SCRIPT = sys.argv[0]
 | 
				
			||||||
VERSION = "2.1"
 | 
					VERSION = "2.1"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
UNICODE_DATA = "UnicodeData-Latest.txt"
 | 
					UNICODE_DATA = "UnicodeData.txt"
 | 
				
			||||||
 | 
					COMPOSITION_EXCLUSIONS = "CompositionExclusions.txt"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
 | 
					CATEGORY_NAMES = [ "Cn", "Lu", "Ll", "Lt", "Mn", "Mc", "Me", "Nd",
 | 
				
			||||||
    "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
 | 
					    "Nl", "No", "Zs", "Zl", "Zp", "Cc", "Cf", "Cs", "Co", "Cn", "Lm",
 | 
				
			||||||
| 
						 | 
					@ -47,7 +51,7 @@ def maketables(trace=0):
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print "--- Reading", UNICODE_DATA, "..."
 | 
					    print "--- Reading", UNICODE_DATA, "..."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    unicode = UnicodeData(UNICODE_DATA)
 | 
					    unicode = UnicodeData(UNICODE_DATA, COMPOSITION_EXCLUSIONS)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print len(filter(None, unicode.table)), "characters"
 | 
					    print len(filter(None, unicode.table)), "characters"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -96,6 +100,10 @@ def makeunicodedata(unicode, trace):
 | 
				
			||||||
    decomp_index = [0] * len(unicode.chars)
 | 
					    decomp_index = [0] * len(unicode.chars)
 | 
				
			||||||
    decomp_size = 0
 | 
					    decomp_size = 0
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    comp_pairs = []
 | 
				
			||||||
 | 
					    comp_first = [None] * len(unicode.chars)
 | 
				
			||||||
 | 
					    comp_last = [None] * len(unicode.chars)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    for char in unicode.chars:
 | 
					    for char in unicode.chars:
 | 
				
			||||||
        record = unicode.table[char]
 | 
					        record = unicode.table[char]
 | 
				
			||||||
        if record:
 | 
					        if record:
 | 
				
			||||||
| 
						 | 
					@ -116,6 +124,14 @@ def makeunicodedata(unicode, trace):
 | 
				
			||||||
                # content
 | 
					                # content
 | 
				
			||||||
                decomp = [prefix + (len(decomp)<<8)] +\
 | 
					                decomp = [prefix + (len(decomp)<<8)] +\
 | 
				
			||||||
                         map(lambda s: int(s, 16), decomp)
 | 
					                         map(lambda s: int(s, 16), decomp)
 | 
				
			||||||
 | 
					                # Collect NFC pairs
 | 
				
			||||||
 | 
					                if not prefix and len(decomp) == 3 and \
 | 
				
			||||||
 | 
					                   char not in unicode.exclusions and \
 | 
				
			||||||
 | 
					                   unicode.table[decomp[1]][3] == "0":
 | 
				
			||||||
 | 
					                    p, l, r = decomp
 | 
				
			||||||
 | 
					                    comp_first[l] = 1
 | 
				
			||||||
 | 
					                    comp_last[r] = 1
 | 
				
			||||||
 | 
					                    comp_pairs.append((l,r,char))
 | 
				
			||||||
                try:
 | 
					                try:
 | 
				
			||||||
                    i = decomp_data.index(decomp)
 | 
					                    i = decomp_data.index(decomp)
 | 
				
			||||||
                except ValueError:
 | 
					                except ValueError:
 | 
				
			||||||
| 
						 | 
					@ -126,10 +142,49 @@ def makeunicodedata(unicode, trace):
 | 
				
			||||||
                i = 0
 | 
					                i = 0
 | 
				
			||||||
            decomp_index[char] = i
 | 
					            decomp_index[char] = i
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    f = l = 0
 | 
				
			||||||
 | 
					    comp_first_ranges = []
 | 
				
			||||||
 | 
					    comp_last_ranges = []
 | 
				
			||||||
 | 
					    prev_f = prev_l = None
 | 
				
			||||||
 | 
					    for i in unicode.chars:
 | 
				
			||||||
 | 
					        if comp_first[i] is not None:
 | 
				
			||||||
 | 
					            comp_first[i] = f
 | 
				
			||||||
 | 
					            f += 1
 | 
				
			||||||
 | 
					            if prev_f is None:
 | 
				
			||||||
 | 
					                prev_f = (i,i)
 | 
				
			||||||
 | 
					            elif prev_f[1]+1 == i:
 | 
				
			||||||
 | 
					                prev_f = prev_f[0],i
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                comp_first_ranges.append(prev_f)
 | 
				
			||||||
 | 
					                prev_f = (i,i)
 | 
				
			||||||
 | 
					        if comp_last[i] is not None:
 | 
				
			||||||
 | 
					            comp_last[i] = l
 | 
				
			||||||
 | 
					            l += 1
 | 
				
			||||||
 | 
					            if prev_l is None:
 | 
				
			||||||
 | 
					                prev_l = (i,i)
 | 
				
			||||||
 | 
					            elif prev_l[1]+1 == i:
 | 
				
			||||||
 | 
					                prev_l = prev_l[0],i
 | 
				
			||||||
 | 
					            else:
 | 
				
			||||||
 | 
					                comp_last_ranges.append(prev_l)
 | 
				
			||||||
 | 
					                prev_l = (i,i)
 | 
				
			||||||
 | 
					    comp_first_ranges.append(prev_f)
 | 
				
			||||||
 | 
					    comp_last_ranges.append(prev_l)
 | 
				
			||||||
 | 
					    total_first = f
 | 
				
			||||||
 | 
					    total_last = l
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    comp_data = [0]*(total_first*total_last)
 | 
				
			||||||
 | 
					    for f,l,char in comp_pairs:
 | 
				
			||||||
 | 
					        f = comp_first[f]
 | 
				
			||||||
 | 
					        l = comp_last[l]
 | 
				
			||||||
 | 
					        comp_data[f*total_last+l] = char
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print len(table), "unique properties"
 | 
					    print len(table), "unique properties"
 | 
				
			||||||
    print len(decomp_prefix), "unique decomposition prefixes"
 | 
					    print len(decomp_prefix), "unique decomposition prefixes"
 | 
				
			||||||
    print len(decomp_data), "unique decomposition entries:",
 | 
					    print len(decomp_data), "unique decomposition entries:",
 | 
				
			||||||
    print decomp_size, "bytes"
 | 
					    print decomp_size, "bytes"
 | 
				
			||||||
 | 
					    print total_first, "first characters in NFC"
 | 
				
			||||||
 | 
					    print total_last, "last characters in NFC"
 | 
				
			||||||
 | 
					    print len(comp_pairs), "NFC pairs"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    print "--- Writing", FILE, "..."
 | 
					    print "--- Writing", FILE, "..."
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -144,6 +199,21 @@ def makeunicodedata(unicode, trace):
 | 
				
			||||||
    print >>fp, "};"
 | 
					    print >>fp, "};"
 | 
				
			||||||
    print >>fp
 | 
					    print >>fp
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    print >>fp, "/* Reindexing of NFC first characters. */"
 | 
				
			||||||
 | 
					    print >>fp, "#define TOTAL_FIRST",total_first
 | 
				
			||||||
 | 
					    print >>fp, "#define TOTAL_LAST",total_last
 | 
				
			||||||
 | 
					    print >>fp, "struct reindex{int start;short count,index;};"
 | 
				
			||||||
 | 
					    print >>fp, "struct reindex nfc_first[] = {"
 | 
				
			||||||
 | 
					    for start,end in comp_first_ranges:
 | 
				
			||||||
 | 
					        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_first[start])
 | 
				
			||||||
 | 
					    print >>fp,"  {0,0,0}"
 | 
				
			||||||
 | 
					    print >>fp,"};\n"
 | 
				
			||||||
 | 
					    print >>fp, "struct reindex nfc_last[] = {"
 | 
				
			||||||
 | 
					    for start,end in comp_last_ranges:
 | 
				
			||||||
 | 
					        print >>fp,"  { %d, %d, %d}," % (start,end-start,comp_last[start])
 | 
				
			||||||
 | 
					    print >>fp,"  {0,0,0}"
 | 
				
			||||||
 | 
					    print >>fp,"};\n"
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    # FIXME: <fl> the following tables could be made static, and
 | 
					    # FIXME: <fl> the following tables could be made static, and
 | 
				
			||||||
    # the support code moved into unicodedatabase.c
 | 
					    # the support code moved into unicodedatabase.c
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -185,6 +255,12 @@ def makeunicodedata(unicode, trace):
 | 
				
			||||||
    Array("decomp_index1", index1).dump(fp, trace)
 | 
					    Array("decomp_index1", index1).dump(fp, trace)
 | 
				
			||||||
    Array("decomp_index2", index2).dump(fp, trace)
 | 
					    Array("decomp_index2", index2).dump(fp, trace)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    index, index2, shift = splitbins(comp_data, trace)
 | 
				
			||||||
 | 
					    print >>fp, "/* NFC pairs */"
 | 
				
			||||||
 | 
					    print >>fp, "#define COMP_SHIFT", shift
 | 
				
			||||||
 | 
					    Array("comp_index", index).dump(fp, trace)
 | 
				
			||||||
 | 
					    Array("comp_data", index2).dump(fp, trace)
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    fp.close()
 | 
					    fp.close()
 | 
				
			||||||
 | 
					
 | 
				
			||||||
# --------------------------------------------------------------------
 | 
					# --------------------------------------------------------------------
 | 
				
			||||||
| 
						 | 
					@ -454,7 +530,7 @@ import sys
 | 
				
			||||||
 | 
					
 | 
				
			||||||
class UnicodeData:
 | 
					class UnicodeData:
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def __init__(self, filename, expand=1):
 | 
					    def __init__(self, filename, exclusions, expand=1):
 | 
				
			||||||
        file = open(filename)
 | 
					        file = open(filename)
 | 
				
			||||||
        table = [None] * 0x110000
 | 
					        table = [None] * 0x110000
 | 
				
			||||||
        while 1:
 | 
					        while 1:
 | 
				
			||||||
| 
						 | 
					@ -486,6 +562,17 @@ class UnicodeData:
 | 
				
			||||||
        self.table = table
 | 
					        self.table = table
 | 
				
			||||||
        self.chars = range(0x110000) # unicode 3.2
 | 
					        self.chars = range(0x110000) # unicode 3.2
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					        file = open(exclusions)
 | 
				
			||||||
 | 
					        self.exclusions = {}
 | 
				
			||||||
 | 
					        for s in file:
 | 
				
			||||||
 | 
					            s = s.strip()
 | 
				
			||||||
 | 
					            if not s:
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					            if s[0] == '#':
 | 
				
			||||||
 | 
					                continue
 | 
				
			||||||
 | 
					            char = int(s.split()[0],16)
 | 
				
			||||||
 | 
					            self.exclusions[char] = 1
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def uselatin1(self):
 | 
					    def uselatin1(self):
 | 
				
			||||||
        # restrict character range to ISO Latin 1
 | 
					        # restrict character range to ISO Latin 1
 | 
				
			||||||
        self.chars = range(256)
 | 
					        self.chars = range(256)
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue