mirror of
https://github.com/python/cpython.git
synced 2025-08-03 08:34:29 +00:00
unicode database compression, step 2:
- fixed attributions
- moved decomposition data to a separate table, in preparation for step 3 (which won't happen before 2.0 final, promise!)
- use relative paths in the generator script

I have a lot more stuff in the works for 2.1, but let's leave that for another day...
This commit is contained in:
parent
2101348830
commit
cfcea49218
5 changed files with 4613 additions and 4330 deletions
|
@ -4,17 +4,16 @@
|
|||
|
||||
Data was extracted from the Unicode 3.0 UnicodeData.txt file.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
|
||||
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
#include "Python.h"
|
||||
#include "unicodedatabase.h"
|
||||
|
||||
#define unicode_db _PyUnicode_Database_GetRecord
|
||||
|
||||
/* --- Module API --------------------------------------------------------- */
|
||||
|
||||
static PyObject *
|
||||
|
@ -134,15 +133,9 @@ unicodedata_category(PyObject *self,
|
|||
"need a single Unicode character as parameter");
|
||||
goto onError;
|
||||
}
|
||||
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->category;
|
||||
if (index < 0 ||
|
||||
index > sizeof(_PyUnicode_CategoryNames) /
|
||||
sizeof(_PyUnicode_CategoryNames[0])) {
|
||||
PyErr_Format(PyExc_SystemError,
|
||||
"category index out of range: %i",
|
||||
index);
|
||||
goto onError;
|
||||
}
|
||||
index = (int) _PyUnicode_Database_GetRecord(
|
||||
(int) *PyUnicode_AS_UNICODE(v)
|
||||
)->category;
|
||||
return PyString_FromString(_PyUnicode_CategoryNames[index]);
|
||||
|
||||
onError:
|
||||
|
@ -164,15 +157,9 @@ unicodedata_bidirectional(PyObject *self,
|
|||
"need a single Unicode character as parameter");
|
||||
goto onError;
|
||||
}
|
||||
index = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->bidirectional;
|
||||
if (index < 0 ||
|
||||
index > sizeof(_PyUnicode_CategoryNames) /
|
||||
sizeof(_PyUnicode_CategoryNames[0])) {
|
||||
PyErr_Format(PyExc_SystemError,
|
||||
"bidirectional index out of range: %i",
|
||||
index);
|
||||
goto onError;
|
||||
}
|
||||
index = (int) _PyUnicode_Database_GetRecord(
|
||||
(int) *PyUnicode_AS_UNICODE(v)
|
||||
)->bidirectional;
|
||||
return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
|
||||
|
||||
onError:
|
||||
|
@ -194,7 +181,9 @@ unicodedata_combining(PyObject *self,
|
|||
"need a single Unicode character as parameter");
|
||||
goto onError;
|
||||
}
|
||||
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->combining;
|
||||
value = (int) _PyUnicode_Database_GetRecord(
|
||||
(int) *PyUnicode_AS_UNICODE(v)
|
||||
)->combining;
|
||||
return PyInt_FromLong(value);
|
||||
|
||||
onError:
|
||||
|
@ -216,7 +205,9 @@ unicodedata_mirrored(PyObject *self,
|
|||
"need a single Unicode character as parameter");
|
||||
goto onError;
|
||||
}
|
||||
value = (int)unicode_db((int)*PyUnicode_AS_UNICODE(v))->mirrored;
|
||||
value = (int) _PyUnicode_Database_GetRecord(
|
||||
(int) *PyUnicode_AS_UNICODE(v)
|
||||
)->mirrored;
|
||||
return PyInt_FromLong(value);
|
||||
|
||||
onError:
|
||||
|
@ -238,10 +229,9 @@ unicodedata_decomposition(PyObject *self,
|
|||
"need a single Unicode character as parameter");
|
||||
goto onError;
|
||||
}
|
||||
value = unicode_db((int)*PyUnicode_AS_UNICODE(v))->decomposition;
|
||||
if (value == NULL)
|
||||
return PyString_FromString("");
|
||||
else
|
||||
value = _PyUnicode_Database_GetDecomposition(
|
||||
(int) *PyUnicode_AS_UNICODE(v)
|
||||
);
|
||||
return PyString_FromString(value);
|
||||
|
||||
onError:
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -4,9 +4,10 @@
|
|||
|
||||
Data was extracted from the Unicode 3.0 UnicodeData.txt file.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Rewritten for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
|
||||
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
@ -29,3 +30,18 @@ _PyUnicode_Database_GetRecord(int code)
|
|||
}
|
||||
return &_PyUnicode_Database_Records[index];
|
||||
}
|
||||
|
||||
const char *
|
||||
_PyUnicode_Database_GetDecomposition(int code)
|
||||
{
|
||||
int index;
|
||||
|
||||
if (code < 0 || code >= 65536)
|
||||
index = 0;
|
||||
else {
|
||||
index = decomp_index1[(code>>DECOMP_SHIFT)];
|
||||
index = decomp_index2[(index<<DECOMP_SHIFT)+
|
||||
(code&((1<<DECOMP_SHIFT)-1))];
|
||||
}
|
||||
return decomp_data[index];
|
||||
}
|
||||
|
|
|
@ -4,9 +4,10 @@
|
|||
|
||||
Data was extracted from the Unicode 3.0 UnicodeData.txt file.
|
||||
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Written by Marc-Andre Lemburg (mal@lemburg.com).
|
||||
Modified for Python 2.0 by Fredrik Lundh (fredrik@pythonware.com)
|
||||
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
Copyright (c) Corporation for National Research Initiatives.
|
||||
|
||||
------------------------------------------------------------------------ */
|
||||
|
||||
|
@ -19,15 +20,14 @@ typedef struct {
|
|||
const unsigned char bidirectional; /* index into
|
||||
_PyUnicode_BidirectionalNames */
|
||||
const unsigned char mirrored; /* true if mirrored in bidir mode */
|
||||
const char *decomposition; /* pointer to the decomposition
|
||||
string or NULL */
|
||||
} _PyUnicode_DatabaseRecord;
|
||||
|
||||
/* --- Unicode category names --------------------------------------------- */
|
||||
|
||||
extern const char *_PyUnicode_CategoryNames[32];
|
||||
extern const char *_PyUnicode_BidirectionalNames[21];
|
||||
extern const char *_PyUnicode_CategoryNames[];
|
||||
extern const char *_PyUnicode_BidirectionalNames[];
|
||||
|
||||
/* --- Unicode Database --------------------------------------------------- */
|
||||
|
||||
extern const _PyUnicode_DatabaseRecord *_PyUnicode_Database_GetRecord(int ch);
|
||||
extern const char *_PyUnicode_Database_GetDecomposition(int ch);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue