compress unicode decomposition tables (this saves another 55k)

This commit is contained in:
Fredrik Lundh 2001-01-21 22:41:08 +00:00
parent f75c9d94b4
commit 7b7dd107b3
7 changed files with 7496 additions and 10712 deletions

View file

@ -14,11 +14,40 @@
#include "Python.h"
#include "unicodedatabase.h"
/* Property record for a single character: four one-byte fields. */
typedef struct {
    /* Selects an entry of _PyUnicode_CategoryNames. */
    const unsigned char category;
    /* Combining class value, in the range 0 - 255. */
    const unsigned char combining;
    /* Selects an entry of _PyUnicode_BidirectionalNames. */
    const unsigned char bidirectional;
    /* True if the character is mirrored in bidir mode. */
    const unsigned char mirrored;
} _PyUnicode_DatabaseRecord;
/* data file generated by Tools/unicode/makeunicodedata.py */
#include "unicodedata_db.h"
/* Return the property record for the first character of v.

   The lookup goes through two tables: index1 is indexed by the bits of
   the character above SHIFT, and the value found there, combined with
   the low SHIFT bits of the character, indexes index2.  The result is a
   position in _PyUnicode_Database_Records.  Characters outside the range
   0..65535 use record 0.  The returned pointer refers to the static
   table; the caller does not own it. */
static const _PyUnicode_DatabaseRecord *
getrecord(PyUnicodeObject *v)
{
    const int ch = (int) *PyUnicode_AS_UNICODE(v);
    int slot = 0;

    if (ch >= 0 && ch < 65536) {
        const int page = index1[ch >> SHIFT];
        const int offset = ch & ((1 << SHIFT) - 1);

        slot = index2[(page << SHIFT) + offset];
    }

    return &_PyUnicode_Database_Records[slot];
}
/* --- Module API --------------------------------------------------------- */
/* Implements the "decimal" entry point (format "O!|O:decimal").

   Arguments: a Unicode object and an optional default object.  A Unicode
   object whose length is not exactly 1 raises TypeError.  The single
   character is converted with Py_UNICODE_TODECIMAL; a non-negative result
   is returned through PyInt_FromLong.  A negative result raises
   ValueError ("not a decimal") when no default was supplied; otherwise a
   new reference to the default object is taken.

   Returns NULL with an exception set on every failure path.

   NOTE(review): the declaration of rc and the statement that follows
   Py_INCREF(defobj) are not visible in this excerpt -- presumably the
   default object is returned there; confirm against the full file. */
static PyObject *
unicodedata_decimal(PyObject *self,
PyObject *args)
unicodedata_decimal(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
PyObject *defobj = NULL;
@ -26,18 +55,18 @@ unicodedata_decimal(PyObject *self,
if (!PyArg_ParseTuple(args, "O!|O:decimal",
&PyUnicode_Type, &v, &defobj))
goto onError;
return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
goto onError;
return NULL;
}
/* a negative result is treated as "no decimal value" below */
rc = Py_UNICODE_TODECIMAL(*PyUnicode_AS_UNICODE(v));
if (rc < 0) {
if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError,
"not a decimal");
goto onError;
return NULL;
}
else {
Py_INCREF(defobj);
@ -45,14 +74,10 @@ unicodedata_decimal(PyObject *self,
}
}
return PyInt_FromLong(rc);
onError:
return NULL;
}
/* Implements the "digit" entry point (format "O!|O:digit").

   Arguments: a Unicode object and an optional default object.  A Unicode
   object whose length is not exactly 1 raises TypeError.  The single
   character is converted with Py_UNICODE_TODIGIT; a non-negative result
   is returned through PyInt_FromLong.  A negative result raises
   ValueError ("not a digit") when no default was supplied; otherwise a
   new reference to the default object is taken.

   Returns NULL with an exception set on every failure path.

   NOTE(review): the declaration of rc and the statement that follows
   Py_INCREF(defobj) are not visible in this excerpt -- presumably the
   default object is returned there; confirm against the full file. */
static PyObject *
unicodedata_digit(PyObject *self,
PyObject *args)
unicodedata_digit(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
PyObject *defobj = NULL;
@ -60,18 +85,18 @@ unicodedata_digit(PyObject *self,
if (!PyArg_ParseTuple(args, "O!|O:digit",
&PyUnicode_Type, &v, &defobj))
goto onError;
return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
goto onError;
return NULL;
}
/* a negative result is treated as "no digit value" below */
rc = Py_UNICODE_TODIGIT(*PyUnicode_AS_UNICODE(v));
if (rc < 0) {
if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError,
"not a digit");
goto onError;
return NULL;
}
else {
Py_INCREF(defobj);
@ -79,14 +104,10 @@ unicodedata_digit(PyObject *self,
}
}
return PyInt_FromLong(rc);
onError:
return NULL;
}
/* Implements the "numeric" entry point (format "O!|O:numeric").

   Arguments: a Unicode object and an optional default object.  A Unicode
   object whose length is not exactly 1 raises TypeError.  The single
   character is converted with Py_UNICODE_TONUMERIC; a non-negative result
   is returned through PyFloat_FromDouble.  A negative result raises
   ValueError ("not a numeric character") when no default was supplied;
   otherwise a new reference to the default object is taken.

   Returns NULL with an exception set on every failure path.

   NOTE(review): the declaration of rc and the statement that follows
   Py_INCREF(defobj) are not visible in this excerpt -- presumably the
   default object is returned there; confirm against the full file. */
static PyObject *
unicodedata_numeric(PyObject *self,
PyObject *args)
unicodedata_numeric(PyObject *self, PyObject *args)
{
PyUnicodeObject *v;
PyObject *defobj = NULL;
@ -94,18 +115,18 @@ unicodedata_numeric(PyObject *self,
if (!PyArg_ParseTuple(args, "O!|O:numeric",
&PyUnicode_Type, &v, &defobj))
goto onError;
return NULL;
if (PyUnicode_GET_SIZE(v) != 1) {
PyErr_SetString(PyExc_TypeError,
"need a single Unicode character as parameter");
goto onError;
return NULL;
}
/* a negative result is treated as "no numeric value" below */
rc = Py_UNICODE_TONUMERIC(*PyUnicode_AS_UNICODE(v));
if (rc < 0) {
if (defobj == NULL) {
PyErr_SetString(PyExc_ValueError,
"not a numeric character");
goto onError;
return NULL;
}
else {
Py_INCREF(defobj);
@ -113,129 +134,123 @@ unicodedata_numeric(PyObject *self,
}
}
return PyFloat_FromDouble(rc);
onError:
return NULL;
}
/* Implements the "category" entry point (format "O!:category").

   Takes a Unicode object that must contain exactly one character and
   returns a new string object holding the entry of
   _PyUnicode_CategoryNames selected by the character's database record.

   Returns NULL with an exception set when argument parsing fails, or
   with TypeError when the length is not 1. */
static PyObject *
unicodedata_category(PyObject *self, PyObject *args)
{
    PyUnicodeObject *v;
    int index;

    if (!PyArg_ParseTuple(args, "O!:category",
                          &PyUnicode_Type, &v))
        return NULL;

    if (PyUnicode_GET_SIZE(v) != 1) {
        PyErr_SetString(PyExc_TypeError,
                        "need a single Unicode character as parameter");
        return NULL;
    }

    /* one lookup through the file-local record helper */
    index = (int) getrecord(v)->category;

    return PyString_FromString(_PyUnicode_CategoryNames[index]);
}
/* Implements the "bidirectional" entry point (format "O!:bidirectional").

   Takes a Unicode object that must contain exactly one character and
   returns a new string object holding the entry of
   _PyUnicode_BidirectionalNames selected by the character's database
   record.

   Returns NULL with an exception set when argument parsing fails, or
   with TypeError when the length is not 1. */
static PyObject *
unicodedata_bidirectional(PyObject *self, PyObject *args)
{
    PyUnicodeObject *v;
    int index;

    if (!PyArg_ParseTuple(args, "O!:bidirectional",
                          &PyUnicode_Type, &v))
        return NULL;

    if (PyUnicode_GET_SIZE(v) != 1) {
        PyErr_SetString(PyExc_TypeError,
                        "need a single Unicode character as parameter");
        return NULL;
    }

    /* one lookup through the file-local record helper */
    index = (int) getrecord(v)->bidirectional;

    return PyString_FromString(_PyUnicode_BidirectionalNames[index]);
}
/* Implements the "combining" entry point (format "O!:combining").

   Takes a Unicode object that must contain exactly one character and
   returns the combining field of the character's database record as an
   integer object.

   Returns NULL with an exception set when argument parsing fails, or
   with TypeError when the length is not 1. */
static PyObject *
unicodedata_combining(PyObject *self, PyObject *args)
{
    PyUnicodeObject *v;

    if (!PyArg_ParseTuple(args, "O!:combining",
                          &PyUnicode_Type, &v))
        return NULL;

    if (PyUnicode_GET_SIZE(v) != 1) {
        PyErr_SetString(PyExc_TypeError,
                        "need a single Unicode character as parameter");
        return NULL;
    }

    return PyInt_FromLong((int) getrecord(v)->combining);
}
/* Implements the "mirrored" entry point (format "O!:mirrored").

   Takes a Unicode object that must contain exactly one character and
   returns the mirrored field of the character's database record as an
   integer object.

   Returns NULL with an exception set when argument parsing fails, or
   with TypeError when the length is not 1. */
static PyObject *
unicodedata_mirrored(PyObject *self, PyObject *args)
{
    PyUnicodeObject *v;

    if (!PyArg_ParseTuple(args, "O!:mirrored",
                          &PyUnicode_Type, &v))
        return NULL;

    if (PyUnicode_GET_SIZE(v) != 1) {
        PyErr_SetString(PyExc_TypeError,
                        "need a single Unicode character as parameter");
        return NULL;
    }

    return PyInt_FromLong((int) getrecord(v)->mirrored);
}
/* Implements the "decomposition" entry point (format "O!:decomposition").

   Takes a Unicode object that must contain exactly one character and
   returns a new string object built from the decomposition tables: the
   prefix string selected by the entry's low byte, followed by each of
   the entry's values formatted as "%04X", separated by single spaces.

   Characters outside the range 0..65535 use table entry 0.

   Returns NULL with an exception set when argument parsing fails, or
   with TypeError when the length is not 1. */
static PyObject *
unicodedata_decomposition(PyObject *self, PyObject *args)
{
    PyUnicodeObject *v;
    const char *prefix;
    char decomp[256];
    int code, index, count, i;

    if (!PyArg_ParseTuple(args, "O!:decomposition",
                          &PyUnicode_Type, &v))
        return NULL;

    if (PyUnicode_GET_SIZE(v) != 1) {
        PyErr_SetString(PyExc_TypeError,
                        "need a single Unicode character as parameter");
        return NULL;
    }

    code = (int) *PyUnicode_AS_UNICODE(v);

    /* two-level lookup: decomp_index1 is indexed by the bits above
       DECOMP_SHIFT, decomp_index2 by that result plus the low bits */
    if (code < 0 || code >= 65536)
        index = 0;
    else {
        index = decomp_index1[(code>>DECOMP_SHIFT)];
        index = decomp_index2[(index<<DECOMP_SHIFT)+
                              (code&((1<<DECOMP_SHIFT)-1))];
    }

    /* decomp_data[index] is a header word: the high byte is the number
       of values that follow it, the low byte selects an entry of
       decomp_prefix */
    count = decomp_data[index] >> 8;
    prefix = decomp_prefix[decomp_data[index] & 255];

    /* XXX: could allocate the PyString up front instead
       (strlen(prefix) + 5 * count + 1 bytes) */

    /* NOTE(review): decomp is a fixed 256-byte buffer and nothing here
       bounds the writes; this presumably relies on the generated tables
       keeping prefix length + 5 * count below that size -- confirm
       against the table generator. */

    /* copy prefix (without its terminator) */
    i = (int) strlen(prefix);
    memcpy(decomp, prefix, i);

    while (count-- > 0) {
        /* separate items with a space, except at the very start */
        if (i)
            decomp[i++] = ' ';
        sprintf(decomp + i, "%04X", decomp_data[++index]);
        i += strlen(decomp + i);
    }

    /* terminate explicitly: when count is 0 nothing has written a NUL */
    decomp[i] = '\0';

    return PyString_FromString(decomp);
}
/* XXX Add doc strings. */