mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Revert Patch #670715: iconv support.
This commit is contained in:
parent
1e469c5603
commit
7fb697b5d2
8 changed files with 2 additions and 892 deletions
|
@ -121,8 +121,3 @@ def search_function(encoding):
|
|||
# Register the search_function in the Python codec registry
|
||||
codecs.register(search_function)
|
||||
|
||||
# Register iconv_codec lookup function if available
|
||||
try:
|
||||
import iconv_codec
|
||||
except (ImportError, RuntimeError):
|
||||
pass
|
||||
|
|
|
@ -1,34 +0,0 @@
|
|||
""" Python 'iconv' Codec
|
||||
|
||||
|
||||
Written by Hye-Shik Chang (perky@FreeBSD.org).
|
||||
|
||||
Copyright(c) Python Software Foundation, All Rights Reserved. NO WARRANTY.
|
||||
|
||||
"""
|
||||
|
||||
import _iconv_codec
|
||||
import codecs
|
||||
|
||||
def lookup(enc):
|
||||
class IconvCodec(_iconv_codec.iconvcodec, codecs.Codec):
|
||||
encoding = enc
|
||||
|
||||
try:
|
||||
c = IconvCodec()
|
||||
|
||||
class IconvStreamReader(IconvCodec, codecs.StreamReader):
|
||||
__init__ = codecs.StreamReader.__init__
|
||||
class IconvStreamWriter(IconvCodec, codecs.StreamWriter):
|
||||
__init__ = codecs.StreamWriter.__init__
|
||||
|
||||
return (
|
||||
c.encode, c.decode,
|
||||
IconvStreamReader, IconvStreamWriter
|
||||
)
|
||||
except ValueError:
|
||||
return None
|
||||
|
||||
codecs.register(lookup)
|
||||
|
||||
# ex: ts=8 sts=4 et
|
|
@ -569,7 +569,6 @@ _expectations = {
|
|||
test_gdbm
|
||||
test_gl
|
||||
test_grp
|
||||
test_iconv_codecs
|
||||
test_imgfile
|
||||
test_ioctl
|
||||
test_largefile
|
||||
|
@ -626,7 +625,6 @@ _expectations = {
|
|||
test_fork1
|
||||
test_gl
|
||||
test_grp
|
||||
test_iconv_codecs
|
||||
test_ioctl
|
||||
test_imgfile
|
||||
test_largefile
|
||||
|
@ -774,7 +772,6 @@ _expectations = {
|
|||
test_email_codecs
|
||||
test_gdbm
|
||||
test_gl
|
||||
test_iconv_codecs
|
||||
test_imgfile
|
||||
test_largefile
|
||||
test_linuxaudiodev
|
||||
|
@ -890,7 +887,6 @@ _expectations = {
|
|||
test_dl
|
||||
test_email_codecs
|
||||
test_gl
|
||||
test_iconv_codecs
|
||||
test_imgfile
|
||||
test_largefile
|
||||
test_linuxaudiodev
|
||||
|
|
|
@ -1,99 +0,0 @@
|
|||
from test import test_support
|
||||
import unittest, sys
|
||||
import codecs, _iconv_codec
|
||||
from encodings import iconv_codec
|
||||
from StringIO import StringIO
|
||||
|
||||
class IconvCodecTest(unittest.TestCase):
|
||||
|
||||
if sys.byteorder == 'big':
|
||||
spam = '\x00s\x00p\x00a\x00m' * 2
|
||||
else:
|
||||
spam = 's\x00p\x00a\x00m\x00' * 2
|
||||
|
||||
def test_sane(self):
|
||||
# FIXME: Commented out, because it's not clear whether
|
||||
# the internal encoding chosen requires byte swapping
|
||||
# for this iconv() implementation.
|
||||
if False:
|
||||
self.encoder, self.decoder, self.reader, self.writer = \
|
||||
codecs.lookup(_iconv_codec.internal_encoding)
|
||||
self.assertEqual(self.decoder(self.spam), (u'spamspam', 16))
|
||||
self.assertEqual(self.encoder(u'spamspam'), (self.spam, 8))
|
||||
self.assertEqual(self.reader(StringIO(self.spam)).read(), u'spamspam')
|
||||
f = StringIO()
|
||||
self.writer(f).write(u'spamspam')
|
||||
self.assertEqual(f.getvalue(), self.spam)
|
||||
|
||||
def test_basic_errors(self):
|
||||
self.encoder, self.decoder, self.reader, self.writer = \
|
||||
iconv_codec.lookup("ascii")
|
||||
def testencerror(errors):
|
||||
return self.encoder(u'sp\ufffdam', errors)
|
||||
def testdecerror(errors):
|
||||
return self.decoder('sp\xffam', errors)
|
||||
|
||||
self.assertRaises(UnicodeEncodeError, testencerror, 'strict')
|
||||
self.assertRaises(UnicodeDecodeError, testdecerror, 'strict')
|
||||
self.assertEqual(testencerror('replace'), ('sp?am', 5))
|
||||
self.assertEqual(testdecerror('replace'), (u'sp\ufffdam', 5))
|
||||
self.assertEqual(testencerror('ignore'), ('spam', 5))
|
||||
self.assertEqual(testdecerror('ignore'), (u'spam', 5))
|
||||
|
||||
def test_pep293_errors(self):
|
||||
self.encoder, self.decoder, self.reader, self.writer = \
|
||||
iconv_codec.lookup("ascii")
|
||||
def testencerror(errors):
|
||||
return self.encoder(u'sp\ufffdam', errors)
|
||||
def testdecerror(errors):
|
||||
return self.decoder('sp\xffam', errors)
|
||||
|
||||
self.assertEqual(testencerror('xmlcharrefreplace'),
|
||||
('sp&#65533;am', 5))
|
||||
self.assertEqual(testencerror('backslashreplace'),
|
||||
('sp\\ufffdam', 5))
|
||||
|
||||
def error_bomb(exc):
|
||||
return (u'*'*40, len(exc.object))
|
||||
def error_mock(exc):
|
||||
error_mock.lastexc = exc
|
||||
return (unicode(exc.object[exc.start - 1]), exc.end)
|
||||
|
||||
codecs.register_error('test_iconv_codec.bomb', error_bomb)
|
||||
codecs.register_error('test_iconv_codec.mock', error_mock)
|
||||
|
||||
self.assertEqual(testencerror('test_iconv_codec.bomb'),
|
||||
('sp' + ('*'*40), 5))
|
||||
self.assertEqual(testdecerror('test_iconv_codec.bomb'),
|
||||
(u'sp' + (u'*'*40), 5))
|
||||
|
||||
self.assertEqual(testencerror('test_iconv_codec.mock'), ('sppam', 5))
|
||||
exc = error_mock.lastexc
|
||||
self.assertEqual(exc.object, u'sp\ufffdam')
|
||||
self.assertEqual(exc.start, 2)
|
||||
self.assertEqual(exc.end, 3)
|
||||
self.assert_(isinstance(exc, UnicodeEncodeError))
|
||||
|
||||
self.assertEqual(testdecerror('test_iconv_codec.mock'), (u'sppam', 5))
|
||||
exc = error_mock.lastexc
|
||||
self.assertEqual(exc.object, 'sp\xffam')
|
||||
self.assertEqual(exc.start, 2)
|
||||
self.assertEqual(exc.end, 3)
|
||||
self.assert_(isinstance(exc, UnicodeDecodeError))
|
||||
|
||||
def test_empty_escape_decode(self):
|
||||
self.encoder, self.decoder, self.reader, self.writer = \
|
||||
iconv_codec.lookup("ascii")
|
||||
self.assertEquals(self.decoder(u""), ("", 0))
|
||||
self.assertEquals(self.encoder(""), (u"", 0))
|
||||
|
||||
def test_main():
|
||||
suite = unittest.TestSuite()
|
||||
suite.addTest(unittest.makeSuite(IconvCodecTest))
|
||||
test_support.run_suite(suite)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
test_main()
|
||||
|
||||
# ex: ts=8 sts=4 et
|
|
@ -41,6 +41,8 @@ Core and builtins
|
|||
Extension modules
|
||||
-----------------
|
||||
|
||||
- The iconv module has been removed from this release.
|
||||
|
||||
- The platform-independent routines for packing floats in IEEE formats
|
||||
(struct.pack's <f, >f, <d, and >d codes; pickle and cPickle's protocol 1
|
||||
pickling of floats) ignored that rounding can cause a carry to
|
||||
|
@ -105,8 +107,6 @@ TBD
|
|||
Build
|
||||
-----
|
||||
|
||||
- Fix build problems when _iconv_codec failed. (SF bug #690012.)
|
||||
|
||||
- Fix problem building on OSF1 because the compiler only accepted
|
||||
preprocessor directives that start in column 1. (SF bug #691793.)
|
||||
|
||||
|
@ -276,9 +276,6 @@ Extension modules
|
|||
- The SSL module now handles sockets with a timeout set correctly (SF
|
||||
patch #675750, fixing SF bug #675552).
|
||||
|
||||
- A new module _iconv_codec has been added, to expose the iconv(3)
|
||||
library.
|
||||
|
||||
- os/posixmodule has grown the sysexits.h constants (EX_OK and friends).
|
||||
|
||||
- Fixed broken threadstate swap in readline that could cause fatal
|
||||
|
|
|
@ -478,10 +478,6 @@ GLHACK=-Dclear=__GLclear
|
|||
#EXPAT_DIR=/usr/local/src/expat-1.95.2
|
||||
#pyexpat pyexpat.c -DHAVE_EXPAT_H -I$(EXPAT_DIR)/lib -L$(EXPAT_DIR) -lexpat
|
||||
|
||||
# Wrapper for iconv(3). This requires either GNU iconv, or a native
|
||||
# iconv implementation (only Linux, Solaris, and BSD are known to work)
|
||||
#_iconv_codec _iconv_codec -I$(prefix)/include -L$(exec_prefix)/lib -liconv
|
||||
|
||||
# Example -- included for reference only:
|
||||
# xx xxmodule.c
|
||||
|
||||
|
|
|
@ -1,723 +0,0 @@
|
|||
/*
|
||||
* _iconv_codec.c
|
||||
*
|
||||
* libiconv adaptor for Python iconvcodec
|
||||
*
|
||||
* Author : Hye-Shik Chang <perky@FreeBSD.org>
|
||||
* Created : 17 January 2003
|
||||
*/
|
||||
|
||||
#include "Python.h"
|
||||
#include <string.h>
|
||||
#include <iconv.h>
|
||||
|
||||
static const char *__version__ = "$Revision$";
|
||||
|
||||
#if Py_USING_UNICODE
|
||||
# if Py_UNICODE_SIZE == 2
|
||||
# ifdef __GNU_LIBRARY__
|
||||
# define UNICODE_ENCODING "ucs-2"
|
||||
# else
|
||||
# define UNICODE_ENCODING "ucs-2-internal"
|
||||
# endif
|
||||
# define MBENCODED_LENGTH_MAX 4
|
||||
# elif Py_UNICODE_SIZE == 4
|
||||
# ifdef __GNU_LIBRARY__
|
||||
# define UNICODE_ENCODING "ucs-4"
|
||||
# else
|
||||
# define UNICODE_ENCODING "ucs-4-internal"
|
||||
# endif
|
||||
# define MBENCODED_LENGTH_MAX 6
|
||||
# endif
|
||||
#else
|
||||
# error "Unicode is not available"
|
||||
#endif
|
||||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
iconv_t enchdl, dechdl;
|
||||
char *encoding;
|
||||
} iconvcodecObject;
|
||||
PyDoc_STRVAR(iconvcodec_doc, "iconvcodec object");
|
||||
|
||||
/* does the chosen internal encoding require
|
||||
* byteswapping to get native endianness?
|
||||
* 0=no, 1=yes, -1=unknown */
|
||||
static int byteswap = -1;
|
||||
|
||||
#define ERROR_STRICT (PyObject *)(1)
|
||||
#define ERROR_IGNORE (PyObject *)(2)
|
||||
#define ERROR_REPLACE (PyObject *)(3)
|
||||
#define ERROR_MAX ERROR_REPLACE
|
||||
|
||||
#define REPLACEMENT_CHAR_DECODE 0xFFFD
|
||||
#define REPLACEMENT_CHAR_ENCODE '?'
|
||||
|
||||
#define DEFAULT_ENCODING "utf-8"
|
||||
|
||||
|
||||
static PyObject *
|
||||
get_errorcallback(const char *errors)
|
||||
{
|
||||
if (errors == NULL || strcmp(errors, "strict") == 0)
|
||||
return ERROR_STRICT;
|
||||
else if (strcmp(errors, "ignore") == 0)
|
||||
return ERROR_IGNORE;
|
||||
else if (strcmp(errors, "replace") == 0)
|
||||
return ERROR_REPLACE;
|
||||
else
|
||||
return PyCodec_LookupError(errors);
|
||||
}
|
||||
|
||||
|
||||
PyDoc_STRVAR(iconvcodec_encode__doc__,
|
||||
"I.encode(unicode, [,errors]) -> (string, length consumed)\n\
|
||||
\n\
|
||||
Return an encoded string version of `unicode'. errors may be given to\n\
|
||||
set a different error handling scheme. Default is 'strict' meaning that\n\
|
||||
encoding errors raise a UnicodeEncodeError. Other possible values are\n\
|
||||
'ignore', 'replace' and 'xmlcharrefreplace' as well as any other name\n\
|
||||
registered with codecs.register_error that can handle UnicodeEncodeErrors.");
|
||||
|
||||
static PyObject *
|
||||
iconvcodec_encode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
static char *kwlist[] = { "input", "errors", NULL };
|
||||
Py_UNICODE *input;
|
||||
int inputlen;
|
||||
char *errors = NULL/*strict*/, *out, *out_top;
|
||||
const char *inp, *inp_top;
|
||||
size_t inplen, inplen_total, outlen, outlen_total, estep;
|
||||
PyObject *outputobj = NULL, *errorcb = NULL,
|
||||
*exceptionobj = NULL;
|
||||
Py_UNICODE *swappedinput = NULL;
|
||||
int swapi;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "u#|s:encode",
|
||||
kwlist, &input, &inputlen, &errors))
|
||||
return NULL; /* TypeError */
|
||||
|
||||
errorcb = get_errorcallback(errors);
|
||||
if (errorcb == NULL)
|
||||
return NULL; /* LookupError or something else from error handler */
|
||||
|
||||
inp = inp_top = (char *)input;
|
||||
inplen = inplen_total = (size_t)(inputlen * Py_UNICODE_SIZE);
|
||||
|
||||
outlen = inputlen * MBENCODED_LENGTH_MAX;
|
||||
if (outlen < 16)
|
||||
outlen = 16; /* for iso-2022 codecs */
|
||||
|
||||
outputobj = PyString_FromStringAndSize(NULL, outlen);
|
||||
if (outputobj == NULL)
|
||||
return NULL;
|
||||
out = out_top = PyString_AS_STRING(outputobj);
|
||||
outlen_total = outlen;
|
||||
|
||||
estep = inputlen * Py_UNICODE_SIZE / 2;
|
||||
|
||||
#define RESIZE_OUTBUFFER(size) { \
|
||||
size_t toadd = (size); \
|
||||
outlen_total += toadd; \
|
||||
outlen += toadd; \
|
||||
if (_PyString_Resize(&outputobj, outlen_total) == -1) \
|
||||
goto errorexit; \
|
||||
out = PyString_AS_STRING(outputobj) + (out - out_top); \
|
||||
out_top = PyString_AS_STRING(outputobj); \
|
||||
}
|
||||
if (byteswap) {
|
||||
swappedinput = PyMem_Malloc(inplen);
|
||||
if (swappedinput == NULL)
|
||||
return NULL;
|
||||
for (swapi = 0; swapi<inputlen; ++swapi)
|
||||
{
|
||||
Py_UNICODE c = input[swapi];
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
c = ((char *)&c)[0]<<8 | ((char *)&c)[1];
|
||||
#else
|
||||
c = ((char *)&c)[0]<<24 | ((char *)&c)[1]<<16 |
|
||||
((char *)&c)[2]<<8 | ((char *)&c)[3];
|
||||
#endif
|
||||
swappedinput[swapi] = c;
|
||||
}
|
||||
inp = inp_top = (char *)swappedinput;
|
||||
}
|
||||
|
||||
while (inplen > 0) {
|
||||
if (iconv(self->enchdl, (char**)&inp, &inplen, &out, &outlen)
|
||||
== (size_t)-1)
|
||||
{
|
||||
char reason[128];
|
||||
int errpos;
|
||||
|
||||
if (errno == E2BIG) {
|
||||
RESIZE_OUTBUFFER(estep);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
|
||||
inplen -= Py_UNICODE_SIZE;
|
||||
inp += Py_UNICODE_SIZE;
|
||||
if (errorcb == ERROR_REPLACE) {
|
||||
if (outlen < 1)
|
||||
RESIZE_OUTBUFFER(errno == EINVAL ? 1 : estep);
|
||||
outlen--;
|
||||
*out++ = REPLACEMENT_CHAR_ENCODE;
|
||||
}
|
||||
if (errno == EINVAL) break;
|
||||
else continue;
|
||||
}
|
||||
|
||||
errpos = (int)(inp - inp_top) / Py_UNICODE_SIZE;
|
||||
sprintf(reason, "Undefined character map from "
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
"\\u%04x"
|
||||
#elif Py_UNICODE_SIZE == 4
|
||||
"\\u%08x"
|
||||
#endif
|
||||
, *(Py_UNICODE *)inp);
|
||||
|
||||
if (exceptionobj == NULL) {
|
||||
if ((exceptionobj = PyUnicodeEncodeError_Create(
|
||||
self->encoding, input, inputlen,
|
||||
errpos, errpos + 1, reason)) == NULL)
|
||||
goto errorexit;
|
||||
} else {
|
||||
if (PyUnicodeEncodeError_SetStart(exceptionobj, errpos) != 0)
|
||||
goto errorexit;
|
||||
if (PyUnicodeEncodeError_SetEnd(exceptionobj, errpos + 1) != 0)
|
||||
goto errorexit;
|
||||
if (PyUnicodeEncodeError_SetReason(exceptionobj, reason) != 0)
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (errorcb == ERROR_STRICT) {
|
||||
PyCodec_StrictErrors(exceptionobj);
|
||||
goto errorexit;
|
||||
} else {
|
||||
PyObject *argsobj, *retobj, *retuni;
|
||||
long newpos;
|
||||
|
||||
argsobj = PyTuple_New(1);
|
||||
if (argsobj == NULL)
|
||||
goto errorexit;
|
||||
PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
|
||||
Py_INCREF(exceptionobj);
|
||||
retobj = PyObject_CallObject(errorcb, argsobj);
|
||||
Py_DECREF(argsobj);
|
||||
if (retobj == NULL)
|
||||
goto errorexit;
|
||||
|
||||
if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
|
||||
!PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
|
||||
!PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
|
||||
Py_DECREF(retobj);
|
||||
PyErr_SetString(PyExc_ValueError, "encoding error handler "
|
||||
"must return (unicode, int) tuple");
|
||||
goto errorexit;
|
||||
}
|
||||
if (PyUnicode_GET_SIZE(retuni) > 0) {
|
||||
#define errorexit errorexit_cbpad
|
||||
PyObject *retstr = NULL;
|
||||
int retstrsize;
|
||||
|
||||
retstr = PyUnicode_AsEncodedString(
|
||||
retuni, self->encoding, NULL);
|
||||
if (retstr == NULL || !PyString_Check(retstr))
|
||||
goto errorexit;
|
||||
|
||||
retstrsize = PyString_GET_SIZE(retstr);
|
||||
if (outlen < retstrsize)
|
||||
RESIZE_OUTBUFFER(errno == EINVAL || retstrsize > estep
|
||||
? retstrsize - outlen : estep);
|
||||
|
||||
memcpy(out, PyString_AS_STRING(retstr), retstrsize);
|
||||
out += retstrsize;
|
||||
outlen -= retstrsize;
|
||||
#undef errorexit
|
||||
if (0) {
|
||||
errorexit_cbpad: Py_XDECREF(retobj);
|
||||
Py_XDECREF(retstr);
|
||||
goto errorexit;
|
||||
}
|
||||
Py_DECREF(retstr);
|
||||
}
|
||||
|
||||
newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
|
||||
Py_DECREF(retobj);
|
||||
|
||||
if (newpos < 0)
|
||||
newpos = inputlen + newpos;
|
||||
if (newpos < 0 || newpos > inputlen) {
|
||||
PyErr_Format(PyExc_IndexError,
|
||||
"position %ld from error handler out of bounds",
|
||||
newpos);
|
||||
goto errorexit;
|
||||
}
|
||||
if (newpos == inputlen)
|
||||
break;
|
||||
inp = inp_top + Py_UNICODE_SIZE * newpos;
|
||||
inplen = inplen_total - Py_UNICODE_SIZE * newpos;
|
||||
}
|
||||
} else
|
||||
break;
|
||||
}
|
||||
#undef RESIZE_OUTBUFFER
|
||||
|
||||
{
|
||||
PyObject *rettup;
|
||||
int finalsize;
|
||||
|
||||
finalsize = (int)(out - out_top);
|
||||
|
||||
if (finalsize != outlen_total) {
|
||||
if (_PyString_Resize(&outputobj, finalsize) == -1)
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (errorcb > ERROR_MAX) {
|
||||
Py_DECREF(errorcb);
|
||||
}
|
||||
Py_XDECREF(exceptionobj);
|
||||
|
||||
rettup = PyTuple_New(2);
|
||||
if (rettup == NULL) {
|
||||
Py_DECREF(outputobj);
|
||||
if (byteswap)
|
||||
PyMem_Free(swappedinput);
|
||||
return NULL;
|
||||
}
|
||||
PyTuple_SET_ITEM(rettup, 0, outputobj);
|
||||
PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inputlen));
|
||||
return rettup;
|
||||
}
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(outputobj);
|
||||
if (errorcb > ERROR_MAX) {
|
||||
Py_DECREF(errorcb);
|
||||
}
|
||||
Py_XDECREF(exceptionobj);
|
||||
if (byteswap)
|
||||
PyMem_Free(swappedinput);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(iconvcodec_decode__doc__,
|
||||
"I.decode(string, [,errors]) -> (unicodeobject, length consumed)\n\
|
||||
\n\
|
||||
Decodes `string' using I, an iconvcodec instance. errors may be given\n\
|
||||
to set a different error handling scheme. Default is 'strict' meaning\n\
|
||||
that encoding errors raise a UnicodeDecodeError. Other possible values\n\
|
||||
are 'ignore' and 'replace' as well as any other name registerd with\n\
|
||||
codecs.register_error that is able to handle UnicodeDecodeErrors.");
|
||||
|
||||
static PyObject *
|
||||
iconvcodec_decode(iconvcodecObject *self, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
static char *kwlist[] = { "input", "errors", NULL };
|
||||
char *errors = NULL/*strict*/, *out, *out_top;
|
||||
const char *inp, *inp_top;
|
||||
int inplen_int;
|
||||
size_t inplen, inplen_total, outlen, outlen_total, estep;
|
||||
PyObject *outputobj = NULL, *errorcb = NULL,
|
||||
*exceptionobj = NULL;
|
||||
|
||||
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#|s:decode",
|
||||
kwlist, &inp, &inplen_int, &errors))
|
||||
return NULL; /* TypeError */
|
||||
|
||||
errorcb = get_errorcallback(errors);
|
||||
if (errorcb == NULL)
|
||||
return NULL; /* LookupError or something else from error handler */
|
||||
|
||||
inp_top = inp;
|
||||
inplen_total = inplen = (size_t)inplen_int;
|
||||
|
||||
outputobj = PyUnicode_FromUnicode(NULL, inplen);
|
||||
if (outputobj == NULL)
|
||||
return NULL;
|
||||
outlen_total = outlen = PyUnicode_GET_DATA_SIZE(outputobj);
|
||||
out = out_top = (char *)PyUnicode_AS_UNICODE(outputobj);
|
||||
|
||||
estep = outlen / 2;
|
||||
|
||||
#define RESIZE_OUTBUFFER(size) { \
|
||||
size_t toadd = (size); \
|
||||
outlen_total += toadd; \
|
||||
outlen += toadd; \
|
||||
if (PyUnicode_Resize(&outputobj, outlen_total/Py_UNICODE_SIZE) == -1) \
|
||||
goto errorexit; \
|
||||
out = (char *)PyUnicode_AS_UNICODE(outputobj) + (out - out_top); \
|
||||
out_top = (char *)PyUnicode_AS_UNICODE(outputobj); \
|
||||
}
|
||||
while (inplen > 0) {
|
||||
char *oldout = out;
|
||||
size_t res = iconv(self->dechdl, (char**)&inp, &inplen, &out, &outlen);
|
||||
|
||||
if (byteswap) {
|
||||
while (oldout < out)
|
||||
{
|
||||
char c0 = oldout[0];
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
oldout[0] = oldout[1];
|
||||
oldout[1] = c0;
|
||||
#else
|
||||
char c1 = oldout[1];
|
||||
oldout[0] = oldout[3];
|
||||
oldout[1] = oldout[2];
|
||||
oldout[2] = c1;
|
||||
oldout[3] = c0;
|
||||
#endif
|
||||
oldout += sizeof(Py_UNICODE);
|
||||
}
|
||||
}
|
||||
if (res == (size_t)-1) {
|
||||
char reason[128], *reasonpos = (char *)reason;
|
||||
int errpos;
|
||||
|
||||
if (errno == E2BIG) {
|
||||
RESIZE_OUTBUFFER(estep);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (errorcb == ERROR_IGNORE || errorcb == ERROR_REPLACE) {
|
||||
inplen--; inp++;
|
||||
if (errorcb == ERROR_REPLACE) {
|
||||
Py_UNICODE *replp;
|
||||
|
||||
if (outlen < Py_UNICODE_SIZE)
|
||||
RESIZE_OUTBUFFER(
|
||||
errno == EINVAL || Py_UNICODE_SIZE > estep
|
||||
? Py_UNICODE_SIZE : estep);
|
||||
|
||||
/* some compilers hate casted lvalue */
|
||||
replp = (Py_UNICODE *)out;
|
||||
assert((long)replp % Py_UNICODE_SIZE == 0);/* aligned? */
|
||||
*replp = REPLACEMENT_CHAR_DECODE;
|
||||
|
||||
out += Py_UNICODE_SIZE;
|
||||
outlen -= Py_UNICODE_SIZE;
|
||||
}
|
||||
if (errno == EINVAL) break;
|
||||
else continue;
|
||||
}
|
||||
|
||||
errpos = (int)(inp - inp_top);
|
||||
reasonpos += sprintf(reason, "Invalid multibyte sequence \\x%02x",
|
||||
(unsigned char)*inp);
|
||||
if (inplen > 1) {
|
||||
reasonpos += sprintf(reasonpos,
|
||||
"\\x%02x", (unsigned char)*(inp+1));
|
||||
if (inplen > 2)
|
||||
sprintf(reasonpos, "\\x%02x", (unsigned char)*(inp+2));
|
||||
}
|
||||
|
||||
if (exceptionobj == NULL) {
|
||||
exceptionobj = PyUnicodeDecodeError_Create(
|
||||
self->encoding, inp_top, inplen_total,
|
||||
errpos, errpos + 1, reason);
|
||||
if (exceptionobj == NULL)
|
||||
goto errorexit;
|
||||
} else {
|
||||
if (PyUnicodeDecodeError_SetStart(exceptionobj, errpos) != 0)
|
||||
goto errorexit;
|
||||
if (PyUnicodeDecodeError_SetEnd(exceptionobj, errpos + 1) != 0)
|
||||
goto errorexit;
|
||||
if (PyUnicodeDecodeError_SetReason(exceptionobj, reason) != 0)
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (errorcb == ERROR_STRICT) {
|
||||
PyCodec_StrictErrors(exceptionobj);
|
||||
goto errorexit;
|
||||
} else {
|
||||
PyObject *argsobj, *retobj, *retuni;
|
||||
long newpos;
|
||||
|
||||
argsobj = PyTuple_New(1);
|
||||
if (argsobj == NULL)
|
||||
goto errorexit;
|
||||
PyTuple_SET_ITEM(argsobj, 0, exceptionobj);
|
||||
Py_INCREF(exceptionobj);
|
||||
retobj = PyObject_CallObject(errorcb, argsobj);
|
||||
Py_DECREF(argsobj);
|
||||
if (retobj == NULL)
|
||||
goto errorexit;
|
||||
|
||||
if (!PyTuple_Check(retobj) || PyTuple_GET_SIZE(retobj) != 2 ||
|
||||
!PyUnicode_Check((retuni = PyTuple_GET_ITEM(retobj, 0))) ||
|
||||
!PyInt_Check(PyTuple_GET_ITEM(retobj, 1))) {
|
||||
Py_DECREF(retobj);
|
||||
PyErr_SetString(PyExc_ValueError, "decoding error handler "
|
||||
"must return (unicode, int) tuple");
|
||||
goto errorexit;
|
||||
}
|
||||
if (PyUnicode_GET_SIZE(retuni) > 0) {
|
||||
#define errorexit errorexit_cbpad
|
||||
size_t retunisize;
|
||||
|
||||
retunisize = PyUnicode_GET_DATA_SIZE(retuni);
|
||||
if (outlen < retunisize)
|
||||
RESIZE_OUTBUFFER(errno == EINVAL || retunisize > estep
|
||||
? retunisize - outlen : estep);
|
||||
|
||||
memcpy(out, PyUnicode_AS_DATA(retuni), retunisize);
|
||||
out += retunisize;
|
||||
outlen -= retunisize;
|
||||
#undef errorexit
|
||||
if (0) {
|
||||
errorexit_cbpad: Py_DECREF(retobj);
|
||||
goto errorexit;
|
||||
}
|
||||
}
|
||||
|
||||
newpos = PyInt_AS_LONG(PyTuple_GET_ITEM(retobj, 1));
|
||||
Py_DECREF(retobj);
|
||||
|
||||
if (newpos < 0)
|
||||
newpos = inplen_total + newpos;
|
||||
if (newpos < 0 || newpos > inplen_total) {
|
||||
PyErr_Format(PyExc_IndexError,
|
||||
"position %ld from error handler out of bounds",
|
||||
newpos);
|
||||
goto errorexit;
|
||||
}
|
||||
if (newpos == inplen_total)
|
||||
break;
|
||||
inp = inp_top + newpos;
|
||||
inplen = inplen_total - newpos;
|
||||
}
|
||||
} else
|
||||
break;
|
||||
}
|
||||
#undef RESIZE_OUTBUFFER
|
||||
|
||||
{
|
||||
PyObject *rettup;
|
||||
int finalsize;
|
||||
|
||||
finalsize = (int)(out - out_top);
|
||||
if (finalsize != outlen_total) {
|
||||
if (PyUnicode_Resize(&outputobj, finalsize / Py_UNICODE_SIZE)
|
||||
== -1)
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
if (errorcb > ERROR_MAX) {
|
||||
Py_DECREF(errorcb);
|
||||
}
|
||||
Py_XDECREF(exceptionobj);
|
||||
|
||||
rettup = PyTuple_New(2);
|
||||
if (rettup == NULL) {
|
||||
Py_DECREF(outputobj);
|
||||
return NULL;
|
||||
}
|
||||
PyTuple_SET_ITEM(rettup, 0, outputobj);
|
||||
PyTuple_SET_ITEM(rettup, 1, PyInt_FromLong(inplen_total));
|
||||
return rettup;
|
||||
}
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(outputobj);
|
||||
if (errorcb > ERROR_MAX) {
|
||||
Py_DECREF(errorcb);
|
||||
}
|
||||
Py_XDECREF(exceptionobj);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct PyMethodDef iconvcodec_methods[] = {
|
||||
{"encode", (PyCFunction)iconvcodec_encode,
|
||||
METH_VARARGS | METH_KEYWORDS,
|
||||
iconvcodec_encode__doc__},
|
||||
{"decode", (PyCFunction)iconvcodec_decode,
|
||||
METH_VARARGS | METH_KEYWORDS,
|
||||
iconvcodec_decode__doc__},
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
static PyObject *
|
||||
iconvcodec_new(PyTypeObject *type, PyObject *args, PyObject *kwargs)
|
||||
{
|
||||
PyObject *encobj = NULL;
|
||||
iconvcodecObject *new = NULL;
|
||||
|
||||
new = (iconvcodecObject *)type->tp_alloc(type, 0);
|
||||
if (new == NULL)
|
||||
return NULL;
|
||||
|
||||
new->encoding = NULL;
|
||||
new->enchdl = new->dechdl = (iconv_t)(-1);
|
||||
|
||||
encobj = PyObject_GetAttrString((PyObject *)new, "encoding");
|
||||
if (encobj == NULL) {
|
||||
PyErr_Clear();
|
||||
new->encoding = PyMem_Malloc(sizeof(DEFAULT_ENCODING));
|
||||
strcpy(new->encoding, DEFAULT_ENCODING);
|
||||
} else if (!PyString_Check(encobj)) {
|
||||
Py_DECREF(encobj);
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"`encoding' attribute must be a string.");
|
||||
goto errorexit;
|
||||
} else {
|
||||
new->encoding = PyMem_Malloc(PyString_GET_SIZE(encobj) + 1);
|
||||
strcpy(new->encoding, PyString_AS_STRING(encobj));
|
||||
Py_DECREF(encobj);
|
||||
}
|
||||
|
||||
new->dechdl = iconv_open(UNICODE_ENCODING, new->encoding);
|
||||
if (new->dechdl == (iconv_t)(-1)) {
|
||||
PyErr_SetString(PyExc_ValueError, "unsupported decoding");
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
new->enchdl = iconv_open(new->encoding, UNICODE_ENCODING);
|
||||
if (new->enchdl == (iconv_t)(-1)) {
|
||||
PyErr_SetString(PyExc_ValueError, "unsupported encoding");
|
||||
iconv_close(new->dechdl);
|
||||
new->dechdl = (iconv_t)(-1);
|
||||
goto errorexit;
|
||||
}
|
||||
|
||||
return (PyObject *)new;
|
||||
|
||||
errorexit:
|
||||
Py_XDECREF(new);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
iconvcodec_dealloc(iconvcodecObject *self)
|
||||
{
|
||||
if (self->enchdl != (iconv_t)-1)
|
||||
iconv_close(self->enchdl);
|
||||
if (self->dechdl != (iconv_t)-1)
|
||||
iconv_close(self->dechdl);
|
||||
if (self->encoding != NULL)
|
||||
PyMem_Free(self->encoding);
|
||||
|
||||
self->ob_type->tp_free((PyObject *)self);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
iconvcodec_repr(PyObject *self)
|
||||
{
|
||||
return PyString_FromFormat("<iconvcodec encoding='%s'>",
|
||||
((iconvcodecObject *)self)->encoding);
|
||||
}
|
||||
|
||||
static PyTypeObject iconvcodec_Type = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /* Number of items for varobject */
|
||||
"iconvcodec", /* Name of this type */
|
||||
sizeof(iconvcodecObject), /* Basic object size */
|
||||
0, /* Item size for varobject */
|
||||
(destructor)iconvcodec_dealloc, /* tp_dealloc */
|
||||
0, /* tp_print */
|
||||
0, /* tp_getattr */
|
||||
0, /* tp_setattr */
|
||||
0, /* tp_compare */
|
||||
iconvcodec_repr, /* tp_repr */
|
||||
0, /* tp_as_number */
|
||||
0, /* tp_as_sequence */
|
||||
0, /* tp_as_mapping */
|
||||
0, /* tp_hash */
|
||||
0, /* tp_call */
|
||||
0, /* tp_str */
|
||||
PyObject_GenericGetAttr, /* tp_getattro */
|
||||
0, /* tp_setattro */
|
||||
0, /* tp_as_buffer */
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
|
||||
iconvcodec_doc, /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
iconvcodec_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
0, /* tp_dict */
|
||||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
0, /* tp_init */
|
||||
PyType_GenericAlloc, /* tp_alloc */
|
||||
iconvcodec_new, /* tp_new */
|
||||
PyObject_Del, /* tp_free */
|
||||
};
|
||||
|
||||
static struct PyMethodDef _iconv_codec_methods[] = {
|
||||
{NULL, NULL},
|
||||
};
|
||||
|
||||
void
|
||||
init_iconv_codec(void)
|
||||
{
|
||||
PyObject *m;
|
||||
|
||||
char in = '0';
|
||||
char *inptr = &in;
|
||||
size_t insize = 1;
|
||||
Py_UNICODE out = 0;
|
||||
char *outptr = (char *)&out;
|
||||
size_t outsize = sizeof(out);
|
||||
size_t res;
|
||||
|
||||
iconv_t hdl = iconv_open(UNICODE_ENCODING, "ISO-8859-1");
|
||||
|
||||
if (hdl == (iconv_t)-1) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"can't initialize the _iconv_codec module: iconv_open() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
res = iconv(hdl, &inptr, &insize, &outptr, &outsize);
|
||||
if (res == (size_t)-1) {
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"can't initialize the _iconv_codec module: iconv() failed");
|
||||
return;
|
||||
}
|
||||
|
||||
/* Check whether iconv() returned native endianness or not for the chosen
|
||||
encoding */
|
||||
if (out == 0x30)
|
||||
byteswap = 0;
|
||||
#if Py_UNICODE_SIZE == 2
|
||||
else if (out == 0x3000)
|
||||
#else
|
||||
else if (out == 0x30000000)
|
||||
#endif
|
||||
byteswap = 1;
|
||||
else {
|
||||
iconv_close(hdl);
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"can't initialize the _iconv_codec module: mixed endianess");
|
||||
return;
|
||||
}
|
||||
iconv_close(hdl);
|
||||
|
||||
iconvcodec_Type.ob_type = &PyType_Type;
|
||||
m = Py_InitModule("_iconv_codec", _iconv_codec_methods);
|
||||
|
||||
PyModule_AddStringConstant(m, "__version__", (char*)__version__);
|
||||
Py_INCREF(&iconvcodec_Type);
|
||||
PyModule_AddObject(m, "iconvcodec", (PyObject *)(&iconvcodec_Type));
|
||||
PyModule_AddStringConstant(m, "internal_encoding", UNICODE_ENCODING);
|
||||
|
||||
if (PyErr_Occurred())
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"can't initialize the _iconv_codec module");
|
||||
}
|
||||
|
||||
/*
|
||||
* ex: ts=8 sts=4 et
|
||||
* $Id$
|
||||
*/
|
18
setup.py
18
setup.py
|
@ -622,24 +622,6 @@ class PyBuildExt(build_ext):
|
|||
exts.append( Extension('nis', ['nismodule.c'],
|
||||
libraries = libs) )
|
||||
|
||||
# Hye-Shik Chang's iconv_codec C interface
|
||||
iconv_incs = find_file('iconv.h', inc_dirs,
|
||||
['/usr/local/include', '/usr/pkg/include'])
|
||||
iconv_libs = find_library_file(self.compiler, 'iconv', lib_dirs,
|
||||
['/usr/local/lib', '/usr/pkg/lib'])
|
||||
|
||||
if platform not in ['darwin'] and iconv_incs is not None:
|
||||
if iconv_libs is not None:
|
||||
iconv_libraries = ['iconv']
|
||||
else:
|
||||
iconv_libraries = [] # in libc
|
||||
|
||||
exts.append( Extension('_iconv_codec',
|
||||
['_iconv_codec.c'],
|
||||
include_dirs = iconv_incs,
|
||||
library_dirs = iconv_libs,
|
||||
libraries = iconv_libraries), )
|
||||
|
||||
# Curses support, requiring the System V version of curses, often
|
||||
# provided by the ncurses library.
|
||||
if platform == 'sunos4':
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue