tokenizer.c: make coding markup work again.
io.open() now takes all positional parameters (so we can conveniently call it from C code). test_tarfile.py no longer uses u"..." literals, but is otherwise still badly broken. This is a checkpoint; some more stuff now breaks.
parent e7ba495627
commit 9cbfffd1a6

3 changed files with 35 additions and 32 deletions
Lib/io.py

@@ -49,7 +49,7 @@ class BlockingIOError(IOError):
         self.characters_written = characters_written


-def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
+def open(file, mode="r", buffering=None, encoding=None, newline=None):
     """Replacement for the built-in open function.

     Args:
@@ -59,7 +59,6 @@ def open(file, mode="r", buffering=None, *, encoding=None, newline=None):
       buffering: optional int >= 0 giving the buffer size; values
                  can be: 0 = unbuffered, 1 = line buffered,
                  larger = fully buffered.
-    Keywords (for text modes only; *must* be given as keyword arguments):
       encoding: optional string giving the text encoding.
       newline: optional newlines specifier; must be None, '\n' or '\r\n';
                specifies the line ending expected on input and written on
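Dropping the keyword-only marker above is what lets the tokenizer (see the Parser/tokenizer.c hunk below) call open() through PyObject_CallMethod(), which builds its argument tuple from a format string and therefore can only pass positional arguments. The following is a minimal sketch of such a caller, assuming an already-initialized interpreter; the helper name open_via_io is hypothetical and not part of this commit.

#include <Python.h>

/* Hypothetical helper: open a text file through the Python-level io.open(),
 * passing every argument positionally (file, mode, buffering, encoding),
 * which only works once open() stops requiring keyword-only arguments. */
static PyObject *
open_via_io(const char *filename, const char *enc)
{
    PyObject *io, *stream;

    io = PyImport_ImportModule("io");
    if (io == NULL)
        return NULL;

    /* "ssis" = (str, str, int, str): filename, mode "r", buffering of -1
     * (mirroring the tokenizer call below), and the encoding name. */
    stream = PyObject_CallMethod(io, "open", "ssis", filename, "r", -1, enc);
    Py_DECREF(io);
    return stream;  /* NULL with an exception set on failure */
}
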
Lib/test/test_tarfile.py

@@ -432,17 +432,17 @@ class PaxReadTest(LongnameTest):
         tarinfo = tar.getmember("pax/regtype1")
         self.assertEqual(tarinfo.uname, "foo")
         self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")

         tarinfo = tar.getmember("pax/regtype2")
         self.assertEqual(tarinfo.uname, "")
         self.assertEqual(tarinfo.gname, "bar")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")

         tarinfo = tar.getmember("pax/regtype3")
         self.assertEqual(tarinfo.uname, "tarfile")
         self.assertEqual(tarinfo.gname, "tarfile")
-        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), u"ÄÖÜäöüß")
+        self.assertEqual(tarinfo.pax_headers.get("VENDOR.umlauts"), "ÄÖÜäöüß")

     def test_pax_number_fields(self):
         # All following number fields are read from the pax header.
@@ -727,11 +727,11 @@ class PaxWriteTest(GNUWriteTest):

     def test_pax_global_header(self):
         pax_headers = {
-                u"foo": u"bar",
-                u"uid": u"0",
-                u"mtime": u"1.23",
-                u"test": u"äöü",
-                u"äöü": u"test"}
+                "foo": "bar",
+                "uid": "0",
+                "mtime": "1.23",
+                "test": "äöü",
+                "äöü": "test"}

         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, \
                 pax_headers=pax_headers)
@@ -756,11 +756,11 @@ class PaxWriteTest(GNUWriteTest):
     def test_pax_extended_header(self):
         # The fields from the pax header have priority over the
         # TarInfo.
-        pax_headers = {u"path": u"foo", u"uid": u"123"}
+        pax_headers = {"path": "foo", "uid": "123"}

         tar = tarfile.open(tmpname, "w", format=tarfile.PAX_FORMAT, encoding="iso8859-1")
         t = tarfile.TarInfo()
-        t.name = u"äöü" # non-ASCII
+        t.name = "äöü" # non-ASCII
         t.uid = 8**8 # too large
         t.pax_headers = pax_headers
         tar.addfile(t)
@@ -808,11 +808,11 @@ class UstarUnicodeTest(unittest.TestCase):
         else:
             tar.addfile(tarinfo)

-        tarinfo.name = u"äöü"
+        tarinfo.name = "äöü"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)

         tarinfo.name = "foo"
-        tarinfo.uname = u"äöü"
+        tarinfo.uname = "äöü"
         self.assertRaises(UnicodeError, tar.addfile, tarinfo)

     def test_unicode_argument(self):
@@ -825,7 +825,7 @@ class UstarUnicodeTest(unittest.TestCase):
         tar.close()

     def test_uname_unicode(self):
-        for name in (u"äöü", "äöü"):
+        for name in ("äöü", "äöü"):
             t = tarfile.TarInfo("foo")
             t.uname = name
             t.gname = name
@@ -860,9 +860,9 @@ class PaxUnicodeTest(UstarUnicodeTest):
     def test_error_handlers(self):
         # Test if the unicode error handlers work correctly for characters
         # that cannot be expressed in a given encoding.
-        self._create_unicode_name(u"äöü")
+        self._create_unicode_name("äöü")

-        for handler, name in (("utf-8", u"äöü".encode("utf8")),
+        for handler, name in (("utf-8", "äöü".encode("utf8")),
                     ("replace", "???"), ("ignore", "")):
             tar = tarfile.open(tmpname, format=self.format, encoding="ascii",
                     errors=handler)
@@ -874,11 +874,11 @@ class PaxUnicodeTest(UstarUnicodeTest):
     def test_error_handler_utf8(self):
         # Create a pathname that has one component representable using
         # iso8859-1 and the other only in iso8859-15.
-        self._create_unicode_name(u"äöü/¤")
+        self._create_unicode_name("äöü/¤")

         tar = tarfile.open(tmpname, format=self.format, encoding="iso8859-1",
                 errors="utf-8")
-        self.assertEqual(tar.getnames()[0], "äöü/" + u"¤".encode("utf8"))
+        self.assertEqual(tar.getnames()[0], "äöü/" + "¤".encode("utf8"))


 class AppendTest(unittest.TestCase):
Parser/tokenizer.c

@@ -396,25 +396,29 @@ fp_readl(char *s, int size, struct tok_state *tok)
 static int
 fp_setreadl(struct tok_state *tok, const char* enc)
 {
-        PyObject *reader, *stream, *readline;
+        PyObject *readline = NULL, *stream = NULL, *io = NULL;
+        int ok = 0;

-        /* XXX: constify filename argument. */
-        stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);
+        io = PyImport_ImportModule("io");
+        if (io == NULL)
+                goto cleanup;
+
+        stream = PyObject_CallMethod(io, "open", "ssis",
+                                     tok->filename, "r", -1, enc);
         if (stream == NULL)
-                return 0;
+                goto cleanup;

-        reader = PyCodec_StreamReader(enc, stream, NULL);
-        Py_DECREF(stream);
-        if (reader == NULL)
-                return 0;
-
-        readline = PyObject_GetAttrString(reader, "readline");
-        Py_DECREF(reader);
+        readline = PyObject_GetAttrString(stream, "readline");
         if (readline == NULL)
-                return 0;
+                goto cleanup;

         tok->decoding_readline = readline;
-        return 1;
+        ok = 1;
+
+ cleanup:
+        Py_XDECREF(stream);
+        Py_XDECREF(io);
+        return ok;
 }

 /* Fetch the next byte from TOK. */
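One note on the error handling in the rewritten fp_setreadl(): every owned reference starts out NULL, all failure paths jump to a single cleanup label, and Py_XDECREF (a no-op on NULL) releases whatever was actually acquired. Dropping the local stream reference there is safe because the readline bound method saved in tok->decoding_readline holds its own reference to the stream object.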