mirror of
https://github.com/python/cpython.git
synced 2025-11-27 13:45:25 +00:00
Make dbm.dumb encode strings as UTF-8. Also fix it so it accepts bytes and
strings. Closes issue #3799.
This commit is contained in:
parent
6e0d68e9e2
commit
58425d3103
3 changed files with 52 additions and 27 deletions
|
|
@ -84,6 +84,7 @@ class _Database(collections.MutableMapping):
|
||||||
for line in f:
|
for line in f:
|
||||||
line = line.rstrip()
|
line = line.rstrip()
|
||||||
key, pos_and_siz_pair = eval(line)
|
key, pos_and_siz_pair = eval(line)
|
||||||
|
key = key.encode('Latin-1')
|
||||||
self._index[key] = pos_and_siz_pair
|
self._index[key] = pos_and_siz_pair
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
@ -110,13 +111,16 @@ class _Database(collections.MutableMapping):
|
||||||
f = self._io.open(self._dirfile, 'w')
|
f = self._io.open(self._dirfile, 'w')
|
||||||
self._chmod(self._dirfile)
|
self._chmod(self._dirfile)
|
||||||
for key, pos_and_siz_pair in self._index.items():
|
for key, pos_and_siz_pair in self._index.items():
|
||||||
f.write("%r, %r\n" % (key, pos_and_siz_pair))
|
# Use Latin-1 since it has no qualms with any value in any
|
||||||
|
# position; UTF-8, though, does care sometimes.
|
||||||
|
f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
sync = _commit
|
sync = _commit
|
||||||
|
|
||||||
def __getitem__(self, key):
|
def __getitem__(self, key):
|
||||||
key = key.decode("latin-1")
|
if isinstance(key, str):
|
||||||
|
key = key.encode('utf-8')
|
||||||
pos, siz = self._index[key] # may raise KeyError
|
pos, siz = self._index[key] # may raise KeyError
|
||||||
f = _io.open(self._datfile, 'rb')
|
f = _io.open(self._datfile, 'rb')
|
||||||
f.seek(pos)
|
f.seek(pos)
|
||||||
|
|
@ -161,11 +165,12 @@ class _Database(collections.MutableMapping):
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
def __setitem__(self, key, val):
|
def __setitem__(self, key, val):
|
||||||
if not isinstance(key, bytes):
|
if isinstance(key, str):
|
||||||
raise TypeError("keys must be bytes")
|
key = key.encode('utf-8')
|
||||||
key = key.decode("latin-1") # hashable bytes
|
elif not isinstance(key, (bytes, bytearray)):
|
||||||
|
raise TypeError("keys must be bytes or strings")
|
||||||
if not isinstance(val, (bytes, bytearray)):
|
if not isinstance(val, (bytes, bytearray)):
|
||||||
raise TypeError("values must be byte strings")
|
raise TypeError("values must be bytes")
|
||||||
if key not in self._index:
|
if key not in self._index:
|
||||||
self._addkey(key, self._addval(val))
|
self._addkey(key, self._addval(val))
|
||||||
else:
|
else:
|
||||||
|
|
@ -191,7 +196,8 @@ class _Database(collections.MutableMapping):
|
||||||
# (so that _commit() never gets called).
|
# (so that _commit() never gets called).
|
||||||
|
|
||||||
def __delitem__(self, key):
|
def __delitem__(self, key):
|
||||||
key = key.decode("latin-1")
|
if isinstance(key, str):
|
||||||
|
key = key.encode('utf-8')
|
||||||
# The blocks used by the associated value are lost.
|
# The blocks used by the associated value are lost.
|
||||||
del self._index[key]
|
del self._index[key]
|
||||||
# XXX It's unclear why we do a _commit() here (the code always
|
# XXX It's unclear why we do a _commit() here (the code always
|
||||||
|
|
@ -201,14 +207,14 @@ class _Database(collections.MutableMapping):
|
||||||
self._commit()
|
self._commit()
|
||||||
|
|
||||||
def keys(self):
|
def keys(self):
|
||||||
return [key.encode("latin-1") for key in self._index.keys()]
|
return list(self._index.keys())
|
||||||
|
|
||||||
def items(self):
|
def items(self):
|
||||||
return [(key.encode("latin-1"), self[key.encode("latin-1")])
|
return [(key, self[key]) for key in self._index.keys()]
|
||||||
for key in self._index.keys()]
|
|
||||||
|
|
||||||
def __contains__(self, key):
|
def __contains__(self, key):
|
||||||
key = key.decode("latin-1")
|
if isinstance(key, str):
|
||||||
|
key = key.encode('utf-8')
|
||||||
return key in self._index
|
return key in self._index
|
||||||
|
|
||||||
def iterkeys(self):
|
def iterkeys(self):
|
||||||
|
|
|
||||||
|
|
@ -19,13 +19,14 @@ def _delete_files():
|
||||||
pass
|
pass
|
||||||
|
|
||||||
class DumbDBMTestCase(unittest.TestCase):
|
class DumbDBMTestCase(unittest.TestCase):
|
||||||
_dict = {'0': b'',
|
_dict = {b'0': b'',
|
||||||
'a': b'Python:',
|
b'a': b'Python:',
|
||||||
'b': b'Programming',
|
b'b': b'Programming',
|
||||||
'c': b'the',
|
b'c': b'the',
|
||||||
'd': b'way',
|
b'd': b'way',
|
||||||
'f': b'Guido',
|
b'f': b'Guido',
|
||||||
'g': b'intended',
|
b'g': b'intended',
|
||||||
|
'\u00fc'.encode('utf-8') : b'!',
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, *args):
|
def __init__(self, *args):
|
||||||
|
|
@ -35,7 +36,7 @@ class DumbDBMTestCase(unittest.TestCase):
|
||||||
f = dumbdbm.open(_fname, 'c')
|
f = dumbdbm.open(_fname, 'c')
|
||||||
self.assertEqual(list(f.keys()), [])
|
self.assertEqual(list(f.keys()), [])
|
||||||
for key in self._dict:
|
for key in self._dict:
|
||||||
f[key.encode("ascii")] = self._dict[key]
|
f[key] = self._dict[key]
|
||||||
self.read_helper(f)
|
self.read_helper(f)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
@ -73,7 +74,7 @@ class DumbDBMTestCase(unittest.TestCase):
|
||||||
def test_dumbdbm_modification(self):
|
def test_dumbdbm_modification(self):
|
||||||
self.init_db()
|
self.init_db()
|
||||||
f = dumbdbm.open(_fname, 'w')
|
f = dumbdbm.open(_fname, 'w')
|
||||||
self._dict['g'] = f[b'g'] = b"indented"
|
self._dict[b'g'] = f[b'g'] = b"indented"
|
||||||
self.read_helper(f)
|
self.read_helper(f)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
|
@ -105,6 +106,21 @@ class DumbDBMTestCase(unittest.TestCase):
|
||||||
self.assertEqual(f[b'1'], b'hello2')
|
self.assertEqual(f[b'1'], b'hello2')
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
|
def test_str_read(self):
|
||||||
|
self.init_db()
|
||||||
|
f = dumbdbm.open(_fname, 'r')
|
||||||
|
self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
|
||||||
|
|
||||||
|
def test_str_write_contains(self):
|
||||||
|
self.init_db()
|
||||||
|
f = dumbdbm.open(_fname)
|
||||||
|
f['\u00fc'] = b'!'
|
||||||
|
f.close()
|
||||||
|
f = dumbdbm.open(_fname, 'r')
|
||||||
|
self.assert_('\u00fc' in f)
|
||||||
|
self.assertEqual(f['\u00fc'.encode('utf-8')],
|
||||||
|
self._dict['\u00fc'.encode('utf-8')])
|
||||||
|
|
||||||
def test_line_endings(self):
|
def test_line_endings(self):
|
||||||
# test for bug #1172763: dumbdbm would die if the line endings
|
# test for bug #1172763: dumbdbm would die if the line endings
|
||||||
# weren't what was expected.
|
# weren't what was expected.
|
||||||
|
|
@ -129,16 +145,16 @@ class DumbDBMTestCase(unittest.TestCase):
|
||||||
def read_helper(self, f):
|
def read_helper(self, f):
|
||||||
keys = self.keys_helper(f)
|
keys = self.keys_helper(f)
|
||||||
for key in self._dict:
|
for key in self._dict:
|
||||||
self.assertEqual(self._dict[key], f[key.encode("ascii")])
|
self.assertEqual(self._dict[key], f[key])
|
||||||
|
|
||||||
def init_db(self):
|
def init_db(self):
|
||||||
f = dumbdbm.open(_fname, 'w')
|
f = dumbdbm.open(_fname, 'w')
|
||||||
for k in self._dict:
|
for k in self._dict:
|
||||||
f[k.encode("ascii")] = self._dict[k]
|
f[k] = self._dict[k]
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
def keys_helper(self, f):
|
def keys_helper(self, f):
|
||||||
keys = sorted(k.decode("ascii") for k in f.keys())
|
keys = sorted(f.keys())
|
||||||
dkeys = sorted(self._dict.keys())
|
dkeys = sorted(self._dict.keys())
|
||||||
self.assertEqual(keys, dkeys)
|
self.assertEqual(keys, dkeys)
|
||||||
return keys
|
return keys
|
||||||
|
|
@ -155,12 +171,12 @@ class DumbDBMTestCase(unittest.TestCase):
|
||||||
if random.random() < 0.2:
|
if random.random() < 0.2:
|
||||||
if k in d:
|
if k in d:
|
||||||
del d[k]
|
del d[k]
|
||||||
del f[k.encode("ascii")]
|
del f[k]
|
||||||
else:
|
else:
|
||||||
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
|
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
|
||||||
d[k] = v
|
d[k] = v
|
||||||
f[k.encode("ascii")] = v
|
f[k] = v
|
||||||
self.assertEqual(f[k.encode("ascii")], v)
|
self.assertEqual(f[k], v)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
f = dumbdbm.open(_fname)
|
f = dumbdbm.open(_fname)
|
||||||
|
|
|
||||||
|
|
@ -33,6 +33,9 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #3799: Fix dbm.dumb to accept strings as well as bytes for keys. String
|
||||||
|
keys are now written out in UTF-8.
|
||||||
|
|
||||||
- Issue #4338: Fix distutils upload command.
|
- Issue #4338: Fix distutils upload command.
|
||||||
|
|
||||||
- Issue #4354: Fix distutils register command.
|
- Issue #4354: Fix distutils register command.
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue