Make dbm.dumb encode strings as UTF-8. Also fix it so it accepts bytes and
strings.

Closes issue #3799.
This commit is contained in:
Brett Cannon 2008-11-21 00:17:53 +00:00
parent 6e0d68e9e2
commit 58425d3103
3 changed files with 52 additions and 27 deletions

View file

@ -84,6 +84,7 @@ class _Database(collections.MutableMapping):
for line in f: for line in f:
line = line.rstrip() line = line.rstrip()
key, pos_and_siz_pair = eval(line) key, pos_and_siz_pair = eval(line)
key = key.encode('Latin-1')
self._index[key] = pos_and_siz_pair self._index[key] = pos_and_siz_pair
f.close() f.close()
@ -110,13 +111,16 @@ class _Database(collections.MutableMapping):
f = self._io.open(self._dirfile, 'w') f = self._io.open(self._dirfile, 'w')
self._chmod(self._dirfile) self._chmod(self._dirfile)
for key, pos_and_siz_pair in self._index.items(): for key, pos_and_siz_pair in self._index.items():
f.write("%r, %r\n" % (key, pos_and_siz_pair)) # Use Latin-1 since it has no qualms with any value in any
# position; UTF-8, though, does care sometimes.
f.write("%r, %r\n" % (key.decode('Latin-1'), pos_and_siz_pair))
f.close() f.close()
sync = _commit sync = _commit
def __getitem__(self, key): def __getitem__(self, key):
key = key.decode("latin-1") if isinstance(key, str):
key = key.encode('utf-8')
pos, siz = self._index[key] # may raise KeyError pos, siz = self._index[key] # may raise KeyError
f = _io.open(self._datfile, 'rb') f = _io.open(self._datfile, 'rb')
f.seek(pos) f.seek(pos)
@ -161,11 +165,12 @@ class _Database(collections.MutableMapping):
f.close() f.close()
def __setitem__(self, key, val): def __setitem__(self, key, val):
if not isinstance(key, bytes): if isinstance(key, str):
raise TypeError("keys must be bytes") key = key.encode('utf-8')
key = key.decode("latin-1") # hashable bytes elif not isinstance(key, (bytes, bytearray)):
raise TypeError("keys must be bytes or strings")
if not isinstance(val, (bytes, bytearray)): if not isinstance(val, (bytes, bytearray)):
raise TypeError("values must be byte strings") raise TypeError("values must be bytes")
if key not in self._index: if key not in self._index:
self._addkey(key, self._addval(val)) self._addkey(key, self._addval(val))
else: else:
@ -191,7 +196,8 @@ class _Database(collections.MutableMapping):
# (so that _commit() never gets called). # (so that _commit() never gets called).
def __delitem__(self, key): def __delitem__(self, key):
key = key.decode("latin-1") if isinstance(key, str):
key = key.encode('utf-8')
# The blocks used by the associated value are lost. # The blocks used by the associated value are lost.
del self._index[key] del self._index[key]
# XXX It's unclear why we do a _commit() here (the code always # XXX It's unclear why we do a _commit() here (the code always
@ -201,14 +207,14 @@ class _Database(collections.MutableMapping):
self._commit() self._commit()
def keys(self): def keys(self):
return [key.encode("latin-1") for key in self._index.keys()] return list(self._index.keys())
def items(self): def items(self):
return [(key.encode("latin-1"), self[key.encode("latin-1")]) return [(key, self[key]) for key in self._index.keys()]
for key in self._index.keys()]
def __contains__(self, key): def __contains__(self, key):
key = key.decode("latin-1") if isinstance(key, str):
key = key.encode('utf-8')
return key in self._index return key in self._index
def iterkeys(self): def iterkeys(self):

View file

@ -19,13 +19,14 @@ def _delete_files():
pass pass
class DumbDBMTestCase(unittest.TestCase): class DumbDBMTestCase(unittest.TestCase):
_dict = {'0': b'', _dict = {b'0': b'',
'a': b'Python:', b'a': b'Python:',
'b': b'Programming', b'b': b'Programming',
'c': b'the', b'c': b'the',
'd': b'way', b'd': b'way',
'f': b'Guido', b'f': b'Guido',
'g': b'intended', b'g': b'intended',
'\u00fc'.encode('utf-8') : b'!',
} }
def __init__(self, *args): def __init__(self, *args):
@ -35,7 +36,7 @@ class DumbDBMTestCase(unittest.TestCase):
f = dumbdbm.open(_fname, 'c') f = dumbdbm.open(_fname, 'c')
self.assertEqual(list(f.keys()), []) self.assertEqual(list(f.keys()), [])
for key in self._dict: for key in self._dict:
f[key.encode("ascii")] = self._dict[key] f[key] = self._dict[key]
self.read_helper(f) self.read_helper(f)
f.close() f.close()
@ -73,7 +74,7 @@ class DumbDBMTestCase(unittest.TestCase):
def test_dumbdbm_modification(self): def test_dumbdbm_modification(self):
self.init_db() self.init_db()
f = dumbdbm.open(_fname, 'w') f = dumbdbm.open(_fname, 'w')
self._dict['g'] = f[b'g'] = b"indented" self._dict[b'g'] = f[b'g'] = b"indented"
self.read_helper(f) self.read_helper(f)
f.close() f.close()
@ -105,6 +106,21 @@ class DumbDBMTestCase(unittest.TestCase):
self.assertEqual(f[b'1'], b'hello2') self.assertEqual(f[b'1'], b'hello2')
f.close() f.close()
def test_str_read(self):
self.init_db()
f = dumbdbm.open(_fname, 'r')
self.assertEqual(f['\u00fc'], self._dict['\u00fc'.encode('utf-8')])
def test_str_write_contains(self):
self.init_db()
f = dumbdbm.open(_fname)
f['\u00fc'] = b'!'
f.close()
f = dumbdbm.open(_fname, 'r')
self.assert_('\u00fc' in f)
self.assertEqual(f['\u00fc'.encode('utf-8')],
self._dict['\u00fc'.encode('utf-8')])
def test_line_endings(self): def test_line_endings(self):
# test for bug #1172763: dumbdbm would die if the line endings # test for bug #1172763: dumbdbm would die if the line endings
# weren't what was expected. # weren't what was expected.
@ -129,16 +145,16 @@ class DumbDBMTestCase(unittest.TestCase):
def read_helper(self, f): def read_helper(self, f):
keys = self.keys_helper(f) keys = self.keys_helper(f)
for key in self._dict: for key in self._dict:
self.assertEqual(self._dict[key], f[key.encode("ascii")]) self.assertEqual(self._dict[key], f[key])
def init_db(self): def init_db(self):
f = dumbdbm.open(_fname, 'w') f = dumbdbm.open(_fname, 'w')
for k in self._dict: for k in self._dict:
f[k.encode("ascii")] = self._dict[k] f[k] = self._dict[k]
f.close() f.close()
def keys_helper(self, f): def keys_helper(self, f):
keys = sorted(k.decode("ascii") for k in f.keys()) keys = sorted(f.keys())
dkeys = sorted(self._dict.keys()) dkeys = sorted(self._dict.keys())
self.assertEqual(keys, dkeys) self.assertEqual(keys, dkeys)
return keys return keys
@ -155,12 +171,12 @@ class DumbDBMTestCase(unittest.TestCase):
if random.random() < 0.2: if random.random() < 0.2:
if k in d: if k in d:
del d[k] del d[k]
del f[k.encode("ascii")] del f[k]
else: else:
v = random.choice((b'a', b'b', b'c')) * random.randrange(10000) v = random.choice((b'a', b'b', b'c')) * random.randrange(10000)
d[k] = v d[k] = v
f[k.encode("ascii")] = v f[k] = v
self.assertEqual(f[k.encode("ascii")], v) self.assertEqual(f[k], v)
f.close() f.close()
f = dumbdbm.open(_fname) f = dumbdbm.open(_fname)

View file

@ -19,7 +19,7 @@ Core and Builtins
- Issue #3327: Don't overallocate in the modules_by_index list. - Issue #3327: Don't overallocate in the modules_by_index list.
- Issue #1721812: Binary set operations and copy() returned the input type - Issue #1721812: Binary set operations and copy() returned the input type
instead of the appropriate base type. This was incorrect because set instead of the appropriate base type. This was incorrect because set
subclasses would be created without their __init__() method being called. subclasses would be created without their __init__() method being called.
The corrected behavior brings sets into line with lists and dicts. The corrected behavior brings sets into line with lists and dicts.
@ -33,6 +33,9 @@ Core and Builtins
Library Library
------- -------
- Issue #3799: Fix dbm.dumb to accept strings as well as bytes for keys. String
keys are now written out in UTF-8.
- Issue #4338: Fix distutils upload command. - Issue #4338: Fix distutils upload command.
- Issue #4354: Fix distutils register command. - Issue #4354: Fix distutils register command.