gh-134004: Added the reorganize() methods to dbm.sqlite, dbm.dumb and shelve (GH-134028)

They are similar to the method of the same name in dbm.gnu.
This commit is contained in:
Andrea-Oliveri 2025-06-01 14:30:04 +02:00 committed by GitHub
parent b595237166
commit f806463e16
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 172 additions and 6 deletions

View file

@ -9,7 +9,7 @@ XXX TO DO:
- seems to contain a bug when updating...
- reclaim free space (currently, space once occupied by deleted or expanded
items is never reused)
items is not reused unless .reorganize() is called)
- support concurrent access (currently, if two processes take turns making
updates, they can mess up the index)
@ -17,8 +17,6 @@ updates, they can mess up the index)
- support efficient access to large databases (currently, the whole index
is read when the database is opened, and some updates rewrite the whole index)
- support opening for read-only (flag = 'm')
"""
import ast as _ast
@ -289,6 +287,34 @@ class _Database(collections.abc.MutableMapping):
def __exit__(self, *args):
    """Leave the ``with`` block by closing the database.

    The exception details passed in *args* are ignored.
    """
    self.close()
    # A falsy result means an in-flight exception is not suppressed.
    return None
def reorganize(self):
    """Compact the data file in place and rewrite the index.

    Every value referenced by the index is copied, in order of its
    current offset, to the lowest free _BLOCKSIZE-aligned offset.  The
    data file is then truncated just past the last relocated value and
    the updated index is committed.

    Raises error if the database was opened read-only.
    """
    if self._readonly:
        raise error('The database is opened for reading only')
    self._verify_open()
    # Ensure all changes are committed before reorganizing.
    self._commit()
    # Open file in r+ to allow changing in-place.
    with _io.open(self._datfile, 'rb+') as f:
        reorganize_pos = 0
        # The offsets stored in the index are about to change, so flag the
        # index as dirty before any data is moved.
        # NOTE(review): _commit() is defined outside this block; in
        # dbm.dumb it returns early unless self._modified is set and clears
        # the flag after writing.  Without this line the final _commit()
        # below would therefore leave the on-disk index pointing at stale
        # offsets -- confirm against _commit().
        self._modified = True
        # Iterate over existing keys, sorted by starting byte.  Going in
        # ascending order keeps the write position at or before the read
        # position (assumes existing offsets are _BLOCKSIZE-aligned and
        # non-overlapping -- TODO confirm against the code that writes
        # values).
        for key in sorted(self._index, key=lambda k: self._index[k][0]):
            pos, siz = self._index[key]
            f.seek(pos)
            val = f.read(siz)
            f.seek(reorganize_pos)
            f.write(val)
            # Update the in-memory index right after the move so it always
            # matches what has been written so far.
            self._index[key] = (reorganize_pos, siz)
            # Advance by the value's size rounded up to whole blocks.
            blocks_occupied = (siz + _BLOCKSIZE - 1) // _BLOCKSIZE
            reorganize_pos += blocks_occupied * _BLOCKSIZE
        # Drop everything after the last relocated value.
        f.truncate(reorganize_pos)
    # Commit changes to index, which were not in-place.
    self._commit()
def open(file, flag='c', mode=0o666):
"""Open the database file, filename, and return corresponding object.