gh-108590: Fix sqlite3.iterdump for invalid Unicode in TEXT columns (#108657)

Co-authored-by: Erlend E. Aasland <erlend@python.org>
This commit is contained in:
Corvin 2023-08-30 05:06:21 -04:00 committed by GitHub
parent 210a5d7b8b
commit 400a1cebc7
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 41 additions and 2 deletions

View file

@@ -7,6 +7,10 @@
# future enhancements, you should normally quote any identifier that
# is an English language word, even if you do not have to."
from contextlib import contextmanager
def _quote_name(name):
    """Return *name* wrapped in double quotes for use as an SQL identifier.

    Any double quote already present in *name* is escaped by doubling it,
    so the result can be embedded directly in generated SQL text.
    """
    return '"{0}"'.format(name.replace('"', '""'))
@@ -15,6 +19,24 @@ def _quote_value(value):
return "'{0}'".format(value.replace("'", "''"))
def _force_decode(bs, *args, **kwargs):
    """Decode *bs* to ``str`` without letting ``UnicodeDecodeError`` escape.

    *args* and *kwargs* are forwarded to ``bs.decode()``.  If that decode
    raises ``UnicodeDecodeError``, every byte is mapped to the code point
    with the same numeric value instead, so the call still returns a string.
    """
    # gh-108590: Don't fail if the database contains invalid Unicode data.
    try:
        return bs.decode(*args, **kwargs)
    except UnicodeDecodeError:
        # Latin-1 maps each byte 0-255 to the identical code point, which is
        # exactly what a per-byte chr() join produces, and it cannot fail.
        return bs.decode("latin-1")
@contextmanager
def _text_factory(con, factory):
    """Context manager that temporarily sets ``con.text_factory``.

    On entry the current ``con.text_factory`` is remembered and replaced by
    *factory*.  On exit the remembered value is put back, including when the
    managed block raises.  Nothing is yielded to the ``with`` statement.
    """
    saved_factory = con.text_factory
    con.text_factory = factory
    try:
        yield
    finally:
        # Always restore, so an error in the body cannot leave the
        # connection with the temporary factory installed.
        con.text_factory = saved_factory
def _iterdump(connection):
"""
Returns an iterator to the dump of the database in an SQL text format.
@@ -74,8 +96,9 @@ def _iterdump(connection):
)
)
query_res = cu.execute(q)
for row in query_res:
yield("{0};".format(row[0]))
with _text_factory(connection, bytes):
for row in query_res:
yield("{0};".format(_force_decode(row[0])))
# Now when the type is 'index', 'trigger', or 'view'
q = """