mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
[3.11] gh-113594: Fix UnicodeEncodeError in TokenList.fold() (GH-113730) (GH-113908)
It occurred when trying to re-encode an unknown-8bit part combined with a non-unknown-8bit part.
(cherry picked from commit e9d5b6ea2d)
Co-authored-by: Serhiy Storchaka <storchaka@gmail.com>
This commit is contained in:
parent
c92a473a71
commit
435e891b32
3 changed files with 48 additions and 0 deletions
|
@@ -2768,6 +2768,7 @@ def _refold_parse_tree(parse_tree, *, policy):
|
|||
encoding = 'utf-8' if policy.utf8 else 'us-ascii'
|
||||
lines = ['']
|
||||
last_ew = None
|
||||
last_charset = None
|
||||
wrap_as_ew_blocked = 0
|
||||
want_encoding = False
|
||||
end_ew_not_allowed = Terminal('', 'wrap_as_ew_blocked')
|
||||
|
@@ -2822,8 +2823,14 @@ def _refold_parse_tree(parse_tree, *, policy):
|
|||
else:
|
||||
# It's a terminal, wrap it as an encoded word, possibly
|
||||
# combining it with previously encoded words if allowed.
|
||||
if (last_ew is not None and
|
||||
charset != last_charset and
|
||||
(last_charset == 'unknown-8bit' or
|
||||
last_charset == 'utf-8' and charset != 'us-ascii')):
|
||||
last_ew = None
|
||||
last_ew = _fold_as_ew(tstr, lines, maxlen, last_ew,
|
||||
part.ew_combine_allowed, charset)
|
||||
last_charset = charset
|
||||
want_encoding = False
|
||||
continue
|
||||
if len(tstr) <= maxlen - len(lines[-1]):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue