mirror of
https://github.com/python/cpython.git
synced 2025-07-16 15:55:18 +00:00
[3.12] bpo-43950: handle wide unicode characters in tracebacks (GH-28150) (#111346)
This commit is contained in:
parent
e25d8b40cd
commit
c81ebf5b3d
5 changed files with 187 additions and 16 deletions
|
@ -922,8 +922,63 @@ class TracebackErrorLocationCaretTestBase:
|
||||||
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
|
f" File \"{__file__}\", line {self.callable_line}, in get_exception",
|
||||||
" callable()",
|
" callable()",
|
||||||
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
|
f" File \"{__file__}\", line {f.__code__.co_firstlineno + 4}, in f",
|
||||||
" print(1, www(",
|
f" print(1, www(",
|
||||||
" ^^^^",
|
f" ^^^^^^^",
|
||||||
|
]
|
||||||
|
self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
def test_byte_offset_with_wide_characters_term_highlight(self):
    # Wide (East Asian) identifiers occupy two terminal columns each, so
    # the anchors must be positioned by display width, not by character
    # count.
    #
    # NOTE: the traceback echoes f's source lines verbatim and the expected
    # line number is relative to "def f()", so do not add, remove or
    # re-wrap lines inside f.
    def f():
        说明说明 = 1
        şçöğıĤellö = 0 # not wide but still non-ascii
        return 说明说明 / şçöğıĤellö

    actual = self.get_exception(f)
    expected = [
        "Traceback (most recent call last):",
        f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
        "    callable()",
        f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 3}, in f",
        "    return 说明说明 / şçöğıĤellö",
        # 4 columns of frame indent + 7 for "return ", then 9 columns for
        # "说明说明 " (4 wide chars + space), the operator, and 11 columns
        # for " şçöğıĤellö" (narrow chars).
        "           ~~~~~~~~~^~~~~~~~~~~",
    ]
    self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
def test_byte_offset_with_emojis_term_highlight(self):
    # Emojis are rendered double-width; the carets under the undefined
    # name must start after the display width of everything before it.
    #
    # NOTE: the traceback echoes f's source lines verbatim and the expected
    # line number is relative to "def f()", so do not add, remove or
    # re-wrap lines inside f.
    def f():
        return "✨🐍" + func_说明说明("📗🚛",
                                  "📗🚛") + "🐍"

    actual = self.get_exception(f)
    expected = [
        "Traceback (most recent call last):",
        f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
        "    callable()",
        f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 1}, in f",
        '    return "✨🐍" + func_说明说明("📗🚛",',
        # 4 columns of frame indent + 16 columns for 'return "✨🐍" + '
        # (two double-width emojis), then 13 columns for func_说明说明.
        "                    ^^^^^^^^^^^^^",
    ]
    self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
def test_byte_offset_wide_chars_subscript(self):
    # Subscript chains use '~' for the subscripted value and '^' for the
    # failing subscript; both ranges must be measured in display columns.
    #
    # NOTE: the traceback echoes f's source lines verbatim and the expected
    # line number is relative to "def f()", so do not add, remove or
    # re-wrap lines inside f.
    def f():
        my_dct = {
            "✨🚛✨": {
                "说明": {
                    "🐍🐍🐍": None
                }
            }
        }
        return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]

    actual = self.get_exception(f)
    expected = [
        "Traceback (most recent call last):",
        f"  File \"{__file__}\", line {self.callable_line}, in get_exception",
        "    callable()",
        f"  File \"{__file__}\", line {f.__code__.co_firstlineno + 8}, in f",
        '    return my_dct["✨🚛✨"]["说明"]["🐍"]["说明"]["🐍🐍"]',
        # 4 columns of frame indent + 7 for "return ", then 24 columns for
        # my_dct["✨🚛✨"]["说明"] and 6 columns for the failing ["🐍"].
        "           ~~~~~~~~~~~~~~~~~~~~~~~~^^^^^^",
    ]
    self.assertEqual(actual, expected)
|
||||||
|
|
||||||
|
|
|
@ -470,7 +470,8 @@ class StackSummary(list):
|
||||||
stripped_line = frame_summary.line.strip()
|
stripped_line = frame_summary.line.strip()
|
||||||
row.append(' {}\n'.format(stripped_line))
|
row.append(' {}\n'.format(stripped_line))
|
||||||
|
|
||||||
orig_line_len = len(frame_summary._original_line)
|
line = frame_summary._original_line
|
||||||
|
orig_line_len = len(line)
|
||||||
frame_line_len = len(frame_summary.line.lstrip())
|
frame_line_len = len(frame_summary.line.lstrip())
|
||||||
stripped_characters = orig_line_len - frame_line_len
|
stripped_characters = orig_line_len - frame_line_len
|
||||||
if (
|
if (
|
||||||
|
@ -478,31 +479,40 @@ class StackSummary(list):
|
||||||
and frame_summary.end_colno is not None
|
and frame_summary.end_colno is not None
|
||||||
):
|
):
|
||||||
start_offset = _byte_offset_to_character_offset(
|
start_offset = _byte_offset_to_character_offset(
|
||||||
frame_summary._original_line, frame_summary.colno) + 1
|
line, frame_summary.colno)
|
||||||
end_offset = _byte_offset_to_character_offset(
|
end_offset = _byte_offset_to_character_offset(
|
||||||
frame_summary._original_line, frame_summary.end_colno) + 1
|
line, frame_summary.end_colno)
|
||||||
|
code_segment = line[start_offset:end_offset]
|
||||||
|
|
||||||
anchors = None
|
anchors = None
|
||||||
if frame_summary.lineno == frame_summary.end_lineno:
|
if frame_summary.lineno == frame_summary.end_lineno:
|
||||||
with suppress(Exception):
|
with suppress(Exception):
|
||||||
anchors = _extract_caret_anchors_from_line_segment(
|
anchors = _extract_caret_anchors_from_line_segment(code_segment)
|
||||||
frame_summary._original_line[start_offset - 1:end_offset - 1]
|
|
||||||
)
|
|
||||||
else:
|
else:
|
||||||
end_offset = stripped_characters + len(stripped_line)
|
# Don't count the newline since the anchors only need to
|
||||||
|
# go up until the last character of the line.
|
||||||
|
end_offset = len(line.rstrip())
|
||||||
|
|
||||||
# show indicators if primary char doesn't span the frame line
|
# show indicators if primary char doesn't span the frame line
|
||||||
if end_offset - start_offset < len(stripped_line) or (
|
if end_offset - start_offset < len(stripped_line) or (
|
||||||
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
|
anchors and anchors.right_start_offset - anchors.left_end_offset > 0):
|
||||||
|
# When showing this on a terminal, some of the non-ASCII characters
|
||||||
|
# might be rendered as double-width characters, so we need to take
|
||||||
|
# that into account when calculating the length of the line.
|
||||||
|
dp_start_offset = _display_width(line, start_offset) + 1
|
||||||
|
dp_end_offset = _display_width(line, end_offset) + 1
|
||||||
|
|
||||||
row.append(' ')
|
row.append(' ')
|
||||||
row.append(' ' * (start_offset - stripped_characters))
|
row.append(' ' * (dp_start_offset - stripped_characters))
|
||||||
|
|
||||||
if anchors:
|
if anchors:
|
||||||
row.append(anchors.primary_char * (anchors.left_end_offset))
|
dp_left_end_offset = _display_width(code_segment, anchors.left_end_offset)
|
||||||
row.append(anchors.secondary_char * (anchors.right_start_offset - anchors.left_end_offset))
|
dp_right_start_offset = _display_width(code_segment, anchors.right_start_offset)
|
||||||
row.append(anchors.primary_char * (end_offset - start_offset - anchors.right_start_offset))
|
row.append(anchors.primary_char * dp_left_end_offset)
|
||||||
|
row.append(anchors.secondary_char * (dp_right_start_offset - dp_left_end_offset))
|
||||||
|
row.append(anchors.primary_char * (dp_end_offset - dp_start_offset - dp_right_start_offset))
|
||||||
else:
|
else:
|
||||||
row.append('^' * (end_offset - start_offset))
|
row.append('^' * (dp_end_offset - dp_start_offset))
|
||||||
|
|
||||||
row.append('\n')
|
row.append('\n')
|
||||||
|
|
||||||
|
@ -623,6 +633,25 @@ def _extract_caret_anchors_from_line_segment(segment):
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
_WIDE_CHAR_SPECIFIERS = "WF"
|
||||||
|
|
||||||
|
def _display_width(line, offset):
|
||||||
|
"""Calculate the extra amount of width space the given source
|
||||||
|
code segment might take if it were to be displayed on a fixed
|
||||||
|
width output device. Supports wide unicode characters and emojis."""
|
||||||
|
|
||||||
|
# Fast track for ASCII-only strings
|
||||||
|
if line.isascii():
|
||||||
|
return offset
|
||||||
|
|
||||||
|
import unicodedata
|
||||||
|
|
||||||
|
return sum(
|
||||||
|
2 if unicodedata.east_asian_width(char) in _WIDE_CHAR_SPECIFIERS else 1
|
||||||
|
for char in line[:offset]
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class _ExceptionPrintContext:
|
class _ExceptionPrintContext:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
|
|
|
@ -38,6 +38,61 @@ _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset)
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Calculate the extra amount of width space the given source
|
||||||
|
// code segment might take if it were to be displayed on a fixed
|
||||||
|
// width output device. Supports wide unicode characters and emojis.
|
||||||
|
Py_ssize_t
|
||||||
|
_PyPegen_calculate_display_width(PyObject *line, Py_ssize_t character_offset)
|
||||||
|
{
|
||||||
|
PyObject *segment = PyUnicode_Substring(line, 0, character_offset);
|
||||||
|
if (!segment) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fast track for ascii strings
|
||||||
|
if (PyUnicode_IS_ASCII(segment)) {
|
||||||
|
Py_DECREF(segment);
|
||||||
|
return character_offset;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *width_fn = _PyImport_GetModuleAttrString("unicodedata", "east_asian_width");
|
||||||
|
if (!width_fn) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t width = 0;
|
||||||
|
Py_ssize_t len = PyUnicode_GET_LENGTH(segment);
|
||||||
|
for (Py_ssize_t i = 0; i < len; i++) {
|
||||||
|
PyObject *chr = PyUnicode_Substring(segment, i, i + 1);
|
||||||
|
if (!chr) {
|
||||||
|
Py_DECREF(segment);
|
||||||
|
Py_DECREF(width_fn);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyObject *width_specifier = PyObject_CallOneArg(width_fn, chr);
|
||||||
|
Py_DECREF(chr);
|
||||||
|
if (!width_specifier) {
|
||||||
|
Py_DECREF(segment);
|
||||||
|
Py_DECREF(width_fn);
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (_PyUnicode_EqualToASCIIString(width_specifier, "W") ||
|
||||||
|
_PyUnicode_EqualToASCIIString(width_specifier, "F")) {
|
||||||
|
width += 2;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
width += 1;
|
||||||
|
}
|
||||||
|
Py_DECREF(width_specifier);
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_DECREF(segment);
|
||||||
|
Py_DECREF(width_fn);
|
||||||
|
return width;
|
||||||
|
}
|
||||||
|
|
||||||
// Here, mark is the start of the node, while p->mark is the end.
|
// Here, mark is the start of the node, while p->mark is the end.
|
||||||
// If node==NULL, they should be the same.
|
// If node==NULL, they should be the same.
|
||||||
int
|
int
|
||||||
|
|
|
@ -151,6 +151,7 @@ expr_ty _PyPegen_name_token(Parser *p);
|
||||||
expr_ty _PyPegen_number_token(Parser *p);
|
expr_ty _PyPegen_number_token(Parser *p);
|
||||||
void *_PyPegen_string_token(Parser *p);
|
void *_PyPegen_string_token(Parser *p);
|
||||||
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
|
Py_ssize_t _PyPegen_byte_offset_to_character_offset(PyObject *line, Py_ssize_t col_offset);
|
||||||
|
// Display width (in columns) of the first `character_offset` characters of `line`; -1 on failure.
Py_ssize_t _PyPegen_calculate_display_width(PyObject *line, Py_ssize_t character_offset);
|
||||||
|
|
||||||
// Error handling functions and APIs
|
// Error handling functions and APIs
|
||||||
typedef enum {
|
typedef enum {
|
||||||
|
|
|
@ -900,8 +900,39 @@ tb_displayline(PyTracebackObject* tb, PyObject *f, PyObject *filename, int linen
|
||||||
goto done;
|
goto done;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (print_error_location_carets(f, truncation, start_offset, end_offset,
|
// Convert all offsets to display offsets (e.g. the space they would take up if printed
|
||||||
right_start_offset, left_end_offset,
|
// on the screen).
|
||||||
|
Py_ssize_t dp_start = _PyPegen_calculate_display_width(source_line, start_offset);
|
||||||
|
if (dp_start < 0) {
|
||||||
|
err = ignore_source_errors() < 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t dp_end = _PyPegen_calculate_display_width(source_line, end_offset);
|
||||||
|
if (dp_end < 0) {
|
||||||
|
err = ignore_source_errors() < 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
Py_ssize_t dp_left_end = -1;
|
||||||
|
Py_ssize_t dp_right_start = -1;
|
||||||
|
if (has_secondary_ranges) {
|
||||||
|
dp_left_end = _PyPegen_calculate_display_width(source_line, left_end_offset);
|
||||||
|
if (dp_left_end < 0) {
|
||||||
|
err = ignore_source_errors() < 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
|
||||||
|
dp_right_start = _PyPegen_calculate_display_width(source_line, right_start_offset);
|
||||||
|
if (dp_right_start < 0) {
|
||||||
|
err = ignore_source_errors() < 0;
|
||||||
|
goto done;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
if (print_error_location_carets(f, truncation, dp_start, dp_end,
|
||||||
|
dp_right_start, dp_left_end,
|
||||||
primary_error_char, secondary_error_char) < 0) {
|
primary_error_char, secondary_error_char) < 0) {
|
||||||
err = -1;
|
err = -1;
|
||||||
goto done;
|
goto done;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue