ruff_linter: fix handling of unprintable characters

Previously, we were replacing unprintable ASCII characters with a
printable representation of them via fancier Unicode characters. Since
`annotate-snippets` used to use codepoint offsets, this didn't make our
ranges incorrect: we swapped one codepoint for another.

But now, with the `annotate-snippets` upgrade, we use byte offsets
(which is IMO the correct choice). However, this means our ranges can be
thrown off since an ASCII codepoint is always one byte and a non-ASCII
codepoint is always more than one byte.

Instead of tweaking the `ShowNonprinting` trait and making it more
complicated (which is used in places other than this diagnostic
rendering it seems), we instead change `replace_whitespace` to handle
non-printable characters. This works out because `replace_whitespace`
was already updating the annotation range to account for the tab
replacement. We copy that approach for unprintable characters.
This commit is contained in:
Andrew Gallant 2025-01-08 15:07:33 -05:00 committed by Andrew Gallant
parent 84179aaa96
commit 2922490cb8

View file

@ -8,14 +8,13 @@ use ruff_annotate_snippets::{Level, Renderer, Snippet};
use ruff_notebook::NotebookIndex;
use ruff_source_file::{OneIndexed, SourceLocation};
use ruff_text_size::{Ranged, TextRange, TextSize};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::fs::relativize_path;
use crate::line_width::{IndentWidth, LineWidthBuilder};
use crate::message::diff::Diff;
use crate::message::{Emitter, EmitterContext, Message};
use crate::settings::types::UnsafeFixes;
use crate::text_helpers::ShowNonprinting;
bitflags! {
#[derive(Default)]
@ -245,13 +244,11 @@ impl Display for MessageCodeFrame<'_> {
let start_offset = source_code.line_start(start_index);
let end_offset = source_code.line_end(end_index);
let source = replace_whitespace(
let source = replace_whitespace_and_unprintable(
source_code.slice(TextRange::new(start_offset, end_offset)),
self.message.range() - start_offset,
);
let source_text = source.text.show_nonprinting();
let label = self
.message
.rule()
@ -270,7 +267,7 @@ impl Display for MessageCodeFrame<'_> {
let span = usize::from(source.annotation_range.start())
..usize::from(source.annotation_range.end());
let annotation = Level::Error.span(span).label(&label);
let snippet = Snippet::source(&source_text)
let snippet = Snippet::source(&source.text)
.line_start(line_start)
.annotation(annotation)
.fold(false);
@ -286,38 +283,68 @@ impl Display for MessageCodeFrame<'_> {
}
}
fn replace_whitespace(source: &str, annotation_range: TextRange) -> SourceCode {
/// Given some source code and an annotation range, this routine replaces
/// tabs with ASCII whitespace, and unprintable characters with printable
/// representations of them.
///
/// The source code returned has an annotation that is updated to reflect
/// changes made to the source code (if any).
fn replace_whitespace_and_unprintable(source: &str, annotation_range: TextRange) -> SourceCode {
let mut result = String::new();
let mut last_end = 0;
let mut range = annotation_range;
let mut line_width = LineWidthBuilder::new(IndentWidth::default());
// Updates the range given by the caller whenever a single byte (at
// `index` in `source`) is replaced with `len` bytes.
//
// When the index occurs before the start of the range, the range is
// offset by `len`. When the range occurs after or at the start but before
// the end, then the end of the range only is offset by `len`.
let mut update_range = |index, len| {
if index < usize::from(annotation_range.start()) {
range += TextSize::new(len - 1);
} else if index < usize::from(annotation_range.end()) {
range = range.add_end(TextSize::new(len - 1));
}
};
// If `c` is an unprintable character, then this returns a printable
// representation of it (using a fancier Unicode codepoint).
let unprintable_replacement = |c: char| -> Option<char> {
match c {
'\x07' => Some('␇'),
'\x08' => Some('␈'),
'\x1b' => Some('␛'),
'\x7f' => Some('␡'),
_ => None,
}
};
for (index, c) in source.char_indices() {
let old_width = line_width.get();
line_width = line_width.add_char(c);
if matches!(c, '\t') {
// SAFETY: The difference is a value in the range [1..TAB_SIZE] which is guaranteed to be less than `u32`.
#[allow(clippy::cast_possible_truncation)]
let tab_width = (line_width.get() - old_width) as u32;
if index < usize::from(annotation_range.start()) {
range += TextSize::new(tab_width - 1);
} else if index < usize::from(annotation_range.end()) {
range = range.add_end(TextSize::new(tab_width - 1));
}
let tab_width = u32::try_from(line_width.get() - old_width)
.expect("small width because of tab size");
result.push_str(&source[last_end..index]);
for _ in 0..tab_width {
result.push(' ');
}
last_end = index + 1;
update_range(index, tab_width);
} else if let Some(printable) = unprintable_replacement(c) {
result.push_str(&source[last_end..index]);
result.push(printable);
last_end = index + 1;
let len = printable.text_len().to_u32();
update_range(index, len);
}
}
// No tabs
// No tabs or unprintable chars
if result.is_empty() {
SourceCode {
annotation_range,