mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-06 19:58:09 +00:00
ruff_linter: fix handling of unprintable characters
Previously, we were replacing unprintable ASCII characters with a printable representation of them via fancier Unicode characters. Since `annotate-snippets` used to use codepoint offsets, this didn't make our ranges incorrect: we swapped one codepoint for another. But now, with the `annotate-snippets` upgrade, we use byte offsets (which is IMO the correct choice). However, this means our ranges can be thrown off, since in UTF-8 an ASCII codepoint is always one byte and a non-ASCII codepoint is always more than one byte. Rather than tweaking the `ShowNonprinting` trait and making it more complicated (it seems to be used in places other than this diagnostic rendering), we change `replace_whitespace` to handle non-printable characters. This works out because `replace_whitespace` was already updating the annotation range to account for the tab replacement. We copy that approach for unprintable characters.
This commit is contained in:
parent
84179aaa96
commit
2922490cb8
1 changed files with 47 additions and 20 deletions
|
@ -8,14 +8,13 @@ use ruff_annotate_snippets::{Level, Renderer, Snippet};
|
||||||
|
|
||||||
use ruff_notebook::NotebookIndex;
|
use ruff_notebook::NotebookIndex;
|
||||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::fs::relativize_path;
|
use crate::fs::relativize_path;
|
||||||
use crate::line_width::{IndentWidth, LineWidthBuilder};
|
use crate::line_width::{IndentWidth, LineWidthBuilder};
|
||||||
use crate::message::diff::Diff;
|
use crate::message::diff::Diff;
|
||||||
use crate::message::{Emitter, EmitterContext, Message};
|
use crate::message::{Emitter, EmitterContext, Message};
|
||||||
use crate::settings::types::UnsafeFixes;
|
use crate::settings::types::UnsafeFixes;
|
||||||
use crate::text_helpers::ShowNonprinting;
|
|
||||||
|
|
||||||
bitflags! {
|
bitflags! {
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -245,13 +244,11 @@ impl Display for MessageCodeFrame<'_> {
|
||||||
let start_offset = source_code.line_start(start_index);
|
let start_offset = source_code.line_start(start_index);
|
||||||
let end_offset = source_code.line_end(end_index);
|
let end_offset = source_code.line_end(end_index);
|
||||||
|
|
||||||
let source = replace_whitespace(
|
let source = replace_whitespace_and_unprintable(
|
||||||
source_code.slice(TextRange::new(start_offset, end_offset)),
|
source_code.slice(TextRange::new(start_offset, end_offset)),
|
||||||
self.message.range() - start_offset,
|
self.message.range() - start_offset,
|
||||||
);
|
);
|
||||||
|
|
||||||
let source_text = source.text.show_nonprinting();
|
|
||||||
|
|
||||||
let label = self
|
let label = self
|
||||||
.message
|
.message
|
||||||
.rule()
|
.rule()
|
||||||
|
@ -270,7 +267,7 @@ impl Display for MessageCodeFrame<'_> {
|
||||||
let span = usize::from(source.annotation_range.start())
|
let span = usize::from(source.annotation_range.start())
|
||||||
..usize::from(source.annotation_range.end());
|
..usize::from(source.annotation_range.end());
|
||||||
let annotation = Level::Error.span(span).label(&label);
|
let annotation = Level::Error.span(span).label(&label);
|
||||||
let snippet = Snippet::source(&source_text)
|
let snippet = Snippet::source(&source.text)
|
||||||
.line_start(line_start)
|
.line_start(line_start)
|
||||||
.annotation(annotation)
|
.annotation(annotation)
|
||||||
.fold(false);
|
.fold(false);
|
||||||
|
@ -286,38 +283,68 @@ impl Display for MessageCodeFrame<'_> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn replace_whitespace(source: &str, annotation_range: TextRange) -> SourceCode {
|
/// Given some source code and an annotation range, this routine replaces
|
||||||
|
/// tabs with ASCII whitespace, and unprintable characters with printable
|
||||||
|
/// representations of them.
|
||||||
|
///
|
||||||
|
/// The source code returned has an annotation that is updated to reflect
|
||||||
|
/// changes made to the source code (if any).
|
||||||
|
fn replace_whitespace_and_unprintable(source: &str, annotation_range: TextRange) -> SourceCode {
|
||||||
let mut result = String::new();
|
let mut result = String::new();
|
||||||
let mut last_end = 0;
|
let mut last_end = 0;
|
||||||
let mut range = annotation_range;
|
let mut range = annotation_range;
|
||||||
let mut line_width = LineWidthBuilder::new(IndentWidth::default());
|
let mut line_width = LineWidthBuilder::new(IndentWidth::default());
|
||||||
|
|
||||||
|
// Updates the range given by the caller whenever a single byte (at
|
||||||
|
// `index` in `source`) is replaced with `len` bytes.
|
||||||
|
//
|
||||||
|
// When the index occurs before the start of the range, the whole range is
|
||||||
|
// shifted by `len - 1`. When the index occurs at or after the start but
|
||||||
|
// before the end, only the end of the range is shifted by `len - 1`.
|
||||||
|
let mut update_range = |index, len| {
|
||||||
|
if index < usize::from(annotation_range.start()) {
|
||||||
|
range += TextSize::new(len - 1);
|
||||||
|
} else if index < usize::from(annotation_range.end()) {
|
||||||
|
range = range.add_end(TextSize::new(len - 1));
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// If `c` is an unprintable character, then this returns a printable
|
||||||
|
// representation of it (using a fancier Unicode codepoint).
|
||||||
|
let unprintable_replacement = |c: char| -> Option<char> {
|
||||||
|
match c {
|
||||||
|
'\x07' => Some('␇'),
|
||||||
|
'\x08' => Some('␈'),
|
||||||
|
'\x1b' => Some('␛'),
|
||||||
|
'\x7f' => Some('␡'),
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
for (index, c) in source.char_indices() {
|
for (index, c) in source.char_indices() {
|
||||||
let old_width = line_width.get();
|
let old_width = line_width.get();
|
||||||
line_width = line_width.add_char(c);
|
line_width = line_width.add_char(c);
|
||||||
|
|
||||||
if matches!(c, '\t') {
|
if matches!(c, '\t') {
|
||||||
// SAFETY: The difference is a value in the range [1..TAB_SIZE] which is guaranteed to be less than `u32`.
|
let tab_width = u32::try_from(line_width.get() - old_width)
|
||||||
#[allow(clippy::cast_possible_truncation)]
|
.expect("small width because of tab size");
|
||||||
let tab_width = (line_width.get() - old_width) as u32;
|
|
||||||
|
|
||||||
if index < usize::from(annotation_range.start()) {
|
|
||||||
range += TextSize::new(tab_width - 1);
|
|
||||||
} else if index < usize::from(annotation_range.end()) {
|
|
||||||
range = range.add_end(TextSize::new(tab_width - 1));
|
|
||||||
}
|
|
||||||
|
|
||||||
result.push_str(&source[last_end..index]);
|
result.push_str(&source[last_end..index]);
|
||||||
|
|
||||||
for _ in 0..tab_width {
|
for _ in 0..tab_width {
|
||||||
result.push(' ');
|
result.push(' ');
|
||||||
}
|
}
|
||||||
|
|
||||||
last_end = index + 1;
|
last_end = index + 1;
|
||||||
|
update_range(index, tab_width);
|
||||||
|
} else if let Some(printable) = unprintable_replacement(c) {
|
||||||
|
result.push_str(&source[last_end..index]);
|
||||||
|
result.push(printable);
|
||||||
|
last_end = index + 1;
|
||||||
|
|
||||||
|
let len = printable.text_len().to_u32();
|
||||||
|
update_range(index, len);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// No tabs
|
// No tabs or unprintable chars
|
||||||
if result.is_empty() {
|
if result.is_empty() {
|
||||||
SourceCode {
|
SourceCode {
|
||||||
annotation_range,
|
annotation_range,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue