mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-05 19:28:11 +00:00
ruff_linter: fix handling of unprintable characters
Previously, we were replacing unprintable ASCII characters with a printable representation of them via fancier Unicode characters. Since `annotate-snippets` used to use codepoint offsets, this didn't make our ranges incorrect: we swapped one codepoint for another. But now, with the `annotate-snippets` upgrade, we use byte offsets (which is IMO the correct choice). However, this means our ranges can be thrown off since an ASCII codepoint is always one byte and a non-ASCII codepoint is always more than one byte. Instead of tweaking the `ShowNonprinting` trait and making it more complicated (which is used in places other than this diagnostic rendering it seems), we instead change `replace_whitespace` to handle non-printable characters. This works out because `replace_whitespace` was already updating the annotation range to account for the tab replacement. We copy that approach for unprintable characters.
This commit is contained in:
parent
84179aaa96
commit
2922490cb8
1 changed files with 47 additions and 20 deletions
|
@ -8,14 +8,13 @@ use ruff_annotate_snippets::{Level, Renderer, Snippet};
|
|||
|
||||
use ruff_notebook::NotebookIndex;
|
||||
use ruff_source_file::{OneIndexed, SourceLocation};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
|
||||
|
||||
use crate::fs::relativize_path;
|
||||
use crate::line_width::{IndentWidth, LineWidthBuilder};
|
||||
use crate::message::diff::Diff;
|
||||
use crate::message::{Emitter, EmitterContext, Message};
|
||||
use crate::settings::types::UnsafeFixes;
|
||||
use crate::text_helpers::ShowNonprinting;
|
||||
|
||||
bitflags! {
|
||||
#[derive(Default)]
|
||||
|
@ -245,13 +244,11 @@ impl Display for MessageCodeFrame<'_> {
|
|||
let start_offset = source_code.line_start(start_index);
|
||||
let end_offset = source_code.line_end(end_index);
|
||||
|
||||
let source = replace_whitespace(
|
||||
let source = replace_whitespace_and_unprintable(
|
||||
source_code.slice(TextRange::new(start_offset, end_offset)),
|
||||
self.message.range() - start_offset,
|
||||
);
|
||||
|
||||
let source_text = source.text.show_nonprinting();
|
||||
|
||||
let label = self
|
||||
.message
|
||||
.rule()
|
||||
|
@ -270,7 +267,7 @@ impl Display for MessageCodeFrame<'_> {
|
|||
let span = usize::from(source.annotation_range.start())
|
||||
..usize::from(source.annotation_range.end());
|
||||
let annotation = Level::Error.span(span).label(&label);
|
||||
let snippet = Snippet::source(&source_text)
|
||||
let snippet = Snippet::source(&source.text)
|
||||
.line_start(line_start)
|
||||
.annotation(annotation)
|
||||
.fold(false);
|
||||
|
@ -286,38 +283,68 @@ impl Display for MessageCodeFrame<'_> {
|
|||
}
|
||||
}
|
||||
|
||||
fn replace_whitespace(source: &str, annotation_range: TextRange) -> SourceCode {
|
||||
/// Given some source code and an annotation range, this routine replaces
|
||||
/// tabs with ASCII whitespace, and unprintable characters with printable
|
||||
/// representations of them.
|
||||
///
|
||||
/// The source code returned has an annotation that is updated to reflect
|
||||
/// changes made to the source code (if any).
|
||||
fn replace_whitespace_and_unprintable(source: &str, annotation_range: TextRange) -> SourceCode {
|
||||
let mut result = String::new();
|
||||
let mut last_end = 0;
|
||||
let mut range = annotation_range;
|
||||
let mut line_width = LineWidthBuilder::new(IndentWidth::default());
|
||||
|
||||
// Updates the range given by the caller whenever a single byte (at
|
||||
// `index` in `source`) is replaced with `len` bytes.
|
||||
//
|
||||
// When the index occurs before the start of the range, the range is
|
||||
// offset by `len`. When the range occurs after or at the start but before
|
||||
// the end, then the end of the range only is offset by `len`.
|
||||
let mut update_range = |index, len| {
|
||||
if index < usize::from(annotation_range.start()) {
|
||||
range += TextSize::new(len - 1);
|
||||
} else if index < usize::from(annotation_range.end()) {
|
||||
range = range.add_end(TextSize::new(len - 1));
|
||||
}
|
||||
};
|
||||
|
||||
// If `c` is an unprintable character, then this returns a printable
|
||||
// representation of it (using a fancier Unicode codepoint).
|
||||
let unprintable_replacement = |c: char| -> Option<char> {
|
||||
match c {
|
||||
'\x07' => Some('␇'),
|
||||
'\x08' => Some('␈'),
|
||||
'\x1b' => Some('␛'),
|
||||
'\x7f' => Some('␡'),
|
||||
_ => None,
|
||||
}
|
||||
};
|
||||
|
||||
for (index, c) in source.char_indices() {
|
||||
let old_width = line_width.get();
|
||||
line_width = line_width.add_char(c);
|
||||
|
||||
if matches!(c, '\t') {
|
||||
// SAFETY: The difference is a value in the range [1..TAB_SIZE] which is guaranteed to be less than `u32`.
|
||||
#[allow(clippy::cast_possible_truncation)]
|
||||
let tab_width = (line_width.get() - old_width) as u32;
|
||||
|
||||
if index < usize::from(annotation_range.start()) {
|
||||
range += TextSize::new(tab_width - 1);
|
||||
} else if index < usize::from(annotation_range.end()) {
|
||||
range = range.add_end(TextSize::new(tab_width - 1));
|
||||
}
|
||||
|
||||
let tab_width = u32::try_from(line_width.get() - old_width)
|
||||
.expect("small width because of tab size");
|
||||
result.push_str(&source[last_end..index]);
|
||||
|
||||
for _ in 0..tab_width {
|
||||
result.push(' ');
|
||||
}
|
||||
|
||||
last_end = index + 1;
|
||||
update_range(index, tab_width);
|
||||
} else if let Some(printable) = unprintable_replacement(c) {
|
||||
result.push_str(&source[last_end..index]);
|
||||
result.push(printable);
|
||||
last_end = index + 1;
|
||||
|
||||
let len = printable.text_len().to_u32();
|
||||
update_range(index, len);
|
||||
}
|
||||
}
|
||||
|
||||
// No tabs
|
||||
// No tabs or unprintable chars
|
||||
if result.is_empty() {
|
||||
SourceCode {
|
||||
annotation_range,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue