mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-24 21:44:37 +00:00
[pyupgrade
] Do not report when a UTF-8 comment is followed by a non-UTF-8 one (UP009
) (#14728)
## Summary Resolves #14704. ## Test Plan `cargo nextest run` and `cargo insta test`. --------- Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
a55722e740
commit
c8d505c8ea
22 changed files with 273 additions and 49 deletions
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
print('No coding coments here')
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_hashbang_utf8_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_hashbang_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
#!/usr/bin/python
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: ascii -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_many_empty_lines.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_many_empty_lines.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
|
||||
|
||||
# -*- coding: utf-8 -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
# -*- coding: ascii -*-
|
||||
# -*- coding: latin -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
# -*- coding: ascii -*-
|
||||
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_code_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_code_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
# -*- coding: utf8 -*-
|
||||
print("the following is not a coding comment")
|
||||
# -*- coding: ascii -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: ascii -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: utf-8 -*-
|
||||
# -*- coding: ascii -*-
|
|
@ -73,12 +73,7 @@ pub(crate) fn check_tokens(
|
|||
}
|
||||
|
||||
if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
|
||||
pyupgrade::rules::unnecessary_coding_comment(
|
||||
&mut diagnostics,
|
||||
locator,
|
||||
indexer,
|
||||
comment_ranges,
|
||||
);
|
||||
pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, comment_ranges);
|
||||
}
|
||||
|
||||
if settings.rules.enabled(Rule::TabIndentation) {
|
||||
|
|
|
@ -77,6 +77,18 @@ mod tests {
|
|||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_8.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_9.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_10.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_other.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_utf8.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_other.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_code_other.py"))]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_code_utf8_utf8.py"))]
|
||||
#[test_case(
|
||||
Rule::UTF8EncodingDeclaration,
|
||||
Path::new("UP009_hashbang_utf8_other.py")
|
||||
)]
|
||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_many_empty_lines.py"))]
|
||||
#[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
|
||||
#[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
|
||||
#[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
use std::iter::FusedIterator;
|
||||
use std::sync::LazyLock;
|
||||
|
||||
use regex::Regex;
|
||||
|
||||
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
|
||||
use ruff_macros::{derive_message_formats, ViolationMetadata};
|
||||
use ruff_python_index::Indexer;
|
||||
use ruff_python_trivia::CommentRanges;
|
||||
use ruff_source_file::LineRanges;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::Locator;
|
||||
|
||||
|
@ -46,57 +46,134 @@ impl AlwaysFixableViolation for UTF8EncodingDeclaration {
|
|||
|
||||
// Regex from PEP263.
|
||||
static CODING_COMMENT_REGEX: LazyLock<Regex> =
|
||||
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
|
||||
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*(?<name>[-_.a-zA-Z0-9]+)").unwrap());
|
||||
|
||||
#[derive(Debug)]
|
||||
enum CodingComment {
|
||||
/// A UTF-8 encoding declaration.
|
||||
UTF8(CodingCommentRange),
|
||||
/// A declaration for any non utf8 encoding
|
||||
OtherEncoding,
|
||||
/// Any other comment
|
||||
NoEncoding,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct CodingCommentRange {
|
||||
comment: TextRange,
|
||||
line: TextRange,
|
||||
}
|
||||
|
||||
/// UP009
|
||||
pub(crate) fn unnecessary_coding_comment(
|
||||
diagnostics: &mut Vec<Diagnostic>,
|
||||
locator: &Locator,
|
||||
indexer: &Indexer,
|
||||
comment_ranges: &CommentRanges,
|
||||
) {
|
||||
// The coding comment must be on one of the first two lines. Since each comment spans at least
|
||||
// one line, we only need to check the first two comments at most.
|
||||
for comment_range in comment_ranges.iter().take(2) {
|
||||
// If leading content is not whitespace then it's not a valid coding comment e.g.
|
||||
// ```
|
||||
// print(x) # coding=utf8
|
||||
// ```
|
||||
let line_range = locator.full_line_range(comment_range.start());
|
||||
if !locator
|
||||
.slice(TextRange::new(line_range.start(), comment_range.start()))
|
||||
.trim()
|
||||
.is_empty()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let mut iter = CodingCommentIterator::new(locator, comment_ranges)
|
||||
.skip_while(|comment| matches!(comment, CodingComment::NoEncoding));
|
||||
|
||||
// If the line is after a continuation then it's not a valid coding comment e.g.
|
||||
// ```
|
||||
// x = 1 \
|
||||
// # coding=utf8
|
||||
// x = 2
|
||||
// ```
|
||||
if indexer
|
||||
.preceded_by_continuations(line_range.start(), locator.contents())
|
||||
.is_some()
|
||||
{
|
||||
continue;
|
||||
}
|
||||
let Some(CodingComment::UTF8(range)) = iter.next() else {
|
||||
return;
|
||||
};
|
||||
|
||||
if CODING_COMMENT_REGEX.is_match(locator.slice(line_range)) {
|
||||
#[allow(deprecated)]
|
||||
let index = locator.compute_line_index(line_range.start());
|
||||
if index.to_zero_indexed() > 1 {
|
||||
continue;
|
||||
}
|
||||
let line_index = locator.count_lines(TextRange::up_to(range.comment.start()));
|
||||
|
||||
let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *comment_range);
|
||||
diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
|
||||
line_range.start(),
|
||||
line_range.end(),
|
||||
)));
|
||||
diagnostics.push(diagnostic);
|
||||
// Comment must be on the first or second line
|
||||
if line_index > 1 {
|
||||
return;
|
||||
}
|
||||
|
||||
// ```python
|
||||
// # -*- coding: utf-8 -*-
|
||||
// # -*- coding: latin-1 -*-
|
||||
// ```
|
||||
// or
|
||||
// ```python
|
||||
// # -*- coding: utf-8 -*-
|
||||
// # comment
|
||||
// # -*- coding: latin-1 -*-
|
||||
// ```
|
||||
if matches!(
|
||||
(iter.next(), iter.next()),
|
||||
(Some(CodingComment::OtherEncoding), _)
|
||||
| (
|
||||
Some(CodingComment::NoEncoding),
|
||||
Some(CodingComment::OtherEncoding)
|
||||
)
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
let fix = Fix::safe_edit(Edit::range_deletion(range.line));
|
||||
let diagnostic = Diagnostic::new(UTF8EncodingDeclaration, range.comment);
|
||||
|
||||
diagnostics.push(diagnostic.with_fix(fix));
|
||||
}
|
||||
|
||||
struct CodingCommentIterator<'a> {
|
||||
/// End offset of the last comment trivia or `None` if there
|
||||
/// was any non-comment comment since the iterator started (e.g. a print statement)
|
||||
last_trivia_end: Option<TextSize>,
|
||||
locator: &'a Locator<'a>,
|
||||
comments: std::slice::Iter<'a, TextRange>,
|
||||
}
|
||||
|
||||
impl<'a> CodingCommentIterator<'a> {
|
||||
fn new(locator: &'a Locator<'a>, comments: &'a CommentRanges) -> Self {
|
||||
Self {
|
||||
last_trivia_end: Some(locator.bom_start_offset()),
|
||||
locator,
|
||||
comments: comments.iter(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Iterator for CodingCommentIterator<'_> {
|
||||
type Item = CodingComment;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
let comment = self.comments.next()?;
|
||||
let line_range = self.locator.full_line_range(comment.start());
|
||||
|
||||
// If leading content is not whitespace then it's not a valid coding comment e.g.
|
||||
// ```py
|
||||
// print(x) # coding=utf8
|
||||
// ```
|
||||
// or
|
||||
// ```python
|
||||
// print(test)
|
||||
// # -*- coding: utf-8 -*-
|
||||
// ```
|
||||
let last_trivia_end = self.last_trivia_end.take()?;
|
||||
let before_hash_sign = self
|
||||
.locator
|
||||
.slice(TextRange::new(last_trivia_end, comment.start()));
|
||||
|
||||
if !before_hash_sign.trim().is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
self.last_trivia_end = Some(comment.end());
|
||||
|
||||
let result = if let Some(parts_of_interest) =
|
||||
CODING_COMMENT_REGEX.captures(self.locator.slice(line_range))
|
||||
{
|
||||
let coding_name = parts_of_interest.name("name").unwrap();
|
||||
|
||||
match coding_name.as_str() {
|
||||
"utf8" | "utf-8" => CodingComment::UTF8(CodingCommentRange {
|
||||
comment: comment.range(),
|
||||
line: line_range,
|
||||
}),
|
||||
_ => CodingComment::OtherEncoding,
|
||||
}
|
||||
} else {
|
||||
CodingComment::NoEncoding
|
||||
};
|
||||
|
||||
Some(result)
|
||||
}
|
||||
}
|
||||
|
||||
impl FusedIterator for CodingCommentIterator<'_> {}
|
||||
|
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
UP009_utf8_code_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||
|
|
||||
1 | # -*- coding: utf8 -*-
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||
2 | print("the following is not a coding comment")
|
||||
3 | # -*- coding: ascii -*-
|
||||
|
|
||||
= help: Remove unnecessary coding comment
|
||||
|
||||
ℹ Safe fix
|
||||
1 |-# -*- coding: utf8 -*-
|
||||
2 1 | print("the following is not a coding comment")
|
||||
3 2 | # -*- coding: ascii -*-
|
|
@ -0,0 +1,5 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
|
|
@ -0,0 +1,15 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
UP009_utf8_utf8.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||
|
|
||||
1 | # -*- coding: utf-8 -*-
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||
2 | # -*- coding: utf-8 -*-
|
||||
|
|
||||
= help: Remove unnecessary coding comment
|
||||
|
||||
ℹ Safe fix
|
||||
1 1 | # -*- coding: utf-8 -*-
|
||||
2 |-# -*- coding: utf-8 -*-
|
|
@ -0,0 +1,17 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||
snapshot_kind: text
|
||||
---
|
||||
UP009_utf8_utf8_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||
|
|
||||
1 | # -*- coding: utf-8 -*-
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||
2 | # -*- coding: utf-8 -*-
|
||||
3 | # -*- coding: ascii -*-
|
||||
|
|
||||
= help: Remove unnecessary coding comment
|
||||
|
||||
ℹ Safe fix
|
||||
1 1 | # -*- coding: utf-8 -*-
|
||||
2 |-# -*- coding: utf-8 -*-
|
||||
3 2 | # -*- coding: ascii -*-
|
|
@ -295,6 +295,44 @@ pub trait LineRanges {
|
|||
/// ## Panics
|
||||
/// If the start or end of `range` is out of bounds.
|
||||
fn full_lines_str(&self, range: TextRange) -> &str;
|
||||
|
||||
/// The number of lines `range` spans.
|
||||
///
|
||||
/// ## Examples
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_text_size::{Ranged, TextRange};
|
||||
/// # use ruff_source_file::LineRanges;
|
||||
///
|
||||
/// assert_eq!("a\nb".count_lines(TextRange::up_to(1.into())), 0);
|
||||
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(3.into())), 1, "Up to the end of the second line");
|
||||
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(4.into())), 2, "In between the line break characters");
|
||||
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(5.into())), 2);
|
||||
/// assert_eq!("Single line".count_lines(TextRange::up_to(13.into())), 0);
|
||||
/// assert_eq!("out\nof\nbounds end".count_lines(TextRange::up_to(55.into())), 2);
|
||||
/// ```
|
||||
fn count_lines(&self, range: TextRange) -> u32 {
|
||||
let mut count = 0;
|
||||
let mut line_end = self.line_end(range.start());
|
||||
|
||||
loop {
|
||||
let next_line_start = self.full_line_end(line_end);
|
||||
|
||||
// Reached the end of the string
|
||||
if next_line_start == line_end {
|
||||
break count;
|
||||
}
|
||||
|
||||
// Range ends at the line boundary
|
||||
if line_end >= range.end() {
|
||||
break count;
|
||||
}
|
||||
|
||||
count += 1;
|
||||
|
||||
line_end = self.line_end(next_line_start);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl LineRanges for str {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue