mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-02 09:52:32 +00:00
[pyupgrade
] Do not report when a UTF-8 comment is followed by a non-UTF-8 one (UP009
) (#14728)
## Summary Resolves #14704. ## Test Plan `cargo nextest run` and `cargo insta test`. --------- Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
a55722e740
commit
c8d505c8ea
22 changed files with 273 additions and 49 deletions
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_code_utf8_utf8.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
print('No coding coments here')
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_hashbang_utf8_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_hashbang_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
#!/usr/bin/python
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: ascii -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_many_empty_lines.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_many_empty_lines.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
|
||||||
|
|
||||||
|
# -*- coding: utf-8 -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_other.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# -*- coding: ascii -*-
|
||||||
|
# -*- coding: latin -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_other_utf8.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# -*- coding: ascii -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_code_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_code_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# -*- coding: utf8 -*-
|
||||||
|
print("the following is not a coding comment")
|
||||||
|
# -*- coding: ascii -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: ascii -*-
|
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py
vendored
Normal file
2
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8.py
vendored
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py
vendored
Normal file
3
crates/ruff_linter/resources/test/fixtures/pyupgrade/UP009_utf8_utf8_other.py
vendored
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: utf-8 -*-
|
||||||
|
# -*- coding: ascii -*-
|
|
@ -73,12 +73,7 @@ pub(crate) fn check_tokens(
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
|
if settings.rules.enabled(Rule::UTF8EncodingDeclaration) {
|
||||||
pyupgrade::rules::unnecessary_coding_comment(
|
pyupgrade::rules::unnecessary_coding_comment(&mut diagnostics, locator, comment_ranges);
|
||||||
&mut diagnostics,
|
|
||||||
locator,
|
|
||||||
indexer,
|
|
||||||
comment_ranges,
|
|
||||||
);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if settings.rules.enabled(Rule::TabIndentation) {
|
if settings.rules.enabled(Rule::TabIndentation) {
|
||||||
|
|
|
@ -77,6 +77,18 @@ mod tests {
|
||||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_8.py"))]
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_8.py"))]
|
||||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_9.py"))]
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_9.py"))]
|
||||||
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_10.py"))]
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_10.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_other.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_other_utf8.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_other.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_utf8_other.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_utf8_code_other.py"))]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_code_utf8_utf8.py"))]
|
||||||
|
#[test_case(
|
||||||
|
Rule::UTF8EncodingDeclaration,
|
||||||
|
Path::new("UP009_hashbang_utf8_other.py")
|
||||||
|
)]
|
||||||
|
#[test_case(Rule::UTF8EncodingDeclaration, Path::new("UP009_many_empty_lines.py"))]
|
||||||
#[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
|
#[test_case(Rule::UnicodeKindPrefix, Path::new("UP025.py"))]
|
||||||
#[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
|
#[test_case(Rule::UnnecessaryBuiltinImport, Path::new("UP029.py"))]
|
||||||
#[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]
|
#[test_case(Rule::UnnecessaryClassParentheses, Path::new("UP039.py"))]
|
||||||
|
|
|
@ -1,13 +1,13 @@
|
||||||
|
use std::iter::FusedIterator;
|
||||||
use std::sync::LazyLock;
|
use std::sync::LazyLock;
|
||||||
|
|
||||||
use regex::Regex;
|
use regex::Regex;
|
||||||
|
|
||||||
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
|
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
|
||||||
use ruff_macros::{derive_message_formats, ViolationMetadata};
|
use ruff_macros::{derive_message_formats, ViolationMetadata};
|
||||||
use ruff_python_index::Indexer;
|
|
||||||
use ruff_python_trivia::CommentRanges;
|
use ruff_python_trivia::CommentRanges;
|
||||||
use ruff_source_file::LineRanges;
|
use ruff_source_file::LineRanges;
|
||||||
use ruff_text_size::{Ranged, TextRange};
|
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||||
|
|
||||||
use crate::Locator;
|
use crate::Locator;
|
||||||
|
|
||||||
|
@ -46,57 +46,134 @@ impl AlwaysFixableViolation for UTF8EncodingDeclaration {
|
||||||
|
|
||||||
// Regex from PEP263.
|
// Regex from PEP263.
|
||||||
static CODING_COMMENT_REGEX: LazyLock<Regex> =
|
static CODING_COMMENT_REGEX: LazyLock<Regex> =
|
||||||
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*utf-?8").unwrap());
|
LazyLock::new(|| Regex::new(r"^[ \t\f]*#.*?coding[:=][ \t]*(?<name>[-_.a-zA-Z0-9]+)").unwrap());
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
enum CodingComment {
|
||||||
|
/// A UTF-8 encoding declaration.
|
||||||
|
UTF8(CodingCommentRange),
|
||||||
|
/// A declaration for any non utf8 encoding
|
||||||
|
OtherEncoding,
|
||||||
|
/// Any other comment
|
||||||
|
NoEncoding,
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Debug)]
|
||||||
|
struct CodingCommentRange {
|
||||||
|
comment: TextRange,
|
||||||
|
line: TextRange,
|
||||||
|
}
|
||||||
|
|
||||||
/// UP009
|
/// UP009
|
||||||
pub(crate) fn unnecessary_coding_comment(
|
pub(crate) fn unnecessary_coding_comment(
|
||||||
diagnostics: &mut Vec<Diagnostic>,
|
diagnostics: &mut Vec<Diagnostic>,
|
||||||
locator: &Locator,
|
locator: &Locator,
|
||||||
indexer: &Indexer,
|
|
||||||
comment_ranges: &CommentRanges,
|
comment_ranges: &CommentRanges,
|
||||||
) {
|
) {
|
||||||
// The coding comment must be on one of the first two lines. Since each comment spans at least
|
let mut iter = CodingCommentIterator::new(locator, comment_ranges)
|
||||||
// one line, we only need to check the first two comments at most.
|
.skip_while(|comment| matches!(comment, CodingComment::NoEncoding));
|
||||||
for comment_range in comment_ranges.iter().take(2) {
|
|
||||||
// If leading content is not whitespace then it's not a valid coding comment e.g.
|
|
||||||
// ```
|
|
||||||
// print(x) # coding=utf8
|
|
||||||
// ```
|
|
||||||
let line_range = locator.full_line_range(comment_range.start());
|
|
||||||
if !locator
|
|
||||||
.slice(TextRange::new(line_range.start(), comment_range.start()))
|
|
||||||
.trim()
|
|
||||||
.is_empty()
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// If the line is after a continuation then it's not a valid coding comment e.g.
|
let Some(CodingComment::UTF8(range)) = iter.next() else {
|
||||||
// ```
|
return;
|
||||||
// x = 1 \
|
};
|
||||||
// # coding=utf8
|
|
||||||
// x = 2
|
|
||||||
// ```
|
|
||||||
if indexer
|
|
||||||
.preceded_by_continuations(line_range.start(), locator.contents())
|
|
||||||
.is_some()
|
|
||||||
{
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
if CODING_COMMENT_REGEX.is_match(locator.slice(line_range)) {
|
let line_index = locator.count_lines(TextRange::up_to(range.comment.start()));
|
||||||
#[allow(deprecated)]
|
|
||||||
let index = locator.compute_line_index(line_range.start());
|
|
||||||
if index.to_zero_indexed() > 1 {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut diagnostic = Diagnostic::new(UTF8EncodingDeclaration, *comment_range);
|
// Comment must be on the first or second line
|
||||||
diagnostic.set_fix(Fix::safe_edit(Edit::deletion(
|
if line_index > 1 {
|
||||||
line_range.start(),
|
return;
|
||||||
line_range.end(),
|
}
|
||||||
)));
|
|
||||||
diagnostics.push(diagnostic);
|
// ```python
|
||||||
|
// # -*- coding: utf-8 -*-
|
||||||
|
// # -*- coding: latin-1 -*-
|
||||||
|
// ```
|
||||||
|
// or
|
||||||
|
// ```python
|
||||||
|
// # -*- coding: utf-8 -*-
|
||||||
|
// # comment
|
||||||
|
// # -*- coding: latin-1 -*-
|
||||||
|
// ```
|
||||||
|
if matches!(
|
||||||
|
(iter.next(), iter.next()),
|
||||||
|
(Some(CodingComment::OtherEncoding), _)
|
||||||
|
| (
|
||||||
|
Some(CodingComment::NoEncoding),
|
||||||
|
Some(CodingComment::OtherEncoding)
|
||||||
|
)
|
||||||
|
) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let fix = Fix::safe_edit(Edit::range_deletion(range.line));
|
||||||
|
let diagnostic = Diagnostic::new(UTF8EncodingDeclaration, range.comment);
|
||||||
|
|
||||||
|
diagnostics.push(diagnostic.with_fix(fix));
|
||||||
|
}
|
||||||
|
|
||||||
|
struct CodingCommentIterator<'a> {
|
||||||
|
/// End offset of the last comment trivia or `None` if there
|
||||||
|
/// was any non-comment comment since the iterator started (e.g. a print statement)
|
||||||
|
last_trivia_end: Option<TextSize>,
|
||||||
|
locator: &'a Locator<'a>,
|
||||||
|
comments: std::slice::Iter<'a, TextRange>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> CodingCommentIterator<'a> {
|
||||||
|
fn new(locator: &'a Locator<'a>, comments: &'a CommentRanges) -> Self {
|
||||||
|
Self {
|
||||||
|
last_trivia_end: Some(locator.bom_start_offset()),
|
||||||
|
locator,
|
||||||
|
comments: comments.iter(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
impl Iterator for CodingCommentIterator<'_> {
|
||||||
|
type Item = CodingComment;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
let comment = self.comments.next()?;
|
||||||
|
let line_range = self.locator.full_line_range(comment.start());
|
||||||
|
|
||||||
|
// If leading content is not whitespace then it's not a valid coding comment e.g.
|
||||||
|
// ```py
|
||||||
|
// print(x) # coding=utf8
|
||||||
|
// ```
|
||||||
|
// or
|
||||||
|
// ```python
|
||||||
|
// print(test)
|
||||||
|
// # -*- coding: utf-8 -*-
|
||||||
|
// ```
|
||||||
|
let last_trivia_end = self.last_trivia_end.take()?;
|
||||||
|
let before_hash_sign = self
|
||||||
|
.locator
|
||||||
|
.slice(TextRange::new(last_trivia_end, comment.start()));
|
||||||
|
|
||||||
|
if !before_hash_sign.trim().is_empty() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
self.last_trivia_end = Some(comment.end());
|
||||||
|
|
||||||
|
let result = if let Some(parts_of_interest) =
|
||||||
|
CODING_COMMENT_REGEX.captures(self.locator.slice(line_range))
|
||||||
|
{
|
||||||
|
let coding_name = parts_of_interest.name("name").unwrap();
|
||||||
|
|
||||||
|
match coding_name.as_str() {
|
||||||
|
"utf8" | "utf-8" => CodingComment::UTF8(CodingCommentRange {
|
||||||
|
comment: comment.range(),
|
||||||
|
line: line_range,
|
||||||
|
}),
|
||||||
|
_ => CodingComment::OtherEncoding,
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
CodingComment::NoEncoding
|
||||||
|
};
|
||||||
|
|
||||||
|
Some(result)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl FusedIterator for CodingCommentIterator<'_> {}
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,17 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
UP009_utf8_code_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||||
|
|
|
||||||
|
1 | # -*- coding: utf8 -*-
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||||
|
2 | print("the following is not a coding comment")
|
||||||
|
3 | # -*- coding: ascii -*-
|
||||||
|
|
|
||||||
|
= help: Remove unnecessary coding comment
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
1 |-# -*- coding: utf8 -*-
|
||||||
|
2 1 | print("the following is not a coding comment")
|
||||||
|
3 2 | # -*- coding: ascii -*-
|
|
@ -0,0 +1,5 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
|
|
@ -0,0 +1,15 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
UP009_utf8_utf8.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||||
|
|
|
||||||
|
1 | # -*- coding: utf-8 -*-
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||||
|
2 | # -*- coding: utf-8 -*-
|
||||||
|
|
|
||||||
|
= help: Remove unnecessary coding comment
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
1 1 | # -*- coding: utf-8 -*-
|
||||||
|
2 |-# -*- coding: utf-8 -*-
|
|
@ -0,0 +1,17 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_linter/src/rules/pyupgrade/mod.rs
|
||||||
|
snapshot_kind: text
|
||||||
|
---
|
||||||
|
UP009_utf8_utf8_other.py:1:1: UP009 [*] UTF-8 encoding declaration is unnecessary
|
||||||
|
|
|
||||||
|
1 | # -*- coding: utf-8 -*-
|
||||||
|
| ^^^^^^^^^^^^^^^^^^^^^^^ UP009
|
||||||
|
2 | # -*- coding: utf-8 -*-
|
||||||
|
3 | # -*- coding: ascii -*-
|
||||||
|
|
|
||||||
|
= help: Remove unnecessary coding comment
|
||||||
|
|
||||||
|
ℹ Safe fix
|
||||||
|
1 1 | # -*- coding: utf-8 -*-
|
||||||
|
2 |-# -*- coding: utf-8 -*-
|
||||||
|
3 2 | # -*- coding: ascii -*-
|
|
@ -295,6 +295,44 @@ pub trait LineRanges {
|
||||||
/// ## Panics
|
/// ## Panics
|
||||||
/// If the start or end of `range` is out of bounds.
|
/// If the start or end of `range` is out of bounds.
|
||||||
fn full_lines_str(&self, range: TextRange) -> &str;
|
fn full_lines_str(&self, range: TextRange) -> &str;
|
||||||
|
|
||||||
|
/// The number of lines `range` spans.
|
||||||
|
///
|
||||||
|
/// ## Examples
|
||||||
|
///
|
||||||
|
/// ```
|
||||||
|
/// # use ruff_text_size::{Ranged, TextRange};
|
||||||
|
/// # use ruff_source_file::LineRanges;
|
||||||
|
///
|
||||||
|
/// assert_eq!("a\nb".count_lines(TextRange::up_to(1.into())), 0);
|
||||||
|
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(3.into())), 1, "Up to the end of the second line");
|
||||||
|
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(4.into())), 2, "In between the line break characters");
|
||||||
|
/// assert_eq!("a\nb\r\nc".count_lines(TextRange::up_to(5.into())), 2);
|
||||||
|
/// assert_eq!("Single line".count_lines(TextRange::up_to(13.into())), 0);
|
||||||
|
/// assert_eq!("out\nof\nbounds end".count_lines(TextRange::up_to(55.into())), 2);
|
||||||
|
/// ```
|
||||||
|
fn count_lines(&self, range: TextRange) -> u32 {
|
||||||
|
let mut count = 0;
|
||||||
|
let mut line_end = self.line_end(range.start());
|
||||||
|
|
||||||
|
loop {
|
||||||
|
let next_line_start = self.full_line_end(line_end);
|
||||||
|
|
||||||
|
// Reached the end of the string
|
||||||
|
if next_line_start == line_end {
|
||||||
|
break count;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Range ends at the line boundary
|
||||||
|
if line_end >= range.end() {
|
||||||
|
break count;
|
||||||
|
}
|
||||||
|
|
||||||
|
count += 1;
|
||||||
|
|
||||||
|
line_end = self.line_end(next_line_start);
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl LineRanges for str {
|
impl LineRanges for str {
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue