Move Q003 to AST checker (#10923)

## Summary

This PR moves the `Q003` rule to AST checker.

This is the final rule that used the docstring detection state machine
and thus this PR removes it as well.

resolves: #7595 
resolves: #7808 

## Test Plan

- [x] `cargo test`
- [x] Make sure there are no changes in the ecosystem
This commit is contained in:
Dhruv Manilawala 2024-04-14 23:44:12 +05:30 committed by GitHub
parent 812b0976a9
commit f9a828f493
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
9 changed files with 307 additions and 319 deletions

View file

@ -33,4 +33,7 @@ pub(crate) fn string_like(string_like: StringLike, checker: &mut Checker) {
if checker.enabled(Rule::UnnecessaryEscapedQuote) {
flake8_quotes::rules::unnecessary_escaped_quote(checker, string_like);
}
if checker.enabled(Rule::AvoidableEscapedQuote) && checker.settings.flake8_quotes.avoid_escape {
flake8_quotes::rules::avoidable_escaped_quote(checker, string_like);
}
}

View file

@ -16,7 +16,7 @@ use crate::registry::{AsRule, Rule};
use crate::rules::pycodestyle::rules::BlankLinesChecker;
use crate::rules::{
eradicate, flake8_commas, flake8_executable, flake8_fixme, flake8_implicit_str_concat,
flake8_pyi, flake8_quotes, flake8_todos, pycodestyle, pygrep_hooks, pylint, pyupgrade, ruff,
flake8_pyi, flake8_todos, pycodestyle, pygrep_hooks, pylint, pyupgrade, ruff,
};
use crate::settings::LinterSettings;
@ -122,10 +122,6 @@ pub(crate) fn check_tokens(
);
}
if settings.rules.enabled(Rule::AvoidableEscapedQuote) && settings.flake8_quotes.avoid_escape {
flake8_quotes::rules::avoidable_escaped_quote(&mut diagnostics, tokens, locator, settings);
}
if settings.rules.any_enabled(&[
Rule::SingleLineImplicitStringConcatenation,
Rule::MultiLineImplicitStringConcatenation,

View file

@ -1,122 +0,0 @@
//! Extract docstrings via tokenization.
//!
//! See: <https://github.com/zheller/flake8-quotes/blob/ef0d9a90249a080e460b70ab62bf4b65e5aa5816/flake8_quotes/docstring_detection.py#L29>
//!
//! TODO(charlie): Consolidate with the existing AST-based docstring extraction.
use ruff_python_parser::Tok;
#[derive(Default, Copy, Clone)]
enum State {
// Start of the module: first string gets marked as a docstring.
#[default]
ExpectModuleDocstring,
// After seeing a class definition, we're waiting for the block colon (and do bracket
// counting).
ExpectClassColon,
// After seeing the block colon in a class definition, we expect a docstring.
ExpectClassDocstring,
// Same as ExpectClassColon, but for function definitions.
ExpectFunctionColon,
// Same as ExpectClassDocstring, but for function definitions.
ExpectFunctionDocstring,
// Skip tokens until we observe a `class` or `def`.
Other,
}
#[derive(Default)]
pub(crate) struct StateMachine {
state: State,
bracket_count: usize,
}
impl StateMachine {
pub(crate) fn consume(&mut self, tok: &Tok) -> bool {
match tok {
Tok::NonLogicalNewline
| Tok::Newline
| Tok::Indent
| Tok::Dedent
| Tok::Comment(..) => false,
Tok::String { .. } => {
if matches!(
self.state,
State::ExpectModuleDocstring
| State::ExpectClassDocstring
| State::ExpectFunctionDocstring
) {
self.state = State::Other;
true
} else {
false
}
}
Tok::Class => {
self.state = State::ExpectClassColon;
self.bracket_count = 0;
false
}
Tok::Def => {
self.state = State::ExpectFunctionColon;
self.bracket_count = 0;
false
}
Tok::Colon => {
if self.bracket_count == 0 {
if matches!(self.state, State::ExpectClassColon) {
self.state = State::ExpectClassDocstring;
} else if matches!(self.state, State::ExpectFunctionColon) {
self.state = State::ExpectFunctionDocstring;
}
}
false
}
Tok::Lpar | Tok::Lbrace | Tok::Lsqb => {
self.bracket_count = self.bracket_count.saturating_add(1);
if matches!(
self.state,
State::ExpectModuleDocstring
| State::ExpectClassDocstring
| State::ExpectFunctionDocstring
) {
self.state = State::Other;
}
false
}
Tok::Rpar | Tok::Rbrace | Tok::Rsqb => {
self.bracket_count = self.bracket_count.saturating_sub(1);
if matches!(
self.state,
State::ExpectModuleDocstring
| State::ExpectClassDocstring
| State::ExpectFunctionDocstring
) {
self.state = State::Other;
}
false
}
_ => {
if matches!(
self.state,
State::ExpectModuleDocstring
| State::ExpectClassDocstring
| State::ExpectFunctionDocstring
) {
self.state = State::Other;
}
false
}
}
}
}

View file

@ -1 +0,0 @@
pub(crate) mod docstring_detection;

View file

@ -24,7 +24,6 @@ mod docstrings;
mod fix;
pub mod fs;
mod importer;
mod lex;
pub mod line_width;
pub mod linter;
pub mod logging;

View file

@ -256,7 +256,6 @@ impl Rule {
| Rule::MixedSpacesAndTabs
| Rule::TrailingWhitespace => LintSource::PhysicalLines,
Rule::AmbiguousUnicodeCharacterComment
| Rule::AvoidableEscapedQuote
| Rule::BlanketNOQA
| Rule::BlanketTypeIgnore
| Rule::BlankLineAfterDecorator

View file

@ -1,3 +1,12 @@
use ruff_python_ast::AnyStringKind;
use ruff_text_size::TextLen;
/// Returns the raw contents of the string given the string's contents and kind.
/// This is a string without the prefix and quotes.
pub(super) fn raw_contents(contents: &str, kind: AnyStringKind) -> &str {
&contents[kind.opener_len().to_usize()..(contents.text_len() - kind.closer_len()).to_usize()]
}
/// Return `true` if the haystack contains an escaped quote.
pub(super) fn contains_escaped_quote(haystack: &str, quote: char) -> bool {
for index in memchr::memchr_iter(quote as u8, haystack.as_bytes()) {

View file

@ -1,15 +1,16 @@
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::Tok;
use ruff_python_ast::visitor::{walk_f_string, Visitor};
use ruff_python_ast::{self as ast, AnyStringKind, StringLike};
use ruff_source_file::Locator;
use ruff_text_size::TextRange;
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::lex::docstring_detection::StateMachine;
use crate::checkers::ast::Checker;
use crate::rules::flake8_quotes;
use crate::settings::LinterSettings;
use super::super::helpers::{contains_escaped_quote, unescape_string};
use super::super::settings::Quote;
use flake8_quotes::helpers::{contains_escaped_quote, raw_contents, unescape_string};
use flake8_quotes::settings::Quote;
/// ## What it does
/// Checks for strings that include escaped quotes, and suggests changing
@ -49,197 +50,303 @@ impl AlwaysFixableViolation for AvoidableEscapedQuote {
}
}
struct FStringContext {
/// Whether to check for escaped quotes in the f-string.
check_for_escaped_quote: bool,
/// The range of the f-string start token.
start_range: TextRange,
/// The ranges of the f-string middle tokens containing escaped quotes.
middle_ranges_with_escapes: Vec<TextRange>,
}
impl FStringContext {
fn new(check_for_escaped_quote: bool, fstring_start_range: TextRange) -> Self {
Self {
check_for_escaped_quote,
start_range: fstring_start_range,
middle_ranges_with_escapes: vec![],
}
}
/// Update the context to not check for escaped quotes, and clear any
/// existing reported ranges.
fn ignore_escaped_quotes(&mut self) {
self.check_for_escaped_quote = false;
self.middle_ranges_with_escapes.clear();
}
fn push_fstring_middle_range(&mut self, range: TextRange) {
self.middle_ranges_with_escapes.push(range);
}
}
/// Q003
pub(crate) fn avoidable_escaped_quote(
diagnostics: &mut Vec<Diagnostic>,
lxr: &[LexResult],
locator: &Locator,
settings: &LinterSettings,
) {
let quotes_settings = &settings.flake8_quotes;
let supports_pep701 = settings.target_version.supports_pep701();
let mut fstrings: Vec<FStringContext> = Vec::new();
let mut state_machine = StateMachine::default();
for &(ref tok, tok_range) in lxr.iter().flatten() {
let is_docstring = state_machine.consume(tok);
if is_docstring {
continue;
pub(crate) fn avoidable_escaped_quote(checker: &mut Checker, string_like: StringLike) {
if checker.semantic().in_docstring()
|| checker.semantic().in_string_type_definition()
// This rule has support for strings nested inside another f-strings but they're checked
// via the outermost f-string. This means that we shouldn't be checking any nested string
// or f-string.
|| checker.semantic().in_f_string_replacement_field()
{
return;
}
if !supports_pep701 {
// If this is a string or a start of a f-string which is inside another
// f-string, we won't check for escaped quotes for the entire f-string
// if the target version doesn't support PEP 701. For example:
let mut rule_checker = AvoidableEscapedQuoteChecker::new(checker.locator(), checker.settings);
match string_like {
StringLike::String(expr) => {
for string_literal in &expr.value {
rule_checker.visit_string_literal(string_literal);
}
}
StringLike::Bytes(expr) => {
for bytes_literal in &expr.value {
rule_checker.visit_bytes_literal(bytes_literal);
}
}
StringLike::FString(expr) => {
for part in &expr.value {
match part {
ast::FStringPart::Literal(string_literal) => {
rule_checker.visit_string_literal(string_literal);
}
ast::FStringPart::FString(f_string) => {
rule_checker.visit_f_string(f_string);
}
}
}
}
}
checker.diagnostics.extend(rule_checker.into_diagnostics());
}
/// Checks for `Q003` violations using the [`Visitor`] implementation.
#[derive(Debug)]
struct AvoidableEscapedQuoteChecker<'a> {
locator: &'a Locator<'a>,
quotes_settings: &'a flake8_quotes::settings::Settings,
supports_pep701: bool,
diagnostics: Vec<Diagnostic>,
}
impl<'a> AvoidableEscapedQuoteChecker<'a> {
fn new(locator: &'a Locator<'a>, settings: &'a LinterSettings) -> Self {
Self {
locator,
quotes_settings: &settings.flake8_quotes,
supports_pep701: settings.target_version.supports_pep701(),
diagnostics: vec![],
}
}
/// Consumes the checker and returns a vector of [`Diagnostic`] found during the visit.
fn into_diagnostics(self) -> Vec<Diagnostic> {
self.diagnostics
}
}
impl Visitor<'_> for AvoidableEscapedQuoteChecker<'_> {
fn visit_string_literal(&mut self, string_literal: &'_ ast::StringLiteral) {
if let Some(diagnostic) = check_string_or_bytes(
self.locator,
self.quotes_settings,
string_literal.range(),
AnyStringKind::from(string_literal.flags),
) {
self.diagnostics.push(diagnostic);
}
}
fn visit_bytes_literal(&mut self, bytes_literal: &'_ ast::BytesLiteral) {
if let Some(diagnostic) = check_string_or_bytes(
self.locator,
self.quotes_settings,
bytes_literal.range(),
AnyStringKind::from(bytes_literal.flags),
) {
self.diagnostics.push(diagnostic);
}
}
fn visit_f_string(&mut self, f_string: &'_ ast::FString) {
// If the target version doesn't support PEP 701, skip this entire f-string if it contains
// any string literal in any of the expression element. For example:
//
// ```python
// f"\"foo\" {'nested'}"
// # ^^^^^^^^
// # We're here
// ```
//
// If we try to fix the above example, the outer and inner quote
// will be the same which is invalid pre 3.12:
// If we try to fix the above example, the outer and inner quote will be the same which is
// invalid for any Python version before 3.12:
//
// ```python
// f'"foo" {'nested'}"
// ```
if matches!(tok, Tok::String { .. } | Tok::FStringStart(_)) {
if let Some(fstring_context) = fstrings.last_mut() {
fstring_context.ignore_escaped_quotes();
continue;
}
//
// Note that this check needs to be done globally to ignore the entire f-string. It is
// implicitly global in that we avoid recursing into this f-string if this is the case.
if !self.supports_pep701 {
let contains_any_string = {
let mut visitor = ContainsAnyString::default();
// We need to use the `walk_f_string` instead of `visit_f_string` to avoid
// considering the top level f-string.
walk_f_string(&mut visitor, f_string);
visitor.result
};
if contains_any_string {
return;
}
}
match tok {
Tok::String {
value: string_contents,
kind,
} => {
if kind.is_raw_string() || kind.is_triple_quoted() {
continue;
let opposite_quote_char = self.quotes_settings.inline_quotes.opposite().as_char();
// If any literal part of this f-string contains the quote character which is opposite to
// the configured inline quotes, we can't change the quote style for this f-string. For
// example:
//
// ```py
// f"\"hello\" {x} 'world'"
// ```
//
// If we try to fix the above example, the f-string will end in the middle and "world" will
// be considered as a variable which is outside this f-string:
//
// ```py
// f'"hello" {x} 'world''
// # ^
// # f-string ends here now
// ```
//
// The check is local to this f-string and it shouldn't check for any literal parts of any
// nested f-string. This is correct because by this point, we know that the target version
// is 3.12 or that this f-string doesn't have any strings nested in it. For example:
//
// ```py
// f'\'normal\' {f'\'nested\' {x} "double quotes"'} normal'
// ```
//
// This contains a nested f-string but if we reached here that means the target version
// supports PEP 701. The double quotes in the nested f-string shouldn't affect the outer
// f-string because the following is valid for Python version 3.12 and later:
//
// ```py
// f"'normal' {f'\'nested\' {x} "double quotes"'} normal"
// ```
if !f_string
.literals()
.any(|literal| contains_quote(literal, opposite_quote_char))
{
if let Some(diagnostic) = check_f_string(self.locator, self.quotes_settings, f_string) {
self.diagnostics.push(diagnostic);
}
}
walk_f_string(self, f_string);
}
}
/// Checks for unnecessary escaped quotes in a string or bytes literal.
///
/// # Panics
///
/// If the string kind is an f-string.
fn check_string_or_bytes(
locator: &Locator,
quotes_settings: &flake8_quotes::settings::Settings,
range: TextRange,
kind: AnyStringKind,
) -> Option<Diagnostic> {
assert!(!kind.is_f_string());
if kind.is_triple_quoted() || kind.is_raw_string() {
return None;
}
// Check if we're using the preferred quotation style.
if Quote::from(kind.quote_style()) != quotes_settings.inline_quotes {
continue;
return None;
}
if contains_escaped_quote(string_contents, quotes_settings.inline_quotes.as_char())
&& !contains_quote(
string_contents,
quotes_settings.inline_quotes.opposite().as_char(),
)
let contents = raw_contents(locator.slice(range), kind);
if !contains_escaped_quote(contents, quotes_settings.inline_quotes.as_char())
|| contains_quote(contents, quotes_settings.inline_quotes.opposite().as_char())
{
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, tok_range);
return None;
}
let mut diagnostic = Diagnostic::new(AvoidableEscapedQuote, range);
let fixed_contents = format!(
"{prefix}{quote}{value}{quote}",
prefix = kind.prefix(),
quote = quotes_settings.inline_quotes.opposite().as_char(),
value = unescape_string(
string_contents,
quotes_settings.inline_quotes.as_char()
)
value = unescape_string(contents, quotes_settings.inline_quotes.as_char())
);
diagnostic.set_fix(Fix::safe_edit(Edit::range_replacement(
fixed_contents,
tok_range,
)));
diagnostics.push(diagnostic);
}
}
Tok::FStringStart(kind) => {
// Check for escaped quote only if we're using the preferred quotation
// style and it isn't a triple-quoted f-string.
let check_for_escaped_quote = !kind.is_triple_quoted()
&& Quote::from(kind.quote_style()) == quotes_settings.inline_quotes;
fstrings.push(FStringContext::new(check_for_escaped_quote, tok_range));
}
Tok::FStringMiddle {
value: string_contents,
kind,
} if !kind.is_raw_string() => {
let Some(context) = fstrings.last_mut() else {
continue;
};
if !context.check_for_escaped_quote {
continue;
}
// If any part of the f-string contains the opposite quote,
// we can't change the quote style in the entire f-string.
if contains_quote(
string_contents,
quotes_settings.inline_quotes.opposite().as_char(),
) {
context.ignore_escaped_quotes();
continue;
}
if contains_escaped_quote(string_contents, quotes_settings.inline_quotes.as_char())
{
context.push_fstring_middle_range(tok_range);
}
}
Tok::FStringEnd => {
let Some(context) = fstrings.pop() else {
continue;
};
if context.middle_ranges_with_escapes.is_empty() {
// There are no `FStringMiddle` tokens containing any escaped
// quotes.
continue;
}
let mut diagnostic = Diagnostic::new(
AvoidableEscapedQuote,
TextRange::new(context.start_range.start(), tok_range.end()),
);
let fstring_start_edit = Edit::range_replacement(
// No need for `r`/`R` as we don't perform the checks
// for raw strings.
format!("f{}", quotes_settings.inline_quotes.opposite().as_char()),
context.start_range,
);
let fstring_middle_and_end_edits = context
.middle_ranges_with_escapes
.iter()
.map(|&range| {
Edit::range_replacement(
unescape_string(
locator.slice(range),
quotes_settings.inline_quotes.as_char(),
),
range,
)
})
.chain(std::iter::once(
// `FStringEnd` edit
Edit::range_replacement(
quotes_settings
.inline_quotes
.opposite()
.as_char()
.to_string(),
tok_range,
)));
Some(diagnostic)
}
/// Checks for unnecessary escaped quotes in an f-string.
fn check_f_string(
locator: &Locator,
quotes_settings: &flake8_quotes::settings::Settings,
f_string: &ast::FString,
) -> Option<Diagnostic> {
let ast::FString { flags, range, .. } = f_string;
if flags.is_triple_quoted() || flags.prefix().is_raw() {
return None;
}
// Check if we're using the preferred quotation style.
if Quote::from(flags.quote_style()) != quotes_settings.inline_quotes {
return None;
}
let quote_char = quotes_settings.inline_quotes.as_char();
let opposite_quote_char = quotes_settings.inline_quotes.opposite().as_char();
let mut edits = vec![];
for literal in f_string.literals() {
let content = locator.slice(literal);
if !contains_escaped_quote(content, quote_char) {
continue;
}
edits.push(Edit::range_replacement(
unescape_string(content, quote_char),
literal.range(),
));
}
if edits.is_empty() {
return None;
}
// Replacement for the f-string opening quote. We don't perform the check for raw and
// triple-quoted f-strings, so no need to account for them.
let start_edit = Edit::range_replacement(
format!("f{opposite_quote_char}"),
TextRange::at(
range.start(),
// Prefix + quote char
TextSize::new(2),
),
);
// Replacement for the f-string ending quote. We don't perform the check for triple-quoted
// f-string, so no need to account for them.
edits.push(Edit::range_replacement(
opposite_quote_char.to_string(),
TextRange::at(
// Offset would either be the end offset of the start edit in case there are no
// elements in the f-string (e.g., `f""`) or the end offset of the last f-string
// element (e.g., `f"hello"`).
f_string
.elements
.last()
.map_or_else(|| start_edit.end(), Ranged::end),
// Quote char
TextSize::new(1),
),
));
diagnostic.set_fix(Fix::safe_edits(
fstring_start_edit,
fstring_middle_and_end_edits,
));
diagnostics.push(diagnostic);
Some(
Diagnostic::new(AvoidableEscapedQuote, *range).with_fix(Fix::safe_edits(start_edit, edits)),
)
}
_ => {}
#[derive(Debug, Default)]
struct ContainsAnyString {
result: bool,
}
impl Visitor<'_> for ContainsAnyString {
fn visit_string_literal(&mut self, _: &'_ ast::StringLiteral) {
self.result = true;
}
fn visit_bytes_literal(&mut self, _: &'_ ast::BytesLiteral) {
self.result = true;
}
fn visit_f_string(&mut self, _: &'_ ast::FString) {
self.result = true;
// We don't need to recurse into this f-string now that we already know the result.
}
}

View file

@ -2,11 +2,11 @@ use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{self as ast, AnyStringKind, StringLike};
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextLen, TextRange};
use ruff_text_size::{Ranged, TextRange};
use crate::checkers::ast::Checker;
use super::super::helpers::{contains_escaped_quote, unescape_string};
use super::super::helpers::{contains_escaped_quote, raw_contents, unescape_string};
/// ## What it does
/// Checks for strings that include unnecessarily escaped quotes.
@ -47,6 +47,10 @@ impl AlwaysFixableViolation for UnnecessaryEscapedQuote {
/// Q004
pub(crate) fn unnecessary_escaped_quote(checker: &mut Checker, string_like: StringLike) {
if checker.semantic().in_docstring() {
return;
}
let locator = checker.locator();
match string_like {
@ -147,9 +151,3 @@ fn check_f_string(locator: &Locator, f_string: &ast::FString) -> Option<Diagnost
diagnostic.set_fix(Fix::safe_edits(first, edits_iter));
Some(diagnostic)
}
/// Returns the raw contents of the string given the string's contents and kind.
/// This is a string without the prefix and quotes.
fn raw_contents(contents: &str, kind: AnyStringKind) -> &str {
&contents[kind.opener_len().to_usize()..(contents.text_len() - kind.closer_len()).to_usize()]
}