mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-30 13:51:16 +00:00
Avoid triggering pd#at
and friends on non-subscripts (#4474)
This commit is contained in:
parent
39fb2cc732
commit
cd82b83f89
8 changed files with 208 additions and 152 deletions
|
@ -2352,6 +2352,8 @@ where
|
|||
{
|
||||
flake8_simplify::rules::use_capital_environment_variables(self, expr);
|
||||
}
|
||||
|
||||
pandas_vet::rules::subscript(self, value, expr);
|
||||
}
|
||||
Expr::Tuple(ast::ExprTuple {
|
||||
elts,
|
||||
|
@ -2525,7 +2527,7 @@ where
|
|||
if self.settings.rules.enabled(Rule::PrivateMemberAccess) {
|
||||
flake8_self::rules::private_member_access(self, expr);
|
||||
}
|
||||
pandas_vet::rules::check_attr(self, attr, value, expr);
|
||||
pandas_vet::rules::attr(self, attr, value, expr);
|
||||
}
|
||||
Expr::Call(ast::ExprCall {
|
||||
func,
|
||||
|
@ -3009,7 +3011,7 @@ where
|
|||
.into_iter(),
|
||||
);
|
||||
}
|
||||
pandas_vet::rules::check_call(self, func);
|
||||
pandas_vet::rules::call(self, func);
|
||||
|
||||
if self.settings.rules.enabled(Rule::PandasUseOfPdMerge) {
|
||||
if let Some(diagnostic) = pandas_vet::rules::use_of_pd_merge(func) {
|
||||
|
|
|
@ -1,18 +1,51 @@
|
|||
use ruff_python_semantic::binding::{BindingKind, Importation};
|
||||
use ruff_python_semantic::context::Context;
|
||||
use rustpython_parser::ast;
|
||||
use rustpython_parser::ast::Expr;
|
||||
|
||||
/// Return `true` if an `Expr` _could_ be a `DataFrame`. This rules out
|
||||
/// obviously-wrong cases, like constants and literals.
|
||||
pub(crate) const fn is_dataframe_candidate(expr: &Expr) -> bool {
|
||||
!matches!(
|
||||
expr,
|
||||
Expr::Constant(_)
|
||||
| Expr::Tuple(_)
|
||||
| Expr::List(_)
|
||||
| Expr::Set(_)
|
||||
| Expr::Dict(_)
|
||||
| Expr::SetComp(_)
|
||||
| Expr::ListComp(_)
|
||||
| Expr::DictComp(_)
|
||||
| Expr::GeneratorExp(_)
|
||||
)
|
||||
pub(crate) enum Resolution {
|
||||
/// The expression resolves to an irrelevant expression type (e.g., a constant).
|
||||
IrrelevantExpression,
|
||||
/// The expression resolves to an irrelevant binding (e.g., a function definition).
|
||||
IrrelevantBinding,
|
||||
/// The expression resolves to a relevant local binding (e.g., a variable).
|
||||
RelevantLocal,
|
||||
/// The expression resolves to the `pandas` module itself.
|
||||
PandasModule,
|
||||
}
|
||||
|
||||
/// Test an [`Expr`] for relevance to Pandas-related operations.
|
||||
pub(crate) fn test_expression(expr: &Expr, context: &Context) -> Resolution {
|
||||
match expr {
|
||||
Expr::Constant(_)
|
||||
| Expr::Tuple(_)
|
||||
| Expr::List(_)
|
||||
| Expr::Set(_)
|
||||
| Expr::Dict(_)
|
||||
| Expr::SetComp(_)
|
||||
| Expr::ListComp(_)
|
||||
| Expr::DictComp(_)
|
||||
| Expr::GeneratorExp(_) => Resolution::IrrelevantExpression,
|
||||
Expr::Name(ast::ExprName { id, .. }) => {
|
||||
context
|
||||
.find_binding(id)
|
||||
.map_or(Resolution::IrrelevantBinding, |binding| {
|
||||
match binding.kind {
|
||||
BindingKind::Annotation
|
||||
| BindingKind::Argument
|
||||
| BindingKind::Assignment
|
||||
| BindingKind::NamedExprAssignment
|
||||
| BindingKind::Binding
|
||||
| BindingKind::LoopVar
|
||||
| BindingKind::Global
|
||||
| BindingKind::Nonlocal => Resolution::RelevantLocal,
|
||||
BindingKind::Importation(Importation {
|
||||
full_name: module, ..
|
||||
}) if module == "pandas" => Resolution::PandasModule,
|
||||
_ => Resolution::IrrelevantBinding,
|
||||
}
|
||||
})
|
||||
}
|
||||
_ => Resolution::RelevantLocal,
|
||||
}
|
||||
}
|
||||
|
|
|
@ -105,6 +105,32 @@ mod tests {
|
|||
x = pd.DataFrame()
|
||||
index = x.loc[:, ['B', 'A']]
|
||||
"#, &[]; "PD008_pass")]
|
||||
#[test_case(r#"
|
||||
import io
|
||||
import zipfile
|
||||
|
||||
|
||||
class MockBinaryFile(io.BytesIO):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
def close(self):
|
||||
pass # Don't allow closing the file, it would clear the buffer
|
||||
|
||||
|
||||
zip_buffer = MockBinaryFile()
|
||||
|
||||
with zipfile.ZipFile(zip_buffer, "w") as zf:
|
||||
zf.writestr("dir/file.txt", "This is a test")
|
||||
|
||||
# Reset the BytesIO object's cursor to the start.
|
||||
zip_buffer.seek(0)
|
||||
|
||||
with zipfile.ZipFile(zip_buffer, "r") as zf:
|
||||
zpath = zipfile.Path(zf, "/")
|
||||
|
||||
dir_name, file_name = zpath.at.split("/")
|
||||
"#, &[]; "PD008_pass_on_attr")]
|
||||
#[test_case(r#"
|
||||
import pandas as pd
|
||||
x = pd.DataFrame()
|
||||
|
|
47
crates/ruff/src/rules/pandas_vet/rules/attr.rs
Normal file
47
crates/ruff/src/rules/pandas_vet/rules/attr.rs
Normal file
|
@ -0,0 +1,47 @@
|
|||
use rustpython_parser::ast::{Expr, Ranged};
|
||||
|
||||
use ruff_diagnostics::Violation;
|
||||
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
use crate::registry::Rule;
|
||||
use crate::rules::pandas_vet::helpers::{test_expression, Resolution};
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotValues;
|
||||
|
||||
impl Violation for PandasUseOfDotValues {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.to_numpy()` instead of `.values`")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn attr(checker: &mut Checker, attr: &str, value: &Expr, attr_expr: &Expr) {
|
||||
let rules = &checker.settings.rules;
|
||||
let violation: DiagnosticKind = match attr {
|
||||
"values" if rules.enabled(Rule::PandasUseOfDotValues) => PandasUseOfDotValues.into(),
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// Avoid flagging on function calls (e.g., `df.values()`).
|
||||
if let Some(parent) = checker.ctx.expr_parent() {
|
||||
if matches!(parent, Expr::Call(_)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Avoid flagging on non-DataFrames (e.g., `{"a": 1}.values`), and on irrelevant bindings
|
||||
// (like imports).
|
||||
if !matches!(
|
||||
test_expression(value, &checker.ctx),
|
||||
Resolution::RelevantLocal
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
checker
|
||||
.diagnostics
|
||||
.push(Diagnostic::new(violation, attr_expr.range()));
|
||||
}
|
|
@ -3,11 +3,10 @@ use rustpython_parser::ast::{self, Expr, Ranged};
|
|||
use ruff_diagnostics::Violation;
|
||||
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_semantic::binding::{BindingKind, Importation};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
use crate::registry::Rule;
|
||||
use crate::rules::pandas_vet::helpers::is_dataframe_candidate;
|
||||
use crate::rules::pandas_vet::helpers::{test_expression, Resolution};
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotIsNull;
|
||||
|
@ -61,9 +60,11 @@ impl Violation for PandasUseOfDotStack {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn check_call(checker: &mut Checker, func: &Expr) {
|
||||
pub(crate) fn call(checker: &mut Checker, func: &Expr) {
|
||||
let rules = &checker.settings.rules;
|
||||
let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else {return};
|
||||
let Expr::Attribute(ast::ExprAttribute { value, attr, .. } )= func else {
|
||||
return;
|
||||
};
|
||||
let violation: DiagnosticKind = match attr.as_str() {
|
||||
"isnull" if rules.enabled(Rule::PandasUseOfDotIsNull) => PandasUseOfDotIsNull.into(),
|
||||
"notnull" if rules.enabled(Rule::PandasUseOfDotNotNull) => PandasUseOfDotNotNull.into(),
|
||||
|
@ -77,37 +78,14 @@ pub(crate) fn check_call(checker: &mut Checker, func: &Expr) {
|
|||
_ => return,
|
||||
};
|
||||
|
||||
if !is_dataframe_candidate(value) {
|
||||
// Ignore irrelevant bindings (like imports).
|
||||
if !matches!(
|
||||
test_expression(value, &checker.ctx),
|
||||
Resolution::RelevantLocal | Resolution::PandasModule
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If the target is a named variable, avoid triggering on
|
||||
// irrelevant bindings (like non-Pandas imports).
|
||||
if let Expr::Name(ast::ExprName { id, .. }) = value.as_ref() {
|
||||
if checker.ctx.find_binding(id).map_or(true, |binding| {
|
||||
if let BindingKind::Importation(Importation {
|
||||
full_name: module, ..
|
||||
}) = &binding.kind
|
||||
{
|
||||
module != &"pandas"
|
||||
} else {
|
||||
matches!(
|
||||
binding.kind,
|
||||
BindingKind::Builtin
|
||||
| BindingKind::ClassDefinition
|
||||
| BindingKind::FunctionDefinition
|
||||
| BindingKind::Export(..)
|
||||
| BindingKind::FutureImportation
|
||||
| BindingKind::Importation(..)
|
||||
| BindingKind::FromImportation(..)
|
||||
| BindingKind::SubmoduleImportation(..)
|
||||
)
|
||||
}
|
||||
}) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
checker
|
||||
.diagnostics
|
||||
.push(Diagnostic::new(violation, func.range()));
|
|
@ -1,96 +0,0 @@
|
|||
use rustpython_parser::ast::{self, Expr, Ranged};
|
||||
|
||||
use ruff_diagnostics::Violation;
|
||||
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_semantic::binding::BindingKind;
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
use crate::registry::Rule;
|
||||
use crate::rules::pandas_vet::helpers::is_dataframe_candidate;
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotIx;
|
||||
|
||||
impl Violation for PandasUseOfDotIx {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("`.ix` is deprecated; use more explicit `.loc` or `.iloc`")
|
||||
}
|
||||
}
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotAt;
|
||||
|
||||
impl Violation for PandasUseOfDotAt {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.loc` instead of `.at`. If speed is important, use numpy.")
|
||||
}
|
||||
}
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotIat;
|
||||
|
||||
impl Violation for PandasUseOfDotIat {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.iloc` instead of `.iat`. If speed is important, use numpy.")
|
||||
}
|
||||
}
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotValues;
|
||||
|
||||
impl Violation for PandasUseOfDotValues {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.to_numpy()` instead of `.values`")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn check_attr(checker: &mut Checker, attr: &str, value: &Expr, attr_expr: &Expr) {
|
||||
let rules = &checker.settings.rules;
|
||||
let violation: DiagnosticKind = match attr {
|
||||
"ix" if rules.enabled(Rule::PandasUseOfDotIx) => PandasUseOfDotIx.into(),
|
||||
"at" if rules.enabled(Rule::PandasUseOfDotAt) => PandasUseOfDotAt.into(),
|
||||
"iat" if rules.enabled(Rule::PandasUseOfDotIat) => PandasUseOfDotIat.into(),
|
||||
"values" if rules.enabled(Rule::PandasUseOfDotValues) => PandasUseOfDotValues.into(),
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// Avoid flagging on function calls (e.g., `df.values()`).
|
||||
if let Some(parent) = checker.ctx.expr_parent() {
|
||||
if matches!(parent, Expr::Call(_)) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
// Avoid flagging on non-DataFrames (e.g., `{"a": 1}.values`).
|
||||
if !is_dataframe_candidate(value) {
|
||||
return;
|
||||
}
|
||||
|
||||
// If the target is a named variable, avoid triggering on
|
||||
// irrelevant bindings (like imports).
|
||||
if let Expr::Name(ast::ExprName { id, .. }) = value {
|
||||
if checker.ctx.find_binding(id).map_or(true, |binding| {
|
||||
matches!(
|
||||
binding.kind,
|
||||
BindingKind::Builtin
|
||||
| BindingKind::ClassDefinition
|
||||
| BindingKind::FunctionDefinition
|
||||
| BindingKind::Export(..)
|
||||
| BindingKind::FutureImportation
|
||||
| BindingKind::Importation(..)
|
||||
| BindingKind::FromImportation(..)
|
||||
| BindingKind::SubmoduleImportation(..)
|
||||
)
|
||||
}) {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
checker
|
||||
.diagnostics
|
||||
.push(Diagnostic::new(violation, attr_expr.range()));
|
||||
}
|
|
@ -1,16 +1,16 @@
|
|||
pub(crate) use assignment_to_df::{assignment_to_df, PandasDfVariableName};
|
||||
pub(crate) use check_attr::{
|
||||
check_attr, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx, PandasUseOfDotValues,
|
||||
};
|
||||
pub(crate) use check_call::{
|
||||
check_call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack,
|
||||
pub(crate) use attr::{attr, PandasUseOfDotValues};
|
||||
pub(crate) use call::{
|
||||
call, PandasUseOfDotIsNull, PandasUseOfDotNotNull, PandasUseOfDotPivotOrUnstack,
|
||||
PandasUseOfDotReadTable, PandasUseOfDotStack,
|
||||
};
|
||||
pub(crate) use inplace_argument::{inplace_argument, PandasUseOfInplaceArgument};
|
||||
pub(crate) use pd_merge::{use_of_pd_merge, PandasUseOfPdMerge};
|
||||
pub(crate) use subscript::{subscript, PandasUseOfDotAt, PandasUseOfDotIat, PandasUseOfDotIx};
|
||||
|
||||
pub(crate) mod assignment_to_df;
|
||||
pub(crate) mod check_attr;
|
||||
pub(crate) mod check_call;
|
||||
pub(crate) mod attr;
|
||||
pub(crate) mod call;
|
||||
pub(crate) mod inplace_argument;
|
||||
pub(crate) mod pd_merge;
|
||||
pub(crate) mod subscript;
|
||||
|
|
66
crates/ruff/src/rules/pandas_vet/rules/subscript.rs
Normal file
66
crates/ruff/src/rules/pandas_vet/rules/subscript.rs
Normal file
|
@ -0,0 +1,66 @@
|
|||
use rustpython_parser::ast::{self, Expr, Ranged};
|
||||
|
||||
use ruff_diagnostics::Violation;
|
||||
use ruff_diagnostics::{Diagnostic, DiagnosticKind};
|
||||
use ruff_macros::{derive_message_formats, violation};
|
||||
|
||||
use crate::checkers::ast::Checker;
|
||||
use crate::registry::Rule;
|
||||
use crate::rules::pandas_vet::helpers::{test_expression, Resolution};
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotIx;
|
||||
|
||||
impl Violation for PandasUseOfDotIx {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("`.ix` is deprecated; use more explicit `.loc` or `.iloc`")
|
||||
}
|
||||
}
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotAt;
|
||||
|
||||
impl Violation for PandasUseOfDotAt {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.loc` instead of `.at`. If speed is important, use NumPy.")
|
||||
}
|
||||
}
|
||||
|
||||
#[violation]
|
||||
pub struct PandasUseOfDotIat;
|
||||
|
||||
impl Violation for PandasUseOfDotIat {
|
||||
#[derive_message_formats]
|
||||
fn message(&self) -> String {
|
||||
format!("Use `.iloc` instead of `.iat`. If speed is important, use NumPy.")
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn subscript(checker: &mut Checker, value: &Expr, expr: &Expr) {
|
||||
let Expr::Attribute(ast::ExprAttribute { attr, value, .. }) = value else {
|
||||
return;
|
||||
};
|
||||
|
||||
let rules = &checker.settings.rules;
|
||||
let violation: DiagnosticKind = match attr.as_str() {
|
||||
"ix" if rules.enabled(Rule::PandasUseOfDotIx) => PandasUseOfDotIx.into(),
|
||||
"at" if rules.enabled(Rule::PandasUseOfDotAt) => PandasUseOfDotAt.into(),
|
||||
"iat" if rules.enabled(Rule::PandasUseOfDotIat) => PandasUseOfDotIat.into(),
|
||||
_ => return,
|
||||
};
|
||||
|
||||
// Avoid flagging on non-DataFrames (e.g., `{"a": 1}.at[0]`), and on irrelevant bindings
|
||||
// (like imports).
|
||||
if !matches!(
|
||||
test_expression(value, &checker.ctx),
|
||||
Resolution::RelevantLocal
|
||||
) {
|
||||
return;
|
||||
}
|
||||
|
||||
checker
|
||||
.diagnostics
|
||||
.push(Diagnostic::new(violation, expr.range()));
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue