mirror of
https://github.com/astral-sh/ruff.git
synced 2025-11-22 13:23:18 +00:00
Improve PD011 detection to exclude non-pandas .values usage
Refactored pandas_vet helpers to more accurately distinguish between pandas and non-pandas sources for .values attribute access, explicitly excluding numpy and other irrelevant bindings. Added a new test fixture and snapshot for PD011, and updated test cases to use the fixture file.
This commit is contained in:
parent
76ac3465e9
commit
bc87b705ea
4 changed files with 97 additions and 102 deletions
38
crates/ruff_linter/resources/test/fixtures/pandas_vet/PD011.py
vendored
Normal file
38
crates/ruff_linter/resources/test/fixtures/pandas_vet/PD011.py
vendored
Normal file
|
|
@ -0,0 +1,38 @@
|
|||
import pandas as pd
|
||||
import numpy as np
|
||||
|
||||
|
||||
def test_numpy_unique_inverse():
|
||||
unique = np.unique_inverse([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
|
||||
|
||||
def test_numpy_unique_all():
|
||||
unique = np.unique_all([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
|
||||
|
||||
def test_numpy_unique_counts():
|
||||
unique = np.unique_counts([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
|
||||
|
||||
def test_numpy_typed_unique_inverse():
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from numpy.lib._arraysetops_impl import UniqueInverseResult
|
||||
unique: UniqueInverseResult[np.uint64] = np.unique_inverse([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
|
||||
|
||||
def test_simple_non_pandas():
|
||||
p = 1
|
||||
result = p.values
|
||||
|
||||
|
||||
def test_pandas_dataframe_values():
|
||||
"""This should trigger PD011 - pandas DataFrame .values usage"""
|
||||
import pandas as pd
|
||||
x = pd.DataFrame()
|
||||
result = x.values
|
||||
|
||||
|
|
@ -18,6 +18,7 @@ pub(super) enum Resolution {
|
|||
/// Test an [`Expr`] for relevance to Pandas-related operations.
|
||||
pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resolution {
|
||||
match expr {
|
||||
// Literals in the expression itself are definitely not pandas-related
|
||||
Expr::StringLiteral(_)
|
||||
| Expr::BytesLiteral(_)
|
||||
| Expr::NumberLiteral(_)
|
||||
|
|
@ -43,6 +44,7 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti
|
|||
if matches!(name.id.as_str(), "self" | "cls") {
|
||||
Resolution::IrrelevantBinding
|
||||
} else {
|
||||
// Function arguments are treated as relevant unless proven otherwise
|
||||
Resolution::RelevantLocal
|
||||
}
|
||||
}
|
||||
|
|
@ -52,72 +54,62 @@ pub(super) fn test_expression(expr: &Expr, semantic: &SemanticModel) -> Resoluti
|
|||
| BindingKind::LoopVar
|
||||
| BindingKind::Global(_)
|
||||
| BindingKind::Nonlocal(_, _) => {
|
||||
// Check if this binding comes from pandas or another relevant source
|
||||
// Check if this binding comes from a definitively non-pandas source
|
||||
if let Some(assigned_value) = find_binding_value(binding, semantic) {
|
||||
// Check if the assigned value comes from pandas
|
||||
if is_pandas_related_value(assigned_value, semantic) {
|
||||
Resolution::RelevantLocal
|
||||
} else {
|
||||
// This is a non-pandas binding (e.g., literal, numpy, etc.)
|
||||
Resolution::IrrelevantBinding
|
||||
// Recurse to check the assigned value
|
||||
match test_expression(assigned_value, semantic) {
|
||||
// If the assigned value is definitively not pandas (literals, etc.)
|
||||
Resolution::IrrelevantExpression => {
|
||||
Resolution::IrrelevantBinding
|
||||
}
|
||||
// If it's clearly pandas-related, treat as relevant
|
||||
Resolution::RelevantLocal | Resolution::PandasModule => {
|
||||
Resolution::RelevantLocal
|
||||
}
|
||||
// If we got IrrelevantBinding, it means we traced it back to a
|
||||
// non-pandas source (e.g., numpy import), so keep it as irrelevant
|
||||
Resolution::IrrelevantBinding => Resolution::IrrelevantBinding,
|
||||
}
|
||||
} else {
|
||||
// If we can't determine the source, be conservative and treat as relevant
|
||||
// If we can't determine the source, be liberal and treat as relevant
|
||||
// to avoid false negatives (e.g., function parameters with annotations)
|
||||
Resolution::RelevantLocal
|
||||
}
|
||||
}
|
||||
BindingKind::Import(import)
|
||||
if matches!(import.qualified_name().segments(), ["pandas"]) =>
|
||||
{
|
||||
Resolution::PandasModule
|
||||
BindingKind::Import(import) => {
|
||||
let segments = import.qualified_name().segments();
|
||||
if matches!(segments, ["pandas"]) {
|
||||
Resolution::PandasModule
|
||||
} else if matches!(segments, ["numpy"]) {
|
||||
// Explicitly exclude numpy imports
|
||||
Resolution::IrrelevantBinding
|
||||
} else {
|
||||
Resolution::IrrelevantBinding
|
||||
}
|
||||
}
|
||||
_ => Resolution::IrrelevantBinding,
|
||||
}
|
||||
})
|
||||
}
|
||||
// Recurse for attribute access (e.g., df.values -> check df)
|
||||
Expr::Attribute(attr) => test_expression(attr.value.as_ref(), semantic),
|
||||
// Recurse for call expressions (e.g., pd.DataFrame() -> check pd)
|
||||
Expr::Call(call) => {
|
||||
// Check if this is a pandas function call
|
||||
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
|
||||
let segments = qualified_name.segments();
|
||||
if segments.starts_with(&["pandas"]) {
|
||||
return Resolution::RelevantLocal;
|
||||
}
|
||||
// Explicitly exclude numpy function calls
|
||||
if segments.starts_with(&["numpy"]) || segments.starts_with(&["np"]) {
|
||||
return Resolution::IrrelevantBinding;
|
||||
}
|
||||
}
|
||||
// For other calls, recurse on the function expression
|
||||
test_expression(&call.func, semantic)
|
||||
}
|
||||
// For other expressions, default to relevant to avoid false negatives
|
||||
_ => Resolution::RelevantLocal,
|
||||
}
|
||||
}
|
||||
|
||||
/// Check if an expression value is related to pandas (e.g., comes from pandas module or operations).
|
||||
fn is_pandas_related_value(expr: &Expr, semantic: &SemanticModel) -> bool {
|
||||
match expr {
|
||||
// Literals are definitely not pandas-related
|
||||
Expr::StringLiteral(_)
|
||||
| Expr::BytesLiteral(_)
|
||||
| Expr::NumberLiteral(_)
|
||||
| Expr::BooleanLiteral(_)
|
||||
| Expr::NoneLiteral(_)
|
||||
| Expr::EllipsisLiteral(_)
|
||||
| Expr::Tuple(_)
|
||||
| Expr::List(_)
|
||||
| Expr::Set(_)
|
||||
| Expr::Dict(_) => false,
|
||||
|
||||
// Direct pandas module access
|
||||
Expr::Name(name) => {
|
||||
if let Some(binding_id) = semantic.resolve_name(name) {
|
||||
let binding = semantic.binding(binding_id);
|
||||
if let BindingKind::Import(import) = &binding.kind {
|
||||
return matches!(import.qualified_name().segments(), ["pandas"]);
|
||||
}
|
||||
}
|
||||
false
|
||||
}
|
||||
// Method calls on pandas objects
|
||||
Expr::Attribute(attr) => {
|
||||
// Check if the object being accessed is pandas-related
|
||||
is_pandas_related_value(attr.value.as_ref(), semantic)
|
||||
}
|
||||
// Function calls - check if they're pandas functions
|
||||
Expr::Call(call) => {
|
||||
if let Some(qualified_name) = semantic.resolve_qualified_name(&call.func) {
|
||||
return qualified_name.segments().starts_with(&["pandas"]);
|
||||
}
|
||||
false
|
||||
}
|
||||
// For other expressions, we can't easily determine if they're pandas-related
|
||||
// so we return false to be conservative (treat as non-pandas)
|
||||
_ => false,
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -269,53 +269,6 @@ mod tests {
|
|||
",
|
||||
"PD011_pass_node_name"
|
||||
)]
|
||||
#[test_case(
|
||||
r"
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
unique = np.unique_inverse([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
",
|
||||
"PD011_pass_numpy_unique_inverse"
|
||||
)]
|
||||
#[test_case(
|
||||
r"
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
unique = np.unique_all([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
",
|
||||
"PD011_pass_numpy_unique_all"
|
||||
)]
|
||||
#[test_case(
|
||||
r"
|
||||
import pandas as pd
|
||||
import numpy as np
|
||||
unique = np.unique_counts([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
",
|
||||
"PD011_pass_numpy_unique_counts"
|
||||
)]
|
||||
#[test_case(
|
||||
r"
|
||||
import pandas as pd
|
||||
from typing import TYPE_CHECKING
|
||||
if TYPE_CHECKING:
|
||||
from numpy.lib._arraysetops_impl import UniqueInverseResult
|
||||
import numpy as np
|
||||
unique: UniqueInverseResult[np.uint64] = np.unique_inverse([1, 2, 3, 2, 1])
|
||||
result = unique.values
|
||||
",
|
||||
"PD011_pass_numpy_typed_unique_inverse"
|
||||
)]
|
||||
#[test_case(
|
||||
r"
|
||||
import pandas as pd
|
||||
p = 1
|
||||
result = p.values
|
||||
",
|
||||
"PD011_pass_simple_non_pandas"
|
||||
)]
|
||||
#[test_case(
|
||||
r#"
|
||||
import pandas as pd
|
||||
|
|
@ -426,6 +379,7 @@ mod tests {
|
|||
)]
|
||||
#[test_case(Rule::PandasUseOfInplaceArgument, Path::new("PD002.py"))]
|
||||
#[test_case(Rule::PandasNuniqueConstantSeriesCheck, Path::new("PD101.py"))]
|
||||
#[test_case(Rule::PandasUseOfDotValues, Path::new("PD011.py"))]
|
||||
fn paths(rule_code: Rule, path: &Path) -> Result<()> {
|
||||
let snapshot = format!("{}_{}", rule_code.noqa_code(), path.to_string_lossy());
|
||||
let diagnostics = test_path(
|
||||
|
|
|
|||
|
|
@ -0,0 +1,11 @@
|
|||
---
|
||||
source: crates/ruff_linter/src/rules/pandas_vet/mod.rs
|
||||
---
|
||||
PD011 Use `.to_numpy()` instead of `.values`
|
||||
--> PD011.py:37:14
|
||||
|
|
||||
35 | import pandas as pd
|
||||
36 | x = pd.DataFrame()
|
||||
37 | result = x.values
|
||||
| ^^^^^^^^
|
||||
|
|
||||
Loading…
Add table
Add a link
Reference in a new issue